[Midnightbsd-cvs] src: sys/netinet: bring in freebsd changes.
laffer1 at midnightbsd.org
Fri Sep 12 20:54:26 EDT 2008
Log Message:
-----------
bring in freebsd changes.
Modified Files:
--------------
src/sys/netinet:
accf_data.c (r1.1.1.1 -> r1.2)
accf_http.c (r1.1.1.1 -> r1.2)
icmp6.h (r1.1.1.2 -> r1.2)
icmp_var.h (r1.1.1.1 -> r1.2)
if_atm.c (r1.1.1.1 -> r1.2)
if_ether.c (r1.7 -> r1.8)
igmp.c (r1.1.1.1 -> r1.2)
igmp.h (r1.1.1.1 -> r1.2)
igmp_var.h (r1.1.1.1 -> r1.2)
in.c (r1.1.1.2 -> r1.2)
in.h (r1.2 -> r1.3)
in_cksum.c (r1.1.1.1 -> r1.2)
in_gif.c (r1.1.1.2 -> r1.2)
in_pcb.c (r1.1.1.2 -> r1.2)
in_pcb.h (r1.1.1.1 -> r1.2)
in_proto.c (r1.1.1.2 -> r1.2)
in_rmx.c (r1.1.1.1 -> r1.2)
in_var.h (r1.1.1.1 -> r1.2)
ip.h (r1.1.1.1 -> r1.2)
ip6.h (r1.1.1.1 -> r1.2)
ip_carp.c (r1.1.1.2 -> r1.2)
ip_carp.h (r1.1.1.1 -> r1.2)
ip_divert.c (r1.1.1.1 -> r1.2)
ip_dummynet.c (r1.1.1.2 -> r1.2)
ip_dummynet.h (r1.1.1.2 -> r1.2)
ip_ecn.c (r1.1.1.1 -> r1.2)
ip_encap.c (r1.1.1.1 -> r1.2)
ip_fastfwd.c (r1.1.1.1 -> r1.2)
ip_fw.h (r1.1.1.2 -> r1.2)
ip_fw2.c (r1.2 -> r1.3)
ip_fw_pfil.c (r1.1.1.2 -> r1.2)
ip_gre.c (r1.1.1.2 -> r1.2)
ip_gre.h (r1.1.1.2 -> r1.2)
ip_icmp.c (r1.1.1.2 -> r1.2)
ip_id.c (r1.2 -> r1.3)
ip_input.c (r1.2 -> r1.3)
ip_mroute.c (r1.1.1.2 -> r1.2)
ip_mroute.h (r1.1.1.2 -> r1.2)
ip_output.c (r1.6 -> r1.7)
ip_var.h (r1.2 -> r1.3)
pim_var.h (r1.1.1.1 -> r1.2)
raw_ip.c (r1.1.1.1 -> r1.2)
tcp.h (r1.1.1.1 -> r1.2)
tcp_debug.c (r1.1.1.1 -> r1.2)
tcp_debug.h (r1.1.1.1 -> r1.2)
tcp_fsm.h (r1.1.1.1 -> r1.2)
tcp_hostcache.c (r1.1.1.1 -> r1.2)
tcp_input.c (r1.5 -> r1.6)
tcp_output.c (r1.3 -> r1.4)
tcp_sack.c (r1.1.1.1 -> r1.2)
tcp_seq.h (r1.1.1.1 -> r1.2)
tcp_subr.c (r1.2 -> r1.3)
tcp_syncache.c (r1.8 -> r1.9)
tcp_timer.c (r1.2 -> r1.3)
tcp_timer.h (r1.2 -> r1.3)
tcp_usrreq.c (r1.7 -> r1.8)
tcp_var.h (r1.3 -> r1.4)
udp.h (r1.1.1.1 -> r1.2)
udp_usrreq.c (r1.2 -> r1.3)
udp_var.h (r1.1.1.1 -> r1.2)
src/sys/netinet/libalias:
alias.c (r1.1.1.1 -> r1.2)
alias.h (r1.1.1.1 -> r1.2)
alias_cuseeme.c (r1.1.1.1 -> r1.2)
alias_db.c (r1.1.1.1 -> r1.2)
alias_ftp.c (r1.1.1.1 -> r1.2)
alias_irc.c (r1.1.1.1 -> r1.2)
alias_local.h (r1.1.1.1 -> r1.2)
alias_nbt.c (r1.1.1.1 -> r1.2)
alias_old.c (r1.1.1.1 -> r1.2)
alias_pptp.c (r1.1.1.1 -> r1.2)
alias_proxy.c (r1.2 -> r1.3)
alias_skinny.c (r1.1.1.1 -> r1.2)
alias_smedia.c (r1.1.1.1 -> r1.2)
alias_util.c (r1.1.1.1 -> r1.2)
libalias.3 (r1.1.1.1 -> r1.2)
src/sys/netinet6:
dest6.c (r1.1.1.1 -> r1.2)
frag6.c (r1.1.1.1 -> r1.2)
in6.c (r1.3 -> r1.4)
in6.h (r1.3 -> r1.4)
in6_cksum.c (r1.1.1.1 -> r1.2)
in6_gif.c (r1.1.1.2 -> r1.2)
in6_ifattach.c (r1.1.1.2 -> r1.2)
in6_ifattach.h (r1.1.1.2 -> r1.2)
in6_pcb.c (r1.1.1.2 -> r1.2)
in6_pcb.h (r1.1.1.1 -> r1.2)
in6_proto.c (r1.2 -> r1.3)
in6_rmx.c (r1.1.1.1 -> r1.2)
in6_src.c (r1.1.1.2 -> r1.2)
in6_var.h (r1.1.1.2 -> r1.2)
ip6_forward.c (r1.1.1.1 -> r1.2)
ip6_id.c (r1.1.1.1 -> r1.2)
ip6_input.c (r1.1.1.2 -> r1.2)
ip6_mroute.c (r1.2 -> r1.3)
ip6_mroute.h (r1.1.1.1 -> r1.2)
ip6_output.c (r1.1.1.2 -> r1.2)
ip6_var.h (r1.1.1.2 -> r1.2)
mld6.c (r1.2 -> r1.3)
mld6_var.h (r1.1.1.2 -> r1.2)
nd6.c (r1.3 -> r1.4)
nd6.h (r1.1.1.2 -> r1.2)
nd6_nbr.c (r1.1.1.2 -> r1.2)
nd6_rtr.c (r1.2 -> r1.3)
pim6_var.h (r1.1.1.1 -> r1.2)
raw_ip6.c (r1.1.1.2 -> r1.2)
route6.c (r1.2 -> r1.3)
scope6.c (r1.1.1.1 -> r1.2)
scope6_var.h (r1.1.1.1 -> r1.2)
udp6_usrreq.c (r1.1.1.2 -> r1.2)
udp6_var.h (r1.1.1.1 -> r1.2)
Added Files:
-----------
src/sys/netinet:
in_mcast.c (r1.1)
ip_ipsec.c (r1.1)
ip_ipsec.h (r1.1)
ip_options.c (r1.1)
ip_options.h (r1.1)
sctp.h (r1.1)
sctp_asconf.c (r1.1)
sctp_asconf.h (r1.1)
sctp_auth.c (r1.1)
sctp_auth.h (r1.1)
sctp_bsd_addr.c (r1.1)
sctp_bsd_addr.h (r1.1)
sctp_cc_functions.c (r1.1)
sctp_cc_functions.h (r1.1)
sctp_constants.h (r1.1)
sctp_crc32.c (r1.1)
sctp_crc32.h (r1.1)
sctp_header.h (r1.1)
sctp_indata.c (r1.1)
sctp_indata.h (r1.1)
sctp_input.c (r1.1)
sctp_input.h (r1.1)
sctp_lock_bsd.h (r1.1)
sctp_os.h (r1.1)
sctp_os_bsd.h (r1.1)
sctp_output.c (r1.1)
sctp_output.h (r1.1)
sctp_pcb.c (r1.1)
sctp_pcb.h (r1.1)
sctp_peeloff.c (r1.1)
sctp_peeloff.h (r1.1)
sctp_structs.h (r1.1)
sctp_sysctl.c (r1.1)
sctp_sysctl.h (r1.1)
sctp_timer.c (r1.1)
sctp_timer.h (r1.1)
sctp_uio.h (r1.1)
sctp_usrreq.c (r1.1)
sctp_var.h (r1.1)
sctputil.c (r1.1)
sctputil.h (r1.1)
tcp_reass.c (r1.1)
tcp_syncache.h (r1.1)
tcp_timewait.c (r1.1)
src/sys/netinet/libalias:
alias_dummy.c (r1.1)
alias_mod.c (r1.1)
alias_mod.h (r1.1)
-------------- next part --------------
Index: udp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/udp_var.h -L sys/netinet/udp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/udp_var.h
+++ sys/netinet/udp_var.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,18 +28,18 @@
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/udp_var.h,v 1.29 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/udp_var.h,v 1.33 2007/07/10 09:30:46 rwatson Exp $
*/
#ifndef _NETINET_UDP_VAR_H_
-#define _NETINET_UDP_VAR_H_
+#define _NETINET_UDP_VAR_H_
/*
* UDP kernel structures and variables.
*/
-struct udpiphdr {
- struct ipovly ui_i; /* overlaid ip structure */
- struct udphdr ui_u; /* udp header */
+struct udpiphdr {
+ struct ipovly ui_i; /* overlaid ip structure */
+ struct udphdr ui_u; /* udp header */
};
#define ui_x1 ui_i.ih_x1
#define ui_pr ui_i.ih_pr
@@ -50,7 +51,7 @@
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum
-struct udpstat {
+struct udpstat {
/* input statistics: */
u_long udps_ipackets; /* total input packets */
u_long udps_hdrops; /* packet shorter than header */
@@ -67,45 +68,45 @@
u_long udps_fastout; /* output packets on fast path */
/* of no socket on port, arrived as multicast */
u_long udps_noportmcast;
+ u_long udps_filtermcast; /* blocked by multicast filter */
};
/*
- * Names for UDP sysctl objects
+ * Names for UDP sysctl objects.
*/
#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
-#define UDPCTL_STATS 2 /* statistics (read-only) */
+#define UDPCTL_STATS 2 /* statistics (read-only) */
#define UDPCTL_MAXDGRAM 3 /* max datagram size */
#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
-#define UDPCTL_MAXID 6
+#define UDPCTL_MAXID 6
-#define UDPCTL_NAMES { \
- { 0, 0 }, \
- { "checksum", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "maxdgram", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
+#define UDPCTL_NAMES { \
+ { 0, 0 }, \
+ { "checksum", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "maxdgram", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
}
#ifdef _KERNEL
SYSCTL_DECL(_net_inet_udp);
-extern struct pr_usrreqs udp_usrreqs;
-extern struct inpcbhead udb;
-extern struct inpcbinfo udbinfo;
-extern u_long udp_sendspace;
-extern u_long udp_recvspace;
-extern struct udpstat udpstat;
-extern int log_in_vain;
-
-void udp_ctlinput(int, struct sockaddr *, void *);
-void udp_init(void);
-void udp_input(struct mbuf *, int);
-
-struct inpcb *
- udp_notify(struct inpcb *inp, int errno);
-int udp_shutdown(struct socket *so);
+extern struct pr_usrreqs udp_usrreqs;
+extern struct inpcbhead udb;
+extern struct inpcbinfo udbinfo;
+extern u_long udp_sendspace;
+extern u_long udp_recvspace;
+extern struct udpstat udpstat;
+extern int udp_blackhole;
+extern int udp_log_in_vain;
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+void udp_init(void);
+void udp_input(struct mbuf *, int);
+struct inpcb *udp_notify(struct inpcb *inp, int errno);
+int udp_shutdown(struct socket *so);
#endif
#endif
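The udp_var.h hunk above is mostly whitespace churn, but it also adds the udps_filtermcast counter and exports udp_blackhole and udp_log_in_vain in place of the old log_in_vain extern. As an aside, such knobs are normally driven through sysctl in FreeBSD-derived kernels; the sketch below is an illustration only (not part of the commit) and assumes the conventional net.inet.udp.blackhole name:

/* Illustration only -- reads and (with privilege) sets a UDP sysctl knob
 * from user space; assumes the conventional net.inet.udp.blackhole name. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int main(void)
{
    int val = 0, newval = 1;
    size_t len = sizeof(val);

    if (sysctlbyname("net.inet.udp.blackhole", &val, &len, NULL, 0) == -1) {
        perror("sysctlbyname");
        return 1;
    }
    printf("net.inet.udp.blackhole = %d\n", val);

    /* Setting the knob needs root; failure is reported, not fatal. */
    if (sysctlbyname("net.inet.udp.blackhole", NULL, NULL,
        &newval, sizeof(newval)) == -1)
        perror("set net.inet.udp.blackhole");
    return 0;
}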
--- /dev/null
+++ sys/netinet/sctp_var.h
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_var.h,v 1.24 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_var.h,v 1.20.2.1 2007/10/25 12:27:06 rrs Exp $");
+
+#ifndef _NETINET_SCTP_VAR_H_
+#define _NETINET_SCTP_VAR_H_
+
+#include <netinet/sctp_uio.h>
+
+#if defined(_KERNEL)
+
+extern struct pr_usrreqs sctp_usrreqs;
+
+
+#define sctp_feature_on(inp, feature) (inp->sctp_features |= feature)
+#define sctp_feature_off(inp, feature) (inp->sctp_features &= ~feature)
+#define sctp_is_feature_on(inp, feature) (inp->sctp_features & feature)
+#define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0)
+
+/* managing mobility_feature in inpcb (by micchie) */
+#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
+#define sctp_mobility_feature_off(inp, feature) (inp->sctp_mobility_features &= ~feature)
+#define sctp_is_mobility_feature_on(inp, feature) (inp->sctp_mobility_features & feature)
+#define sctp_is_mobility_feature_off(inp, feature) ((inp->sctp_mobility_features & feature) == 0)
+
+#define sctp_maxspace(sb) (max((sb)->sb_hiwat,SCTP_MINIMAL_RWND))
+
+#define sctp_sbspace(asoc, sb) ((long) ((sctp_maxspace(sb) > (asoc)->sb_cc) ? (sctp_maxspace(sb) - (asoc)->sb_cc) : 0))
+
+#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
+
+#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+
+/*
+ * I tried to cache the readq entries at one point. But the reality
+ * is that it did not add any performance since this meant we had to
+ * lock the STCB on read. And at that point once you have to do an
+ * extra lock, it really does not matter if the lock is in the ZONE
+ * stuff or in our code. Note that this same problem would occur with
+ * an mbuf cache as well so it is not really worth doing, at least
+ * right now :-D
+ */
+
+#define sctp_free_a_readq(_stcb, _readq) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, (_readq)); \
+ SCTP_DECR_READQ_COUNT(); \
+}
+
+#define sctp_alloc_a_readq(_stcb, _readq) { \
+ (_readq) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_readq, struct sctp_queued_to_read); \
+ if ((_readq)) { \
+ SCTP_INCR_READQ_COUNT(); \
+ } \
+}
+
+#define sctp_free_a_strmoq(_stcb, _strmoq) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_strmoq, (_strmoq)); \
+ SCTP_DECR_STRMOQ_COUNT(); \
+}
+
+#define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
+ (_strmoq) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_strmoq, struct sctp_stream_queue_pending); \
+ if ((_strmoq)) { \
+ SCTP_INCR_STRMOQ_COUNT(); \
+ } \
+}
+
+
+#define sctp_free_a_chunk(_stcb, _chk) { \
+ if(_stcb) { \
+ SCTP_TCB_LOCK_ASSERT((_stcb)); \
+ if ((_chk)->whoTo) { \
+ sctp_free_remote_addr((_chk)->whoTo); \
+ (_chk)->whoTo = NULL; \
+ } \
+ if (((_stcb)->asoc.free_chunk_cnt > sctp_asoc_free_resc_limit) || \
+ (sctppcbinfo.ipi_free_chunks > sctp_system_free_resc_limit)) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } else { \
+ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ (_stcb)->asoc.free_chunk_cnt++; \
+ atomic_add_int(&sctppcbinfo.ipi_free_chunks, 1); \
+ } \
+ } else { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } \
+}
+
+#define sctp_alloc_a_chunk(_stcb, _chk) { \
+ if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \
+ (_chk) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_chunk, struct sctp_tmit_chunk); \
+ if ((_chk)) { \
+ SCTP_INCR_CHK_COUNT(); \
+ (_chk)->whoTo = NULL; \
+ } \
+ } else { \
+ (_chk) = TAILQ_FIRST(&(_stcb)->asoc.free_chunks); \
+ TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ atomic_subtract_int(&sctppcbinfo.ipi_free_chunks, 1); \
+ SCTP_STAT_INCR(sctps_cached_chk); \
+ (_stcb)->asoc.free_chunk_cnt--; \
+ } \
+}
+
+
+
+#define sctp_free_remote_addr(__net) { \
+ if ((__net)) { \
+ if (atomic_fetchadd_int(&(__net)->ref_count, -1) == 1) { \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->fr_timer.timer); \
+ if ((__net)->ro.ro_rt) { \
+ RTFREE((__net)->ro.ro_rt); \
+ (__net)->ro.ro_rt = NULL; \
+ } \
+ if ((__net)->src_addr_selected) { \
+ sctp_free_ifa((__net)->ro._s_addr); \
+ (__net)->ro._s_addr = NULL; \
+ } \
+ (__net)->src_addr_selected = 0; \
+ (__net)->dest_state = SCTP_ADDR_NOT_REACHABLE; \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_net, (__net)); \
+ SCTP_DECR_RADDR_COUNT(); \
+ } \
+ } \
+}
+
+#define sctp_sbfree(ctl, stcb, sb, m) { \
+ uint32_t val; \
+ val = atomic_fetchadd_int(&(sb)->sb_cc,-(SCTP_BUF_LEN((m)))); \
+ if (val < SCTP_BUF_LEN((m))) { \
+ panic("sb_cc goes negative"); \
+ } \
+ val = atomic_fetchadd_int(&(sb)->sb_mbcnt,-(MSIZE)); \
+ if (val < MSIZE) { \
+ panic("sb_mbcnt goes negative"); \
+ } \
+ if (((ctl)->do_not_ref_stcb == 0) && stcb) {\
+ val = atomic_fetchadd_int(&(stcb)->asoc.sb_cc,-(SCTP_BUF_LEN((m)))); \
+ if (val < SCTP_BUF_LEN((m))) {\
+ panic("stcb->sb_cc goes negative"); \
+ } \
+ val = atomic_fetchadd_int(&(stcb)->asoc.my_rwnd_control_len,-(MSIZE)); \
+ if (val < MSIZE) { \
+ panic("asoc->mbcnt goes negative"); \
+ } \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_subtract_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+
+#define sctp_sballoc(stcb, sb, m) { \
+ atomic_add_int(&(sb)->sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(sb)->sb_mbcnt, MSIZE); \
+ if (stcb) { \
+ atomic_add_int(&(stcb)->asoc.sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_add_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+
+#define sctp_ucount_incr(val) { \
+ val++; \
+}
+
+#define sctp_ucount_decr(val) { \
+ if (val > 0) { \
+ val--; \
+ } else { \
+ val = 0; \
+ } \
+}
+
+#define sctp_mbuf_crush(data) do { \
+ struct mbuf *_m; \
+ _m = (data); \
+ while(_m && (SCTP_BUF_LEN(_m) == 0)) { \
+ (data) = SCTP_BUF_NEXT(_m); \
+ SCTP_BUF_NEXT(_m) = NULL; \
+ sctp_m_free(_m); \
+ _m = (data); \
+ } \
+} while (0)
+
+#define sctp_flight_size_decrease(tp1) do { \
+ if (tp1->whoTo->flight_size >= tp1->book_size) \
+ tp1->whoTo->flight_size -= tp1->book_size; \
+ else \
+ tp1->whoTo->flight_size = 0; \
+} while (0)
+
+#define sctp_flight_size_increase(tp1) do { \
+ (tp1)->whoTo->flight_size += (tp1)->book_size; \
+} while (0)
+
+#ifdef SCTP_FS_SPEC_LOG
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ if(stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
+ stcb->asoc.fs_index++; \
+ tp1->window_probe = 0; \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ if(stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
+ stcb->asoc.fs_index++; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#else
+
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#endif
+
+
+struct sctp_nets;
+struct sctp_inpcb;
+struct sctp_tcb;
+struct sctphdr;
+
+
+void sctp_close(struct socket *so);
+int sctp_disconnect(struct socket *so);
+
+void sctp_ctlinput __P((int, struct sockaddr *, void *));
+int sctp_ctloutput __P((struct socket *, struct sockopt *));
+void sctp_input __P((struct mbuf *, int));
+void sctp_drain __P((void));
+void sctp_init __P((void));
+
+
+void sctp_pcbinfo_cleanup(void);
+
+int sctp_shutdown __P((struct socket *));
+void sctp_notify
+__P((struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+ struct sockaddr *, struct sctp_tcb *,
+ struct sctp_nets *));
+
+ int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+ int, int, struct proc *);
+
+/* can't use sctp_assoc_t here */
+ int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+
+ int sctp_ingetaddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_peeraddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_listen(struct socket *, int, struct thread *);
+
+ int sctp_accept(struct socket *, struct sockaddr **);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_SCTP_VAR_H_ */
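The sctp_feature_on/off/is_feature_on macros in the new sctp_var.h are plain bit-flag operations on inp->sctp_features. A compile-and-run sketch of that pattern with a stub structure follows; it is an illustration only, with a made-up flag value, and the real struct sctp_inpcb and flag definitions live in the SCTP headers added elsewhere in this commit:

/* Illustration only -- user-space mock of the sctp_features bit-flag macros. */
#include <stdint.h>
#include <stdio.h>

struct mock_inp { uint32_t sctp_features; };      /* stand-in for struct sctp_inpcb */

#define demo_feature_on(inp, feature)    ((inp)->sctp_features |= (feature))
#define demo_feature_off(inp, feature)   ((inp)->sctp_features &= ~(feature))
#define demo_is_feature_on(inp, feature) (((inp)->sctp_features & (feature)) != 0)

#define DEMO_FEATURE_AUTOCLOSE 0x0004             /* made-up flag value for the demo */

int main(void)
{
    struct mock_inp inp = { 0 };

    demo_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE);
    printf("on:  %d\n", demo_is_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE));
    demo_feature_off(&inp, DEMO_FEATURE_AUTOCLOSE);
    printf("off: %d\n", demo_is_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE));
    return 0;
}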
Index: in_cksum.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_cksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_cksum.c -L sys/netinet/in_cksum.c -u -r1.1.1.1 -r1.2
--- sys/netinet/in_cksum.c
+++ sys/netinet/in_cksum.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/in_cksum.c,v 1.8 2005/01/07 01:45:44 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_cksum.c,v 1.10 2007/10/07 20:44:22 silby Exp $");
+
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -44,9 +46,7 @@
#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
int
-in_cksum(m, len)
- register struct mbuf *m;
- register int len;
+in_cksum(struct mbuf *m, int len)
{
register u_short *w;
register int sum = 0;
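The in_cksum.c hunk above converts the old K&R definition to an ANSI prototype and moves the $FreeBSD$ tag into __FBSDID(). For reference, the routine computes the standard one's-complement Internet checksum (RFC 1071); the flat-buffer sketch below illustrates the algorithm only and skips the mbuf walking and the ADDCARRY/REDUCE word-union tricks the kernel uses for speed:

/* Illustration only -- RFC 1071 one's-complement checksum over a flat buffer. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

uint16_t simple_in_cksum(const void *buf, size_t len)
{
    const uint8_t *p = buf;
    uint32_t sum = 0;

    while (len > 1) {                      /* sum 16-bit words */
        sum += ((uint32_t)p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len == 1)                          /* pad a trailing odd byte */
        sum += (uint32_t)p[0] << 8;
    while (sum >> 16)                      /* fold carries back into 16 bits */
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;                 /* one's complement of the sum */
}

int main(void)
{
    const uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c, 0x00, 0x00 };

    printf("cksum = 0x%04x\n", simple_in_cksum(pkt, sizeof(pkt)));
    return 0;
}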
--- /dev/null
+++ sys/netinet/sctputil.h
@@ -0,0 +1,390 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctputil.h,v 1.15 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctputil.h,v 1.29.2.1 2007/11/06 02:48:04 rrs Exp $");
+#ifndef __sctputil_h__
+#define __sctputil_h__
+
+
+#if defined(_KERNEL)
+
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void sctp_print_out_track_log(struct sctp_tcb *stcb);
+
+#endif
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *sctp_m_free(struct mbuf *m);
+void sctp_m_freem(struct mbuf *m);
+
+#else
+#define sctp_m_free m_free
+#define sctp_m_freem m_freem
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF) || defined(__APPLE__)
+void
+ sctp_log_trace(uint32_t fr, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f);
+
+#endif
+
+#define sctp_get_associd(stcb) ((sctp_assoc_t)stcb->asoc.assoc_id)
+
+
+/*
+ * Function prototypes
+ */
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr);
+
+struct sctp_ifa *
+ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
+
+struct sctp_ifa *
+ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
+
+uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
+
+uint32_t sctp_select_a_tag(struct sctp_inpcb *, int);
+
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, int, uint32_t, uint32_t);
+
+void sctp_fill_random_store(struct sctp_pcb *);
+
+void
+sctp_timer_start(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_timer_stop(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t);
+
+int
+ sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id);
+
+uint32_t sctp_calculate_sum(struct mbuf *, int32_t *, uint32_t);
+
+void
+ sctp_mtu_size_reset(struct sctp_inpcb *, struct sctp_association *, uint32_t);
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int new_cumack,
+ struct sockbuf *sb);
+
+
+void sctp_iterator_worker(void);
+
+int find_next_best_mtu(int);
+
+void
+ sctp_timeout_handler(void *);
+
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_nets *, struct timeval *, int);
+
+uint32_t sctp_calculate_len(struct mbuf *);
+
+caddr_t sctp_m_getptr(struct mbuf *, int, int, uint8_t *);
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *, int,
+ struct sctp_paramhdr *, int);
+
+int sctp_add_pad_tombuf(struct mbuf *, int);
+
+int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+
+void
+sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb, int waitflags);
+
+
+void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
+
+void
+sctp_report_all_outbound(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
+
+void
+sctp_abort_notification(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+/* We abort responding to an IP packet for some reason */
+void
+sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, struct sctphdr *, struct mbuf *, uint32_t);
+
+
+/* We choose to abort via user input */
+void
+sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int,
+ struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
+ struct sctp_inpcb *, struct mbuf *, uint32_t);
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error);
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error, int limit, int *bad_addr);
+
+int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
+uint32_t sctp_is_same_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+struct sockaddr_in6 *
+ sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+#define sctp_recover_scope_mac(addr, store) do { \
+ if ((addr->sin6_family == AF_INET6) && \
+ (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr))) { \
+ *store = *addr; \
+ if (addr->sin6_scope_id == 0) { \
+ if (!sa6_recoverscope(store)) { \
+ addr = store; \
+ } \
+ } else { \
+ in6_clearscope(&addr->sin6_addr); \
+ addr = store; \
+ } \
+ } \
+} while (0)
+
+
+int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
+
+void sctp_print_address(struct sockaddr *);
+void sctp_print_address_pkt(struct ip *, struct sctphdr *);
+
+void
+sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb,
+ uint32_t error, int no_lock, uint32_t strseq);
+
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
+ int, struct sctpchunk_listhead *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+struct mbuf *sctp_generate_invmanparam(int);
+
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p);
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error);
+
+int sctp_local_addr_count(struct sctp_tcb *stcb);
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_tmit_chunk *, int);
+
+#else
+#define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt) \
+do { \
+ if (tp1->data != NULL) { \
+ atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+ if ((asoc)->total_output_queue_size >= tp1->book_size) { \
+ atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
+ atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#endif
+
+#define sctp_free_spbufspace(stcb, asoc, sp) \
+do { \
+ if (sp->data != NULL) { \
+ atomic_subtract_int(&(asoc)->chunks_on_out_queue, 1); \
+ if ((asoc)->total_output_queue_size >= sp->length) { \
+ atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
+ atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#define sctp_snd_sb_alloc(stcb, sz) \
+do { \
+ atomic_add_int(&stcb->asoc.total_output_queue_size,sz); \
+ if ((stcb->sctp_socket != NULL) && \
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ atomic_add_int(&stcb->sctp_socket->so_snd.sb_cc,sz); \
+ } \
+} while (0)
+
+
+int
+sctp_soreceive(struct socket *so, struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp);
+
+
+/* For those not passing mbufs, this does the
+ * translations for you. Caller owns memory
+ * of size controllen returned in controlp.
+ */
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag);
+
+
+void
+ sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb,
+ uint32_t cumtsn,
+ uint32_t wake_cnt, int from);
+
+void sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t, uint16_t, uint16_t, int);
+
+void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
+
+
+void
+ sctp_log_mb(struct mbuf *m, int from);
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr);
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control,
+ struct sctp_queued_to_read *poschk,
+ int from);
+void sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *, int, uint8_t);
+void rto_logging(struct sctp_nets *net, int from);
+
+void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc);
+
+void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from);
+void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
+void sctp_log_block(uint8_t, struct socket *, struct sctp_association *, int);
+void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+int sctp_fill_stat_log(void *, size_t *);
+void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);
+void sctp_log_sack(uint32_t, uint32_t, uint32_t, uint16_t, uint16_t, int);
+void sctp_log_map(uint32_t, uint32_t, uint32_t, int);
+
+void sctp_clr_stat_log(void);
+
+
+#ifdef SCTP_AUDITING_ENABLED
+void
+sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+void sctp_audit_log(uint8_t, uint8_t);
+
+#endif
+
+
+#endif /* _KERNEL */
+#endif
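Most of the prototypes in the new sctputil.h are self-describing; one worth a note is sctp_get_next_param(), which iterates the type-length-value parameters carried in SCTP chunks. The flat-buffer sketch below is a conceptual illustration of that TLV walk only (RFC 4960 parameters: 16-bit type, 16-bit length including the header, padded to 4 bytes); it is not the kernel routine, which works over an mbuf chain:

/* Illustration only -- conceptual TLV parameter walk over a flat buffer. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>                       /* ntohs() */

struct demo_paramhdr {                       /* local stand-in for struct sctp_paramhdr */
    uint16_t param_type;                     /* network byte order on the wire */
    uint16_t param_length;                   /* includes this 4-byte header */
};

static void walk_params(const uint8_t *buf, size_t buflen)
{
    size_t off = 0;

    while (off + sizeof(struct demo_paramhdr) <= buflen) {
        struct demo_paramhdr ph;

        memcpy(&ph, buf + off, sizeof(ph));
        uint16_t len = ntohs(ph.param_length);
        if (len < sizeof(ph) || off + len > buflen)
            break;                           /* malformed parameter, stop */
        printf("param type 0x%04x, length %u\n", ntohs(ph.param_type), len);
        off += (len + 3u) & ~3u;             /* next parameter, 4-byte aligned */
    }
}

int main(void)
{
    /* one 6-byte parameter padded to 8 bytes, then a 4-byte parameter */
    uint8_t buf[] = { 0x80, 0x08, 0x00, 0x06, 0xde, 0xad, 0x00, 0x00,
                      0xc0, 0x06, 0x00, 0x04 };

    walk_params(buf, sizeof(buf));
    return 0;
}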
Index: ip_carp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_carp.h -L sys/netinet/ip_carp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_carp.h
+++ sys/netinet/ip_carp.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/ip_carp.h,v 1.1 2005/02/22 13:04:03 glebius Exp $ */
+/* $FreeBSD: src/sys/netinet/ip_carp.h,v 1.3 2006/12/01 18:37:41 imp Exp $ */
/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
/*
@@ -76,6 +76,10 @@
unsigned char carp_md[20]; /* SHA1 HMAC */
} __packed;
+#ifdef CTASSERT
+CTASSERT(sizeof(struct carp_header) == 36);
+#endif
+
#define CARP_DFLTTL 255
/* carp_version */
@@ -148,7 +152,6 @@
}
#ifdef _KERNEL
-void carp_ifdetach (struct ifnet *);
void carp_carpdev_state(void *);
void carp_input (struct mbuf *, int);
int carp6_input (struct mbuf **, int *, int);
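The ip_carp.h change above adds a CTASSERT pinning the on-wire size of struct carp_header to 36 bytes at compile time (and drops the carp_ifdetach() prototype). For readers unfamiliar with CTASSERT: it is a compile-time assertion, typically built on the negative-array-size trick. The stand-alone sketch below assumes that common construction; the kernel's actual CTASSERT definition lives in its own headers and may differ in detail:

/* Illustration only -- a user-space stand-in for a CTASSERT-style
 * compile-time assertion using the negative-array-size trick. */
#include <stdint.h>

#define DEMO_CTASSERT(x)      DEMO_CTASSERT1(x, __LINE__)
#define DEMO_CTASSERT1(x, y)  DEMO_CTASSERT2(x, y)
#define DEMO_CTASSERT2(x, y)  typedef char demo_assert_ ## y[(x) ? 1 : -1]

struct demo_header {                  /* stand-in for struct carp_header */
    uint8_t bytes[36];
};

DEMO_CTASSERT(sizeof(struct demo_header) == 36);     /* compiles */
/* DEMO_CTASSERT(sizeof(struct demo_header) == 40);     would fail to compile */

int main(void) { return 0; }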
--- /dev/null
+++ sys/netinet/sctp_timer.h
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_timer.h,v 1.7 2007/09/13 10:36:42 rrs Exp $");
+
+#ifndef __sctp_timer_h__
+#define __sctp_timer_h__
+
+#if defined(_KERNEL)
+
+#define SCTP_RTT_SHIFT 3
+#define SCTP_RTT_VAR_SHIFT 2
+
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *,
+ struct sctp_nets *, int mode);
+
+int
+sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint16_t);
+
+int
+sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_t1init_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_shutdown_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_heartbeat_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, int);
+
+int sctp_is_hb_timer_running(struct sctp_tcb *stcb);
+int sctp_is_sack_timer_running(struct sctp_tcb *stcb);
+
+int
+sctp_cookie_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+int
+sctp_asconf_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_delete_prim_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *net);
+
+void sctp_audit_retranmission_queue(struct sctp_association *);
+
+void sctp_iterator_timer(struct sctp_iterator *it);
+
+
+#endif
+#endif
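The SCTP_RTT_SHIFT (3) and SCTP_RTT_VAR_SHIFT (2) constants defined in the new sctp_timer.h are the usual smoothed-RTT gains (1/8 for SRTT, 1/4 for RTTVAR, as in RFC 2988/6298-style estimators). The integer sketch below shows the arithmetic those shifts imply; it is an illustration only, not the kernel's sctp_calculate_rto(), which also scales, clamps and optionally logs its inputs:

/* Illustration only -- integer SRTT/RTTVAR smoothing with 1/8 and 1/4 gains. */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_RTT_SHIFT      3       /* srtt gain   = 1/8 */
#define DEMO_RTT_VAR_SHIFT  2       /* rttvar gain = 1/4 */

struct rto_state { int srtt, rttvar, rto; };

static void rto_update(struct rto_state *s, int measured_rtt_ms)
{
    if (s->srtt == 0) {                            /* first measurement */
        s->srtt = measured_rtt_ms;
        s->rttvar = measured_rtt_ms / 2;
    } else {
        int delta = measured_rtt_ms - s->srtt;

        s->srtt += delta >> DEMO_RTT_SHIFT;
        s->rttvar += (abs(delta) - s->rttvar) >> DEMO_RTT_VAR_SHIFT;
    }
    s->rto = s->srtt + 4 * s->rttvar;              /* before min/max clamping */
}

int main(void)
{
    struct rto_state s = { 0, 0, 0 };
    int samples[] = { 100, 120, 90, 300 };

    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        rto_update(&s, samples[i]);
        printf("rtt=%3d -> srtt=%d rttvar=%d rto=%d\n",
            samples[i], s.srtt, s.rttvar, s.rto);
    }
    return 0;
}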
--- /dev/null
+++ sys/netinet/sctp_input.c
@@ -0,0 +1,5393 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.c,v 1.27 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_input.c,v 1.65.2.1.2.1 2008/02/02 12:44:13 rwatson Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_timer.h>
+
+
+
+static void
+sctp_stop_all_cookie_timers(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ /*
+ * This now not only stops all cookie timers it also stops any INIT
+ * timers as well. This will make sure that the timers are stopped
+ * in all collision cases.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->rxt_timer.type == SCTP_TIMER_TYPE_COOKIE) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_1);
+ } else if (net->rxt_timer.type == SCTP_TIMER_TYPE_INIT) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_2);
+ }
+ }
+}
+
+/* INIT handler */
+static void
+sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_init *init;
+ struct mbuf *op_err;
+ uint32_t init_limit;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
+ stcb);
+ if (stcb == NULL) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto outnow;
+ }
+ }
+ op_err = NULL;
+ init = &cp->init;
+ /* First are we accepting? */
+ if ((inp->sctp_socket->so_qlimit == 0) && (stcb == NULL)) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init: Abort, so_qlimit:%d\n",
+ inp->sctp_socket->so_qlimit);
+ /*
+ * FIX ME ?? What about TCP model and we have a
+ * match/restart case?
+ */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* validate parameters */
+ if (init->initiate_tag == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
+ /* invalid parameter... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_inbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_outbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ init_limit = offset + ntohs(cp->ch.chunk_length);
+ if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
+ init_limit)) {
+ /* auth parameter(s) error... send abort */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, NULL, vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* send an INIT-ACK w/cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
+ sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp, vrf_id,
+ ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
+outnow:
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+}
+
+/*
+ * process peer "INIT/INIT-ACK" chunk returns value < 0 on error
+ */
+
+int
+sctp_is_there_unsent_data(struct sctp_tcb *stcb)
+{
+ int unsent_data = 0;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strq;
+ struct sctp_association *asoc;
+
+ /*
+ * This function returns the number of streams that have true unsent
+ * data on them. Note that as it looks through it will clean up any
+ * places that have old data that has been sent but left at top of
+ * stream queue.
+ */
+ asoc = &stcb->asoc;
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (!TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Check to see if some data queued */
+ TAILQ_FOREACH(strq, &asoc->out_wheel, next_spoke) {
+ is_there_another:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ continue;
+ }
+ if ((sp->msg_is_complete) &&
+ (sp->length == 0) &&
+ (sp->sender_all_done)) {
+ /*
+ * We are doing differed cleanup. Last time
+ * through when we took all the data the
+ * sender_all_done was not set.
+ */
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out);
+ }
+ atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+ goto is_there_another;
+ } else {
+ unsent_data++;
+ continue;
+ }
+ }
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return (unsent_data);
+}
+
+static int
+sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_init *init;
+ struct sctp_association *asoc;
+ struct sctp_nets *lnet;
+ unsigned int i;
+
+ init = &cp->init;
+ asoc = &stcb->asoc;
+ /* save off parameters */
+ asoc->peer_vtag = ntohl(init->initiate_tag);
+ asoc->peers_rwnd = ntohl(init->a_rwnd);
+ if (TAILQ_FIRST(&asoc->nets)) {
+ /* update any ssthresh's that may have a default */
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ lnet->ssthresh = asoc->peers_rwnd;
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
+ }
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) {
+ unsigned int newcnt;
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+
+ /* cut back on number of streams */
+ newcnt = ntohs(init->num_inbound_streams);
+ /* This if is probably not needed but I am cautious */
+ if (asoc->strmout) {
+ /* First make sure no data chunks are trapped */
+ for (i = newcnt; i < asoc->pre_open_streams; i++) {
+ outs = &asoc->strmout[i];
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp,
+ next);
+ asoc->stream_queue_cnt--;
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
+ stcb, SCTP_NOTIFY_DATAGRAM_UNSENT,
+ sp, SCTP_SO_NOT_LOCKED);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ /* Free the chunk */
+ SCTP_PRINTF("sp:%p tcb:%p weird free case\n",
+ sp, stcb);
+
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+ }
+ /* cut back the count and abandon the upper streams */
+ asoc->pre_open_streams = newcnt;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ /* init tsn's */
+ asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 5, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ /* This is the next one we expect */
+ asoc->str_reset_seq_in = asoc->asconf_seq_in + 1;
+
+ asoc->mapping_array_base_tsn = ntohl(init->initial_tsn);
+ asoc->cumulative_tsn = asoc->asconf_seq_in;
+ asoc->last_echo_tsn = asoc->asconf_seq_in;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ /* open the requested streams */
+
+ if (asoc->strmin != NULL) {
+ /* Free the old ones */
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ ctl->whoFrom = NULL;
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ sctp_free_a_readq(stcb, ctl);
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ }
+ asoc->streamincnt = ntohs(init->num_outbound_streams);
+ if (asoc->streamincnt > MAX_SCTP_STREAMS) {
+ asoc->streamincnt = MAX_SCTP_STREAMS;
+ }
+ SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
+ sizeof(struct sctp_stream_in), SCTP_M_STRMI);
+ if (asoc->strmin == NULL) {
+ /* we didn't get memory for the streams! */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "process_init: couldn't get memory for the streams!\n");
+ return (-1);
+ }
+ for (i = 0; i < asoc->streamincnt; i++) {
+ asoc->strmin[i].stream_no = i;
+ asoc->strmin[i].last_sequence_delivered = 0xffff;
+ /*
+ * U-stream ranges will be set when the cookie is unpacked.
+ * Or for the INIT sender they are un set (if pr-sctp not
+ * supported) when the INIT-ACK arrives.
+ */
+ TAILQ_INIT(&asoc->strmin[i].inqueue);
+ asoc->strmin[i].delivery_started = 0;
+ }
+ /*
+ * load_address_from_init will put the addresses into the
+ * association when the COOKIE is processed or the INIT-ACK is
+ * processed. Both types of COOKIE's existing and new call this
+ * routine. It will remove addresses that are no longer in the
+ * association (for the restarting case where addresses are
+ * removed). Up front when the INIT arrives we will discard it if it
+ * is a restart and new addresses have been added.
+ */
+ /* sa_ignore MEMLEAK */
+ return (0);
+}
+
+/*
+ * INIT-ACK message processing/consumption returns value < 0 on error
+ */
+static int
+sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ struct mbuf *op_err;
+ int retval, abort_flag;
+ uint32_t initack_limit;
+
+ /* First verify that we have no illegal param's */
+ abort_flag = 0;
+ op_err = NULL;
+
+ op_err = sctp_arethere_unrecognized_parameters(m,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)cp);
+ if (abort_flag) {
+ /* Send an abort and notify peer */
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_CAUSE_PROTOCOL_VIOLATION, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ asoc = &stcb->asoc;
+ /* process the peer's parameters in the INIT-ACK */
+ retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb, net);
+ if (retval < 0) {
+ return (retval);
+ }
+ initack_limit = offset + ntohs(cp->ch.chunk_length);
+ /* load all addresses */
+ if ((retval = sctp_load_addresses_from_init(stcb, m, iphlen,
+ (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
+ NULL))) {
+ /* Huh, we should abort */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Load addresses from INIT causes an abort %d\n",
+ retval);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ NULL, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* if the peer doesn't support asconf, flush the asconf queue */
+ if (asoc->peer_supports_asconf == 0) {
+ struct sctp_asconf_addr *aparam;
+
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ }
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+ if (op_err) {
+ sctp_queue_op_err(stcb, op_err);
+ /* queuing will steal away the mbuf chain to the out queue */
+ op_err = NULL;
+ }
+ /* extract the cookie and queue it to "echo" it back... */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ net->error_count = 0;
+
+ /*
+ * Cancel the INIT timer, We do this first before queueing the
+ * cookie. We always cancel at the primary to assue that we are
+ * canceling the timer started by the INIT which always goes to the
+ * primary.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
+ asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+
+ /* calculate the RTO */
+ net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy);
+
+ retval = sctp_send_cookie_echo(m, offset, stcb, net);
+ if (retval < 0) {
+ /*
+ * No cookie, we probably should send a op error. But in any
+ * case if there is no cookie in the INIT-ACK, we can
+ * abandon the peer, its broke.
+ */
+ if (retval == -3) {
+ /* We abort with an error of missing mandatory param */
+ op_err =
+ sctp_generate_invmanparam(SCTP_CAUSE_MISSING_PARAM);
+ if (op_err) {
+ /*
+ * Expand beyond to include the mandatory
+ * param cookie
+ */
+ struct sctp_inv_mandatory_param *mp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_inv_mandatory_param);
+ mp = mtod(op_err,
+ struct sctp_inv_mandatory_param *);
+ /* Subtract the reserved param */
+ mp->length =
+ htons(sizeof(struct sctp_inv_mandatory_param) - 2);
+ mp->num_param = htonl(1);
+ mp->param = htons(SCTP_STATE_COOKIE);
+ mp->resv = 0;
+ }
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ sh, op_err, 0);
+ *abort_no_unlock = 1;
+ }
+ return (retval);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sockaddr_storage store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_nets *r_net;
+ struct timeval tv;
+ int req_prim = 0;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
+ /* Invalid length */
+ return;
+ }
+ sin = (struct sockaddr_in *)&store;
+ sin6 = (struct sockaddr_in6 *)&store;
+
+ memset(&store, 0, sizeof(store));
+ if (cp->heartbeat.hb_info.addr_family == AF_INET &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
+ sin->sin_family = cp->heartbeat.hb_info.addr_family;
+ sin->sin_len = cp->heartbeat.hb_info.addr_len;
+ sin->sin_port = stcb->rport;
+ memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin->sin_addr));
+ } else if (cp->heartbeat.hb_info.addr_family == AF_INET6 &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
+ sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
+ sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin6->sin6_addr));
+ } else {
+ return;
+ }
+ r_net = sctp_findnet(stcb, (struct sockaddr *)sin);
+ if (r_net == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
+ return;
+ }
+ if ((r_net && (r_net->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (r_net->heartbeat_random1 == cp->heartbeat.hb_info.random_value1) &&
+ (r_net->heartbeat_random2 == cp->heartbeat.hb_info.random_value2)) {
+ /*
+ * If the its a HB and it's random value is correct when can
+ * confirm the destination.
+ */
+ r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) {
+ stcb->asoc.primary_destination = r_net;
+ r_net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ r_net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (r_net != stcb->asoc.primary_destination) {
+ /*
+ * first one on the list is NOT the primary
+ * sctp_cmpaddr() is much more efficent if
+ * the primary is the first on the list,
+ * make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ }
+ req_prim = 1;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ }
+ r_net->error_count = 0;
+ r_net->hb_responded = 1;
+ tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
+ tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
+ if (r_net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ r_net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ r_net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_HEARTBEAT_SUCCESS, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (r_net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is in PF state,
+ * set the destination to active state and set the cwnd to one or
+ * two MTU's based on whether PF1 or PF2 is being used. If a T3
+ * timer is running, for the destination, stop the timer because a
+ * PF-heartbeat was received.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
+ }
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ }
+ /* Now lets do a RTO with this */
+ r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy);
+ /* Mobility adaptation */
+ if (req_prim) {
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED)) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_deleted_prim(stcb,
+ stcb->asoc.primary_destination);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ }
+}
+
+static void
+sctp_handle_abort(struct sctp_abort_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n");
+ if (stcb == NULL)
+ return;
+
+ /* stop any receive timers */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ /* notify user of the abort and clean up... */
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* free the tcb */
+#if defined(SCTP_PANIC_ON_ABORT)
+ printf("stcb:%p state:%d rport:%d net:%p\n",
+ stcb, stcb->asoc.state, stcb->rport, net);
+ if (!(stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ panic("Received an ABORT");
+ } else {
+ printf("No panic its in state %x closed\n", stcb->asoc.state);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
+}
+
+static void
+sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag)
+{
+ struct sctp_association *asoc;
+ int some_on_streamwheel;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown: handling SHUTDOWN\n");
+ if (stcb == NULL)
+ return;
+ asoc = &stcb->asoc;
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ return;
+ }
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
+ /* Shutdown NOT the expected size */
+ return;
+ } else {
+ sctp_update_acked(stcb, cp, net, abort_flag);
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of the last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* goto SHUTDOWN_RECEIVED state to block new requests */
+ if (stcb->sctp_socket) {
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ /*
+ * notify upper layer that peer has initiated a
+ * shutdown
+ */
+ sctp_ulp_notify(SCTP_NOTIFY_PEER_SHUTDOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /* reset time */
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ }
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * stop the shutdown timer, since we WILL move to
+ * SHUTDOWN-ACK-SENT.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+ }
+ /* Now is there unsent data on a stream somewhere? */
+ some_on_streamwheel = sctp_is_there_unsent_data(stcb);
+
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ some_on_streamwheel) {
+ /* By returning we will push more data out */
+ return;
+ } else {
+ /* no outstanding data to send, so move on... */
+ /* send SHUTDOWN-ACK */
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ /* move to SHUTDOWN-ACK-SENT state */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
+ /* start SHUTDOWN timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
+ stcb, net);
+ }
+}
+
+static void
+sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_ack: handling SHUTDOWN ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* unexpected SHUTDOWN-ACK... so ignore... */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of the last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* are the queues empty? */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+ /* send SHUTDOWN-COMPLETE */
+ sctp_send_shutdown_complete(stcb, net);
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Set the connected flag to disconnected */
+ stcb->sctp_ep->sctp_socket->so_snd.sb_cc = 0;
+ }
+ }
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB but first save off the ep */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+}
+
+/*
+ * Skip past the param header and then we will find the chunk that caused the
+ * problem. There are two possibilities, ASCONF or FWD-TSN; anything other
+ * than that means our peer must be broken.
+ */
+static void
+sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
+ struct sctp_nets *net)
+{
+ struct sctp_chunkhdr *chk;
+
+ chk = (struct sctp_chunkhdr *)((caddr_t)phdr + sizeof(*phdr));
+ switch (chk->chunk_type) {
+ case SCTP_ASCONF_ACK:
+ case SCTP_ASCONF:
+ sctp_asconf_cleanup(stcb, net);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support chunk type %d(%x)??\n",
+ chk->chunk_type, (uint32_t) chk->chunk_type);
+ break;
+ }
+}
+
+/*
+ * Skip past the param header and then we will find the param that caused the
+ * problem. There are a number of params in an ASCONF or the PR-SCTP param;
+ * these will turn off specific features.
+ */
+static void
+sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
+{
+ struct sctp_paramhdr *pbad;
+
+ pbad = phdr + 1;
+ switch (ntohs(pbad->param_type)) {
+ /* pr-sctp draft */
+ case SCTP_PRSCTP_SUPPORTED:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ break;
+ /* draft-ietf-tsvwg-addip-sctp */
+ case SCTP_ECN_NONCE_SUPPORTED:
+ stcb->asoc.peer_supports_ecn_nonce = 0;
+ stcb->asoc.ecn_nonce_allowed = 0;
+ stcb->asoc.ecn_allowed = 0;
+ break;
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ case SCTP_ERROR_CAUSE_IND:
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Turning off ASCONF to this strange peer\n");
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support param type %d(%x)??\n",
+ pbad->param_type, (uint32_t) pbad->param_type);
+ break;
+ }
+}
+
+static int
+sctp_handle_error(struct sctp_chunkhdr *ch,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int chklen;
+ struct sctp_paramhdr *phdr;
+ uint16_t error_type;
+ uint16_t error_len;
+ struct sctp_association *asoc;
+ int adjust;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /* parse through all of the errors and process */
+ asoc = &stcb->asoc;
+ phdr = (struct sctp_paramhdr *)((caddr_t)ch +
+ sizeof(struct sctp_chunkhdr));
+ chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr);
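+ /* chklen is the number of error-cause bytes remaining after the chunk header */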
+ while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) {
+ /* Process an Error Cause */
+ error_type = ntohs(phdr->param_type);
+ error_len = ntohs(phdr->param_length);
+ if ((error_len > chklen) || (error_len == 0)) {
+ /* invalid param length for this param */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bogus length in error param- chunk left:%d errorlen:%d\n",
+ chklen, error_len);
+ return (0);
+ }
+ switch (error_type) {
+ case SCTP_CAUSE_INVALID_STREAM:
+ case SCTP_CAUSE_MISSING_PARAM:
+ case SCTP_CAUSE_INVALID_PARAM:
+ case SCTP_CAUSE_NO_USER_DATA:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Software error we got a %d back? We have a bug :/ (or do they?)\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_STALE_COOKIE:
+ /*
+ * We only act if we have echoed a cookie and are
+ * waiting.
+ */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ int *p;
+
+ p = (int *)((caddr_t)phdr + sizeof(*phdr));
+ /* Save the time doubled */
+ asoc->cookie_preserve_req = ntohl(*p) << 1;
+ asoc->stale_cookie_count++;
+ if (asoc->stale_cookie_count >
+ asoc->max_init_times) {
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* now free the asoc */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (-1);
+ }
+ /* blast back to INIT state */
+ asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
+ asoc->state |= SCTP_STATE_COOKIE_WAIT;
+
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_CAUSE_UNRESOLVABLE_ADDR:
+ /*
+ * Nothing we can do here, we don't do hostname
+ * addresses so if the peer does not like my IPv6
+ * (or IPv4 for that matter) it does not matter. If
+ * they don't support that type of address, they can
+ * NOT possibly get that packet type... i.e. with no
+ * IPv6 you can't receive an IPv6 packet, so we can
+ * safely ignore this one. If we ever added support
+ * for HOSTNAME addresses, then we would need to do
+ * something here.
+ */
+ break;
+ case SCTP_CAUSE_UNRECOG_CHUNK:
+ sctp_process_unrecog_chunk(stcb, phdr, net);
+ break;
+ case SCTP_CAUSE_UNRECOG_PARAM:
+ sctp_process_unrecog_param(stcb, phdr);
+ break;
+ case SCTP_CAUSE_COOKIE_IN_SHUTDOWN:
+ /*
+ * We ignore this since the timer will drive out a
+ * new cookie anyway and their timer will drive us
+ * to send a SHUTDOWN_COMPLETE. We can't send one
+ * here since we don't have their tag.
+ */
+ break;
+ case SCTP_CAUSE_DELETING_LAST_ADDR:
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ case SCTP_CAUSE_DELETING_SRC_ADDR:
+ /*
+ * We should NOT get these here, but in an
+ * ASCONF-ACK.
+ */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Peer sends ASCONF errors in a Operational Error?<%d>?\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_OUT_OF_RESC:
+ /*
+ * And what, pray tell, do we do with the fact that
+ * the peer is out of resources? Not really sure we
+ * could do anything but abort. I suspect this
+ * should have come WITH an abort instead of in an
+ * OP-ERROR.
+ */
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_handle_error: unknown error type = 0x%xh\n",
+ error_type);
+ break;
+ }
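+ /* advance to the next error cause, padded out to a 4-byte boundary */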
+ adjust = SCTP_SIZE32(error_len);
+ chklen -= adjust;
+ phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust);
+ }
+ return (0);
+}
+
+static int
+sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_init_ack *init_ack;
+ struct mbuf *op_err;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: handling INIT-ACK\n");
+
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: TCB is null\n");
+ return (-1);
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ init_ack = &cp->init;
+ /* validate parameters */
+ if (init_ack->initiate_tag == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_inbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_outbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* process according to association state... */
+ switch (stcb->asoc.state & SCTP_STATE_MASK) {
+ case SCTP_STATE_COOKIE_WAIT:
+ /* this is the expected state for this chunk */
+ /* process the INIT-ACK parameters */
+ if (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * The primary is where we sent the INIT, we can
+ * always consider it confirmed when the INIT-ACK is
+ * returned. Do this before we load addresses
+ * though.
+ */
+ stcb->asoc.primary_destination->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
+ }
+ if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
+ net, abort_no_unlock, vrf_id) < 0) {
+ /* error in parsing parameters */
+ return (-1);
+ }
+ /* update our state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED);
+
+ /* reset the RTO calc */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ /*
+ * collapse the init timer back in case of an exponential
+ * backoff
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * the send at the end of the inbound data processing will
+ * cause the cookie to be sent
+ */
+ break;
+ case SCTP_STATE_SHUTDOWN_SENT:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_COOKIE_ECHOED:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_OPEN:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_EMPTY:
+ case SCTP_STATE_INUSE:
+ default:
+ /* incorrect state... discard */
+ return (-1);
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Leaving handle-init-ack end\n");
+ return (0);
+}
+
+
+/*
+ * handle a state cookie for an existing association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk
+ *    note: this is a "split" mbuf and the cookie signature does not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ */
+static struct sctp_tcb *
+sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct sockaddr *init_src, int *notification, sctp_assoc_t * sac_assoc_id,
+ uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ int chk_length;
+ int init_offset, initack_offset, i;
+ int retval;
+ int spec_flag = 0;
+ uint32_t how_indx;
+
+ /* I know that the TCB is non-NULL from the caller */
+ asoc = &stcb->asoc;
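+ /* record which collision case we take in the cookie_how trace (for debugging) */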
+ for (how_indx = 0; how_indx < sizeof(asoc->cookie_how); how_indx++) {
+ if (asoc->cookie_how[how_indx] == 0)
+ break;
+ }
+ if (how_indx < sizeof(asoc->cookie_how)) {
+ asoc->cookie_how[how_indx] = 1;
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* SHUTDOWN came in after sending INIT-ACK */
+ struct mbuf *op_err;
+ struct sctp_paramhdr *ph;
+
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 2;
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info);
+ * the INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset += sizeof(struct sctp_cookie_echo_chunk);
+
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info);
+ * the INIT-ACK follows the INIT chunk
+ */
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
+ (ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag)) {
+ /*
+ * case D in Section 5.2.4 Table 2: MMAA process accordingly
+ * to get into the OPEN state
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*-
+ * Oops, this means that we somehow generated two vtags
+ * the same. I.e. we did:
+ * Us Peer
+ * <---INIT(tag=a)------
+ * ----INIT-ACK(tag=t)-->
+ * ----INIT(tag=t)------> *1
+ * <---INIT-ACK(tag=a)---
+ * <----CE(tag=t)------------- *2
+ *
+ * At point *1 we should be generating a different
+ * tag t'. Which means we would throw away the CE and send
+ * ours instead. Basically this is case C (throw away side).
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 17;
+ return (NULL);
+
+ }
+ switch (SCTP_GET_STATE(asoc)) {
+ case SCTP_STATE_COOKIE_WAIT:
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+ * INIT was sent but got a COOKIE_ECHO with the
+ * correct tags... just accept it...but we must
+ * process the init so that we can make sure we have
+ * the right seq no's.
+ */
+ /* First we must process the INIT !! */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 3;
+ return (NULL);
+ }
+ /* we have already processed the INIT so no problem */
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ /* update current state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ sctp_stop_all_cookie_timers(stcb);
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)
+ ) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /*
+ * Here is where collision would go if we
+ * did a connect() and instead got an
+ * init/init-ack/cookie done before the
+ * init-ack came back..
+ */
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ /*
+ * since we did not send a HB make sure we don't
+ * double things
+ */
+ net->hb_responded = 1;
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ inp, stcb, NULL);
+ }
+ break;
+ default:
+ /*
+ * we're in the OPEN state (or beyond), so peer must
+ * have simply lost the COOKIE-ACK
+ */
+ break;
+ } /* end switch */
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * We ignore the return code here.. not sure if we should
+ * somehow abort.. but we do have an existing asoc. This
+ * really should not fail.
+ */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 4;
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 5;
+ return (stcb);
+ }
+ if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
+ cookie->tie_tag_my_vtag == 0 &&
+ cookie->tie_tag_peer_vtag == 0) {
+ /*
+ * case C in Section 5.2.4 Table 2: XMOO silently discard
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 6;
+ return (NULL);
+ }
+ if (ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag &&
+ (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag ||
+ init_cp->init.initiate_tag == 0)) {
+ /*
+ * case B in Section 5.2.4 Table 2: MXAA or MOAA my info
+ * should be ok, re-accept peer info
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*
+ * Extension of case C. If we hit this, then the
+ * random number generator returned the same vtag
+ * when we first sent our INIT-ACK and when we later
+ * sent our INIT. The side with the seq numbers that
+ * are different will be the one that normally
+ * would have hit case C. This in effect "extends"
+ * our vtags in this collision case to be 64 bits.
+ * The same collision could occur aka you get both
+ * vtag and seq number the same twice in a row.. but
+ * is much less likely. If it did happen then we
+ * would proceed through and bring up the assoc.. we
+ * may end up with the wrong stream setup however..
+ * which would be bad.. but there is no way to
+ * tell.. until we send on a stream that does not
+ * exist :-)
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 7;
+
+ return (NULL);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 8;
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ NULL);
+ }
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+
+ /* Note last_cwr_tsn? where is this used? */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) {
+ /*
+ * Ok the peer probably discarded our data (if we
+ * echoed a cookie+data). So anything on the
+ * sent_queue should be marked for retransmit, we
+ * may not get something to kick us so it COULD
+ * still take a timeout to move these.. but it can't
+ * hurt to mark them.
+ */
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ spec_flag++;
+ }
+ }
+
+ }
+ /* process the INIT info (peer's info) */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 9;
+ return (NULL);
+ }
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 10;
+ return (NULL);
+ }
+ if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
+ (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
+ } else {
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (spec_flag) {
+ /*
+ * only if we have retrans set do we do this. What
+ * this call does is get only the COOKIE-ACK out and
+ * then when we return the normal call to
+ * sctp_chunk_output will get the retrans out behind
+ * this.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 11;
+
+ return (stcb);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) &&
+ cookie->tie_tag_my_vtag == asoc->my_vtag_nonce &&
+ cookie->tie_tag_peer_vtag == asoc->peer_vtag_nonce &&
+ cookie->tie_tag_peer_vtag != 0) {
+ struct sctpasochead *head;
+
+ /*
+ * case A in Section 5.2.4 Table 2: XXMM (peer restarted)
+ */
+ /* temp code */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 12;
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+
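+ /* save the assoc id so the restart notification below can reference it */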
+ *sac_assoc_id = sctp_get_associd(stcb);
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_RESTART;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
+ } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
+ }
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
+ /* move to OPEN state, if not in SHUTDOWN_SENT */
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ }
+ asoc->pre_open_streams =
+ ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ if (asoc->mapping_array) {
+ memset(asoc->mapping_array, 0,
+ asoc->mapping_array_size);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(stcb->sctp_ep);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ /* send up all the data */
+ SCTP_TCB_SEND_LOCK(stcb);
+
+ sctp_report_all_outbound(stcb, 1, SCTP_SO_NOT_LOCKED);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ }
+ /* process the INIT-ACK info (my info) */
+ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ /* re-insert to new vtag position */
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
+ sctppcbinfo.hashasocmark)];
+ /*
+ * put it in the bucket in the vtag hash of assoc's for the
+ * system
+ */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+
+ /* Is this the first restart? */
+ if (stcb->asoc.in_restart_hash == 0) {
+ /* Ok add it to assoc_id vtag hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id,
+ sctppcbinfo.hashrestartmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbrestarhash);
+ stcb->asoc.in_restart_hash = 1;
+ }
+ /* process the INIT info (peer's info) */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_INP_WUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_WUNLOCK();
+
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 13;
+
+ return (NULL);
+ }
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 14;
+
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 15;
+
+ return (stcb);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 16;
+ /* all other cases... */
+ return (NULL);
+}
+
+
+/*
+ * handle a state cookie for a new association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk
+ *    note: this is a "split" mbuf and the cookie signature does not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ * length: length of the cookie chunk
+ * to: where the init was from
+ * returns a new TCB
+ */
+static struct sctp_tcb *
+sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_nets **netp,
+ struct sockaddr *init_src, int *notification,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint32_t vrf_id)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ struct sockaddr_storage sa_store;
+ struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_association *asoc;
+ int chk_length;
+ int init_offset, initack_offset, initack_limit;
+ int retval;
+ int error = 0;
+ uint32_t old_tag;
+ uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+#endif
+
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info);
+ * the INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset + sizeof(struct sctp_cookie_echo_chunk);
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: could not pull INIT chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n");
+ return (NULL);
+ }
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info);
+ * the INIT-ACK follows the INIT chunk
+ */
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ /*
+ * NOTE: We can't use the INIT_ACK's chk_length to determine the
+ * "initack_limit" value. This is because the chk_length field
+ * includes the length of the cookie, but the cookie is omitted when
+ * the INIT and INIT_ACK are tacked onto the cookie...
+ */
+ initack_limit = offset + cookie_len;
+
+ /*
+ * now that we know the INIT/INIT-ACK are in place, create a new TCB
+ * and populate it
+ */
+
+ /*
+ * Here we do a trick: we pass NULL for the proc/thread argument.
+ * We do this since in effect we only use the p argument when the
+ * socket is unbound and we must do an implicit bind. Since we are
+ * getting a cookie, we cannot be unbound.
+ */
+ stcb = sctp_aloc_assoc(inp, init_src, 0, &error,
+ ntohl(initack_cp->init.initiate_tag), vrf_id,
+ (struct thread *)NULL
+ );
+ if (stcb == NULL) {
+ struct mbuf *op_err;
+
+ /* memory problem? */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: no room for another TCB!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id);
+ return (NULL);
+ }
+ /* get the correct sctp_nets */
+ if (netp)
+ *netp = sctp_findnet(stcb, init_src);
+
+ asoc = &stcb->asoc;
+ /* get scope variables out of cookie */
+ asoc->ipv4_local_scope = cookie->ipv4_scope;
+ asoc->site_scope = cookie->site_scope;
+ asoc->local_scope = cookie->local_scope;
+ asoc->loopback_scope = cookie->loopback_scope;
+
+ if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) ||
+ (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) {
+ struct mbuf *op_err;
+
+ /*
+ * Houston we have a problem. The EP changed while the
+ * cookie was in flight. Only recourse is to abort the
+ * association.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* process the INIT-ACK info (my info) */
+ old_tag = asoc->my_vtag;
+ asoc->assoc_id = asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+
+ /* process the INIT info (peer's info) */
+ if (netp)
+ retval = sctp_process_init(init_cp, stcb, *netp);
+ else
+ retval = 0;
+ if (retval < 0) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* load all addresses */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
+ init_src)) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /*
+ * verify any preceding AUTH chunk that was skipped
+ */
+ /* pull the local authentication parameters from the cookie/init-ack */
+ sctp_auth_get_cookie_params(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)));
+ if (auth_skipped) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset, auth_len, auth_chunk_buf);
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) {
+ /* auth HMAC failed, dump the assoc and packet */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "COOKIE-ECHO: AUTH failed\n");
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ } else {
+ /* remaining chunks checked... good to go */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ /* update current state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ SCTP_STAT_INCR_COUNTER32(sctps_passiveestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+
+ /*
+ * if we're doing ASCONFs, check to see if we have any new local
+ * addresses that need to get added to the peer (e.g. addresses
+ * changed while the cookie echo was in flight). This needs to be
+ * done after we go to the OPEN state to do the correct asconf
+ * processing. Otherwise, make sure we have the correct addresses
+ * in our lists.
+ */
+
+ /* warning, we re-use sin, sin6, sa_store here! */
+ /* pull in local_address (our "from" address) */
+ if (cookie->laddr_type == SCTP_IPV4_ADDRESS) {
+ /* source addr is IPv4 */
+ sin = (struct sockaddr_in *)initack_src;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = cookie->laddress[0];
+ } else if (cookie->laddr_type == SCTP_IPV6_ADDRESS) {
+ /* source addr is IPv6 */
+ sin6 = (struct sockaddr_in6 *)initack_src;
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6->sin6_addr, cookie->laddress,
+ sizeof(sin6->sin6_addr));
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+
+ /* set up to notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+ /*
+ * This is an endpoint that called connect(); how it got a
+ * cookie that is NEW is a bit of a mystery. It must be that
+ * the INIT was sent, but before it got there.. a complete
+ * INIT/INIT-ACK/COOKIE arrived. But of course then it
+ * should have gone to the other code.. not here.. oh well..
+ * a bit of protection is worth having..
+ */
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /*
+ * We don't want to do anything with this one, since it is
+ * the listening socket. The timer will get started for
+ * accepted connections in the caller.
+ */
+ ;
+ }
+ /* since we did not send a HB make sure we don't double things */
+ if ((netp) && (*netp))
+ (*netp)->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
+ }
+ /* calculate the RTT */
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ if ((netp) && (*netp)) {
+ (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_send_cookie_ack(stcb);
+
+ /*
+ * check the address lists for any ASCONFs that need to be sent
+ * AFTER the cookie-ack is sent
+ */
+ sctp_check_address_list(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
+ initack_src, cookie->local_scope, cookie->site_scope,
+ cookie->ipv4_scope, cookie->loopback_scope);
+
+
+ return (stcb);
+}
+
+
+/*
+ * handles a COOKIE-ECHO message
+ * stcb: modified to a new TCB, or left as the existing (non-NULL) TCB
+ */
+static struct mbuf *
+sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
+ struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ struct sctp_tcb **locked_tcb, uint32_t vrf_id)
+{
+ struct sctp_state_cookie *cookie;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sctp_tcb *l_stcb = *stcb;
+ struct sctp_inpcb *l_inp;
+ struct sockaddr *to;
+ sctp_assoc_t sac_restart_id;
+ struct sctp_pcb *ep;
+ struct mbuf *m_sig;
+ uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
+ uint8_t *sig;
+ uint8_t cookie_ok = 0;
+ unsigned int size_of_pkt, sig_offset, cookie_offset;
+ unsigned int cookie_len;
+ struct timeval now;
+ struct timeval time_expires;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
+ struct ip *iph;
+ int notification = 0;
+ struct sctp_nets *netl;
+ int had_a_existing_tcb = 0;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie: handling COOKIE-ECHO\n");
+
+ if (inp_p == NULL) {
+ return (NULL);
+ }
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *lsin;
+
+ lsin = (struct sockaddr_in *)(localep_sa);
+ memset(lsin, 0, sizeof(*lsin));
+ lsin->sin_family = AF_INET;
+ lsin->sin_len = sizeof(*lsin);
+ lsin->sin_port = sh->dest_port;
+ lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *lsin6;
+
+ lsin6 = (struct sockaddr_in6 *)(localep_sa);
+ memset(lsin6, 0, sizeof(*lsin6));
+ lsin6->sin6_family = AF_INET6;
+ lsin6->sin6_len = sizeof(struct sockaddr_in6);
+ ip6 = mtod(m, struct ip6_hdr *);
+ lsin6->sin6_port = sh->dest_port;
+ lsin6->sin6_addr = ip6->ip6_dst;
+ size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
+ } else {
+ return (NULL);
+ }
+
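+ /* the state cookie sits right after the chunk header in the COOKIE-ECHO */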
+ cookie = &cp->cookie;
+ cookie_offset = offset + sizeof(struct sctp_chunkhdr);
+ cookie_len = ntohs(cp->ch.chunk_length);
+
+ if ((cookie->peerport != sh->src_port) &&
+ (cookie->myport != sh->dest_port) &&
+ (cookie->my_vtag != sh->v_tag)) {
+ /*
+ * invalid ports or bad tag. Note that we always leave the
+ * v_tag in the header in network order and when we stored
+ * it in the my_vtag slot we also left it in network order.
+ * This maintains the match even though it may be in the
+ * opposite byte order of the machine :->
+ */
+ return (NULL);
+ }
+ if (cookie_len > size_of_pkt ||
+ cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+ sizeof(struct sctp_init_chunk) +
+ sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
+ /* cookie too long! or too small */
+ return (NULL);
+ }
+ /*
+ * split off the signature into its own mbuf (since it should not be
+ * calculated in the sctp_hmac_m() call).
+ */
+ sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
+ if (sig_offset > size_of_pkt) {
+ /* packet not correct size! */
+ /* XXX this may already be accounted for earlier... */
+ return (NULL);
+ }
+ m_sig = m_split(m, sig_offset, M_DONTWAIT);
+ if (m_sig == NULL) {
+ /* out of memory or ?? */
+ return (NULL);
+ }
+ /*
+ * compute the signature/digest for the cookie
+ */
+ ep = &(*inp_p)->sctp_ep;
+ l_inp = *inp_p;
+ if (l_stcb) {
+ SCTP_TCB_UNLOCK(l_stcb);
+ }
+ SCTP_INP_RLOCK(l_inp);
+ if (l_stcb) {
+ SCTP_TCB_LOCK(l_stcb);
+ }
+ /* which cookie is it? */
+ if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* it's the old cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ } else {
+ /* it's the current cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->current_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ }
+ /* get the signature */
+ SCTP_INP_RUNLOCK(l_inp);
+ sig = (uint8_t *) sctp_m_getptr(m_sig, 0, SCTP_SIGNATURE_SIZE, (uint8_t *) & tmp_sig);
+ if (sig == NULL) {
+ /* couldn't find signature */
+ sctp_m_freem(m_sig);
+ return (NULL);
+ }
+ /* compare the received digest with the computed digest */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
+ /* try the old cookie? */
+ if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* compute digest with old */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ /* compare */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
+ cookie_ok = 1;
+ }
+ } else {
+ cookie_ok = 1;
+ }
+
+ /*
+ * Now before we continue we must reconstruct our mbuf so that
+ * normal processing of any other chunks will work.
+ */
+ {
+ struct mbuf *m_at;
+
+ m_at = m;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = m_sig;
+ }
+
+ if (cookie_ok == 0) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "handle_cookie_echo: cookie signature validation failed!\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "offset = %u, cookie_offset = %u, sig_offset = %u\n",
+ (uint32_t) offset, cookie_offset, sig_offset);
+ return (NULL);
+ }
+ /*
+ * check the cookie timestamps to be sure it's not stale
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* Expire time is in Ticks, so we convert to seconds */
+ time_expires.tv_sec = cookie->time_entered.tv_sec + TICKS_TO_SEC(cookie->cookie_life);
+ time_expires.tv_usec = cookie->time_entered.tv_usec;
+ if (timevalcmp(&now, &time_expires, >)) {
+ /* cookie is stale! */
+ struct mbuf *op_err;
+ struct sctp_stale_cookie_msg *scm;
+ uint32_t tim;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
+ scm = mtod(op_err, struct sctp_stale_cookie_msg *);
+ scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
+ scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t))));
+ /* seconds to usec */
+ tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
+ /* add in usec */
+ if (tim == 0)
+ tim = now.tv_usec - cookie->time_entered.tv_usec;
+ scm->time_usec = htonl(tim);
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id);
+ return (NULL);
+ }
+ /*
+ * Now we must see with the lookup address if we have an existing
+ * asoc. This will only happen if we were in the COOKIE-WAIT state
+ * and an INIT collided with us and somewhere the peer sent the
+ * cookie on another address besides the single address our assoc
+ * had for him. In this case we will have one of the tie-tags set at
+ * least AND the address field in the cookie can be used to look it
+ * up.
+ */
+ to = NULL;
+ if (cookie->addr_type == SCTP_IPV6_ADDRESS) {
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = sh->src_port;
+ sin6.sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6.sin6_addr.s6_addr, cookie->address,
+ sizeof(sin6.sin6_addr.s6_addr));
+ to = (struct sockaddr *)&sin6;
+ } else if (cookie->addr_type == SCTP_IPV4_ADDRESS) {
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = sh->src_port;
+ sin.sin_addr.s_addr = cookie->address[0];
+ to = (struct sockaddr *)&sin;
+ } else {
+ /* This should not happen */
+ return (NULL);
+ }
+ if ((*stcb == NULL) && to) {
+ /* Yep, lets check */
+ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+ if (*stcb == NULL) {
+ /*
+ * We should have only got back the same inp. If we
+ * got back a different ep we have a problem. The
+ * original findep got back l_inp and now
+ */
+ if (l_inp != *inp_p) {
+ SCTP_PRINTF("Bad problem find_ep got a diff inp then special_locate?\n");
+ }
+ } else {
+ if (*locked_tcb == NULL) {
+ /*
+ * In this case we found the assoc only
+ * after we locked the create lock. This
+ * means we are in a colliding case and we
+ * must make sure that we unlock the tcb if
+ * it's one of the cases where we throw away
+ * the incoming packets.
+ */
+ *locked_tcb = *stcb;
+
+ /*
+ * We must also increment the inp ref count
+ * since the ref_count flag was set when we
+ * did not find the TCB; now we found it,
+ * which reduces the refcount.. we must
+ * raise it back out to balance it all :-)
+ */
+ SCTP_INP_INCR_REF((*stcb)->sctp_ep);
+ if ((*stcb)->sctp_ep != l_inp) {
+ SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
+ (*stcb)->sctp_ep, l_inp);
+ }
+ }
+ }
+ }
+ if (to == NULL)
+ return (NULL);
+
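+ /* the signature was split off above, so drop it from the cookie length we pass on */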
+ cookie_len -= SCTP_SIGNATURE_SIZE;
+ if (*stcb == NULL) {
+ /* this is the "normal" case... get a new TCB */
+ *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
+ cookie_len, *inp_p, netp, to, ¬ification,
+ auth_skipped, auth_offset, auth_len, vrf_id);
+ } else {
+ /* this is abnormal... cookie-echo on existing TCB */
+ had_a_existing_tcb = 1;
+ *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+ cookie, cookie_len, *inp_p, *stcb, *netp, to,
+ &notification, &sac_restart_id, vrf_id);
+ }
+
+ if (*stcb == NULL) {
+ /* still no TCB... must be bad cookie-echo */
+ return (NULL);
+ }
+ /*
+ * Ok, we built an association so confirm the address we sent the
+ * INIT-ACK to.
+ */
+ netl = sctp_findnet(*stcb, to);
+ /*
+ * This code should in theory NOT run, but handle it just in case.
+ */
+ if (netl == NULL) {
+ /* TSNH! Huh, why do I need to add this address here? */
+ int ret;
+
+ ret = sctp_add_remote_addr(*stcb, to, SCTP_DONOT_SETSCOPE,
+ SCTP_IN_COOKIE_PROC);
+ netl = sctp_findnet(*stcb, to);
+ }
+ if (netl) {
+ if (netl->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL,
+ netl);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ if (*stcb) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, *inp_p,
+ *stcb, NULL);
+ }
+ if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ if (!had_a_existing_tcb ||
+ (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /*
+ * If we have a NEW cookie, or the connection never
+ * reached the connected state during a collision, we
+ * must do the TCP accept processing.
+ */
+ struct socket *so, *oso;
+ struct sctp_inpcb *inp;
+
+ if (notification == SCTP_NOTIFY_ASSOC_RESTART) {
+ /*
+ * For a restart we will keep the same
+ * socket, no need to do anything. I THINK!!
+ */
+ sctp_ulp_notify(notification, *stcb, 0, (void *)&sac_restart_id, SCTP_SO_NOT_LOCKED);
+ return (m);
+ }
+ oso = (*inp_p)->sctp_socket;
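+ /* Hold a reference on the assoc and drop the TCB lock while creating the new socket, then re-acquire the lock and release the reference. */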
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ so = sonewconn(oso, 0);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+ if (so == NULL) {
+ struct mbuf *op_err;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *pcb_so;
+
+#endif
+ /* Too many sockets */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(*inp_p, NULL, m, iphlen,
+ sh, op_err, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ pcb_so = SCTP_INP_SO(*inp_p);
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(pcb_so, 1);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(pcb_so, 1);
+#endif
+ return (NULL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_INCR_REF(inp);
+ /*
+ * We add the unbound flag here so that if we get an
+ * soabort() before the move_pcb is done, we will
+ * properly clean up.
+ */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL |
+ SCTP_PCB_FLAGS_UNBOUND |
+ (SCTP_PCB_COPY_FLAGS & (*inp_p)->sctp_flags) |
+ SCTP_PCB_FLAGS_DONT_WAKE);
+ inp->sctp_features = (*inp_p)->sctp_features;
+ inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
+ inp->sctp_socket = so;
+ inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+ inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
+ inp->sctp_context = (*inp_p)->sctp_context;
+ inp->inp_starting_point_for_iterator = NULL;
+ /*
+ * copy in the authentication parameters from the
+ * original endpoint
+ */
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist((*inp_p)->sctp_ep.local_hmacs);
+ if (inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist((*inp_p)->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&(*inp_p)->sctp_ep.shared_keys,
+ &inp->sctp_ep.shared_keys);
+
+ /*
+ * Now we must move it from one hash table to
+ * another and get the tcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
+
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+
+ sctp_pull_off_control_to_new_inp((*inp_p), inp, *stcb,
+ 0);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+
+ /*
+ * now we must check to see if we were aborted while
+ * the move was going on and the lock/unlock
+ * happened.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * yep it was, we leave the assoc attached
+ * to the socket since the sctp_inpcb_free()
+ * call will send an abort for us.
+ */
+ SCTP_INP_DECR_REF(inp);
+ return (NULL);
+ }
+ SCTP_INP_DECR_REF(inp);
+ /* Switch over to the new guy */
+ *inp_p = inp;
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /*
+ * Pull it from the incomplete queue and wake the
+ * guy
+ */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(so, 1);
+#endif
+ soisconnected(so);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (m);
+ }
+ }
+ if ((notification) && ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ return (m);
+}
+
+static void
+sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* cp must not be used, others call this without a c-ack :-) */
+ struct sctp_association *asoc;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie_ack: handling COOKIE-ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+
+ sctp_stop_all_cookie_timers(stcb);
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* state change only needed when I am in right state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ }
+ /* update RTO */
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ if (asoc->overall_error_count == 0) {
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &asoc->time_entered, sctp_align_safe_nocopy);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * since we did not send a HB, make sure we don't
+ * double things up
+ */
+ net->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ /*
+ * send ASCONF if parameters are pending and ASCONFs are
+ * allowed (e.g. addresses changed while INIT/COOKIE-ECHO
+ * were in flight)
+ */
+ if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ (stcb->asoc.peer_supports_asconf) &&
+ (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ /* Toss the cookie if I can */
+ sctp_toss_old_cookies(stcb, asoc);
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* Restart the timer if we have pending data */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+}
+
+static void
+sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ struct sctp_tmit_chunk *lchk;
+ uint32_t tsn;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_ecne_chunk)) {
+ return;
+ }
+ SCTP_STAT_INCR(sctps_recvecne);
+ tsn = ntohl(cp->tsn);
+ /* ECN Nonce stuff: need a resync and disable the nonce sum check */
+ /* Also we make sure we disable the nonce_wait */
+ lchk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (lchk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = lchk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+
+ /* Find where it was sent, if possible */
+ net = NULL;
+ lchk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (lchk) {
+ if (lchk->rec.data.TSN_seq == tsn) {
+ net = lchk->whoTo;
+ break;
+ }
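+ /* the sent queue is kept in TSN order, so once we pass the TSN we can stop looking */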
+ if (compare_with_wrap(lchk->rec.data.TSN_seq, tsn, MAX_SEQ))
+ break;
+ lchk = TAILQ_NEXT(lchk, sctp_next);
+ }
+ if (net == NULL)
+ /* default is we use the primary */
+ net = stcb->asoc.primary_destination;
+
+ if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) {
+ /*
+ * JRS - Use the congestion control given in the pluggable
+ * CC module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net);
+ /*
+ * we reduce once every RTT. So we will only lower cwnd at
+ * the next sending seq i.e. the resync_tsn.
+ */
+ stcb->asoc.last_cwr_tsn = stcb->asoc.nonce_resync_tsn;
+ }
+ /*
+ * We always send a CWR this way: if our previous one was
+ * lost our peer will get an update, and if it is not yet
+ * time to reduce again the peer still gets the CWR.
+ */
+ sctp_send_cwr(stcb, net, tsn);
+}
+
+static void
+sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb)
+{
+ /*
+ * Here we get a CWR from the peer. We must look in the outqueue and
+ * make sure that we have a covered ECNE in the control chunk part.
+ * If so remove it.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_ecne_chunk *ecne;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
+ continue;
+ }
+ /*
+ * Look for and remove if it is the right TSN. Since there
+ * is only ONE ECNE on the control queue at any one time we
+ * don't need to worry about more than one!
+ */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ if (compare_with_wrap(ntohl(cp->tsn), ntohl(ecne->tsn),
+ MAX_TSN) || (cp->tsn == ecne->tsn)) {
+ /* this covers this ECNE, we can remove it */
+ stcb->asoc.ecn_echo_cnt_onq--;
+ TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ stcb->asoc.ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ break;
+ }
+ }
+}
+
+static void
+sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: handling SHUTDOWN-COMPLETE\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* unexpected SHUTDOWN-COMPLETE... so ignore... */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ /* are the queues empty? they should be */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: calls free-asoc\n");
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return;
+}
+
+static int
+process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
+ struct sctp_nets *net, uint8_t flg)
+{
+ switch (desc->chunk_type) {
+ case SCTP_DATA:
+ /* find the tsn to resend (possibly) */
+ {
+ uint32_t tsn;
+ struct sctp_tmit_chunk *tp1;
+
+ tsn = ntohl(desc->tsn_ifany);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, tsn,
+ MAX_TSN)) {
+ /* not found */
+ tp1 = NULL;
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tp1 == NULL) {
+ /*
+ * Do it the other way, i.e. without paying
+ * attention to queue seq order.
+ */
+ SCTP_STAT_INCR(sctps_pdrpdnfnd);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ }
+ if (tp1 == NULL) {
+ SCTP_STAT_INCR(sctps_pdrptsnnf);
+ }
+ if ((tp1) && (tp1->sent < SCTP_DATAGRAM_ACKED)) {
+ uint8_t *ddp;
+
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
+ SCTP_STAT_INCR(sctps_pdrpdiwnp);
+ return (0);
+ }
+ if (stcb->asoc.peers_rwnd == 0 &&
+ (flg & SCTP_FROM_MIDDLE_BOX)) {
+ SCTP_STAT_INCR(sctps_pdrpdizrw);
+ return (0);
+ }
+ ddp = (uint8_t *) (mtod(tp1->data, caddr_t)+
+ sizeof(struct sctp_data_chunk));
+ {
+ unsigned int iii;
+
+ for (iii = 0; iii < sizeof(desc->data_bytes);
+ iii++) {
+ if (ddp[iii] != desc->data_bytes[iii]) {
+ SCTP_STAT_INCR(sctps_pdrpbadd);
+ return (-1);
+ }
+ }
+ }
+ /*
+ * We zero out the nonce so a resync is not
+ * needed
+ */
+ tp1->rec.data.ect_nonce = 0;
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had a RTO calculation
+ * pending on it, cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ SCTP_STAT_INCR(sctps_pdrpmark);
+ if (tp1->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ tp1->sent = SCTP_DATAGRAM_RESEND;
+ /*
+ * mark it as if we were doing a FR, since
+ * we will be getting gap ack reports behind
+ * the info from the router.
+ */
+ tp1->rec.data.doing_fast_retransmit = 1;
+ /*
+ * mark the tsn with what sequences can
+ * cause a new FR.
+ */
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ tp1->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ tp1->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+
+ /* restart the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo);
+
+ /* fix counts and things */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) stcb,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ } {
+ /* audit code */
+ unsigned int audit;
+
+ audit = 0;
+ TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ TAILQ_FOREACH(tp1, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ if (audit != stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("**Local Audit finds cnt:%d asoc cnt:%d\n",
+ audit, stcb->asoc.sent_queue_retran_cnt);
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = audit;
+#endif
+ }
+ }
+ }
+ break;
+ case SCTP_ASCONF:
+ {
+ struct sctp_tmit_chunk *asconf;
+
+ TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
+ break;
+ }
+ }
+ if (asconf) {
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+ asconf->snd_count--;
+ }
+ }
+ break;
+ case SCTP_INITIATION:
+ /* resend the INIT */
+ stcb->asoc.dropped_special_cnt++;
+ if (stcb->asoc.dropped_special_cnt < SCTP_RETRY_DROPPED_THRESH) {
+ /*
+ * If we can get it in within a few attempts we do
+ * this; otherwise we let the timer fire.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_SELECTIVE_ACK:
+ /* resend the sack */
+ sctp_send_sack(stcb);
+ break;
+ case SCTP_HEARTBEAT_REQUEST:
+ /* resend a demand HB */
+ if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) {
+ /*
+ * Only retransmit if we KNOW we won't destroy the
+ * tcb
+ */
+ (void)sctp_send_hb(stcb, 1, net);
+ }
+ break;
+ case SCTP_SHUTDOWN:
+ sctp_send_shutdown(stcb, net);
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_ack(stcb, net);
+ break;
+ case SCTP_COOKIE_ECHO:
+ {
+ struct sctp_tmit_chunk *cookie;
+
+ cookie = NULL;
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie) {
+ if (cookie->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ sctp_stop_all_cookie_timers(stcb);
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ sctp_send_cookie_ack(stcb);
+ break;
+ case SCTP_ASCONF_ACK:
+ /* resend last asconf ack */
+ sctp_send_asconf_ack(stcb);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ send_forward_tsn(stcb, &stcb->asoc);
+ break;
+ /* can't do anything with these */
+ case SCTP_PACKET_DROPPED:
+ case SCTP_INITIATION_ACK: /* this should not happen */
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ default:
+ break;
+ }
+ return (0);
+}
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+ uint16_t temp;
+
+ /*
+ * We set things to 0xffff since this is the last delivered sequence
+ * and we will be sending in 0 after the reset.
+ */
+
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamincnt) {
+ continue;
+ }
+ stcb->asoc.strmin[temp].last_sequence_delivered = 0xffff;
+ }
+ } else {
+ list = NULL;
+ for (i = 0; i < stcb->asoc.streamincnt; i++) {
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+
+ if (number_entries == 0) {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ }
+ } else if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ uint16_t temp;
+
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ /* no such stream */
+ continue;
+ }
+ stcb->asoc.strmout[temp].next_sequence_sent = 0;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_reset_out_req *req;
+ struct sctp_stream_reset_out_request *r;
+ struct sctp_tmit_chunk *chk;
+ int len, clen;
+
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ if (stcb->asoc.str_reset == NULL) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ chk = stcb->asoc.str_reset;
+ if (chk->data == NULL) {
+ return (NULL);
+ }
+ if (bchk) {
+ /* he wants a copy of the chk pointer */
+ *bchk = chk;
+ }
+ clen = chk->send_size;
+ req = mtod(chk->data, struct sctp_stream_reset_out_req *);
+ r = &req->sr_req;
+ if (ntohl(r->request_seq) == seq) {
+ /* found it */
+ return (r);
+ }
+ len = SCTP_SIZE32(ntohs(r->ph.param_length));
+ if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
+ /* move to the next one, there can only be a max of two */
+ r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+ if (ntohl(r->request_seq) == seq) {
+ return (r);
+ }
+ }
+ /* that seq is not here */
+ return (NULL);
+}
+
+static void
+sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk = stcb->asoc.str_reset;
+
+ if (stcb->asoc.str_reset == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+ TAILQ_REMOVE(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore NO_NULL_CHK */
+ stcb->asoc.str_reset = NULL;
+}
+
+
+static int
+sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
+ uint32_t seq, uint32_t action,
+ struct sctp_stream_reset_response *respin)
+{
+ uint16_t type;
+ int lparm_len;
+ struct sctp_association *asoc = &stcb->asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_reset_out_request *srparam;
+ int number_entries;
+
+ if (asoc->stream_reset_outstanding == 0) {
+ /* duplicate */
+ return (0);
+ }
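+ /* Only the response that matches our outstanding request sequence is acted on. */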
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ srparam = sctp_find_stream_reset(stcb, seq, &chk);
+ if (srparam) {
+ stcb->asoc.str_reset_seq_out++;
+ type = ntohs(srparam->ph.param_type);
+ lparm_len = ntohs(srparam->ph.param_length);
+ if (type == SCTP_STR_RESET_OUT_REQUEST) {
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
+ asoc->stream_reset_out_is_outstanding = 0;
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ /* do it */
+ sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_IN_REQUEST) {
+ /* Answered my request */
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action != SCTP_STREAM_RESET_PERFORMED) {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_TSN_REQUEST) {
+ /**
+ * a) Adopt the new incoming TSN.
+ * b) Reset the map.
+ * c) Adopt the new outgoing TSN.
+ */
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_forward_tsn_chunk fwdtsn;
+ int abort_flag = 0;
+
+ if (respin == NULL) {
+ /* huh ? */
+ return (0);
+ }
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ resp = (struct sctp_stream_reset_response_tsn *)respin;
+ asoc->stream_reset_outstanding--;
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.new_cumulative_tsn = htonl(ntohl(resp->senders_next_tsn) - 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map = (ntohl(resp->senders_next_tsn) - 1);
+ stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ stcb->asoc.sending_seq = ntohl(resp->receivers_next_tsn);
+ stcb->asoc.last_acked_seq = stcb->asoc.cumulative_tsn;
+
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+
+ }
+ }
+ /* get rid of the request and get the request flags */
+ if (asoc->stream_reset_outstanding == 0) {
+ sctp_clean_up_stream_reset(stcb);
+ }
+ }
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_in_request *req, int trunc)
+{
+ uint32_t seq;
+ int len, i;
+ int number_entries;
+ uint16_t temp;
+
+ /*
+ * peer wants me to send a str-reset to him for my outgoing seq's if
+ * seq_in is right.
+ */
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
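+ /* A new request must carry the expected sequence number; older sequences are answered from the cached results further down. */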
+ if (asoc->str_reset_seq_in == seq) {
+ if (trunc) {
+ /* Can't do it, since they exceeded our buffer size */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
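+ /* convert the requested stream numbers to host byte order in place */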
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(req->list_of_streams[i]);
+ req->list_of_streams[i] = temp;
+ }
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
+ asoc->str_reset_seq_out,
+ seq, (asoc->sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ asoc->str_reset = chk;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ stcb->asoc.stream_reset_outstanding++;
+ } else {
+ /* Can't do it, since we have sent one out */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_TRY_LATER;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ }
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+static int
+sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_tsn_request *req)
+{
+ /* reset all in and out and update the tsn */
+ /*
+ * A) reset my str-seq's on in and out. B) Select a receive next,
+ * set cum-ack to it, and also process this selected number as a
+ * fwd-tsn. C) set in the response my next sending seq.
+ */
+ struct sctp_forward_tsn_chunk fwdtsn;
+ struct sctp_association *asoc = &stcb->asoc;
+ int abort_flag = 0;
+ uint32_t seq;
+
+ seq = ntohl(req->request_seq);
+ if (asoc->str_reset_seq_in == seq) {
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.ch.chunk_flags = 0;
+ fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
+ stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = stcb->asoc.highest_tsn_inside_map + 1;
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ atomic_add_int(&stcb->asoc.sending_seq, 1);
+ /* save off historical data for retrans */
+ stcb->asoc.last_sending_seq[1] = stcb->asoc.last_sending_seq[0];
+ stcb->asoc.last_sending_seq[0] = stcb->asoc.sending_seq;
+ stcb->asoc.last_base_tsnsent[1] = stcb->asoc.last_base_tsnsent[0];
+ stcb->asoc.last_base_tsnsent[0] = stcb->asoc.mapping_array_base_tsn;
+
+ sctp_add_stream_reset_result_tsn(chk,
+ ntohl(req->request_seq),
+ SCTP_STREAM_RESET_PERFORMED,
+ stcb->asoc.sending_seq,
+ stcb->asoc.mapping_array_base_tsn);
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
+ stcb->asoc.last_sending_seq[0],
+ stcb->asoc.last_base_tsnsent[0]
+ );
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1],
+ stcb->asoc.last_sending_seq[1],
+ stcb->asoc.last_base_tsnsent[1]
+ );
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_out(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_out_request *req, int trunc)
+{
+ uint32_t seq, tsn;
+ int number_entries, len;
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
+
+ /* now if its not a duplicate we process it */
+ if (asoc->str_reset_seq_in == seq) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t));
+ /*
+ * the sender is resetting; handle the stream list: a) verify
+ * whether we can do the reset now, if so no problem; b) if we
+ * can't do it yet we must copy the request; c) queue it, set up
+ * the data-in processor to trigger it when needed, and dequeue
+ * all the queued data.
+ */
+ tsn = ntohl(req->send_reset_at_tsn);
+
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ if (trunc) {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ } else if ((tsn == asoc->cumulative_tsn) ||
+ (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN))) {
+ /* we can do it now */
+ sctp_reset_in_stream(stcb, number_entries, req->list_of_streams);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ } else {
+ /*
+ * we must queue it up and thus wait for the TSN's
+ * to arrive that are at or before tsn
+ */
+ struct sctp_stream_reset_list *liste;
+ int siz;
+
+ siz = sizeof(struct sctp_stream_reset_list) + (number_entries * sizeof(uint16_t));
+ SCTP_MALLOC(liste, struct sctp_stream_reset_list *,
+ siz, SCTP_M_STRESET);
+ if (liste == NULL) {
+ /* gak out of memory */
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ return;
+ }
+ liste->tsn = tsn;
+ liste->number_entries = number_entries;
+ memcpy(&liste->req, req,
+ (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
+ TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ }
+ asoc->str_reset_seq_in++;
+ } else if ((asoc->str_reset_seq_in - 1) == seq) {
+ /*
+ * one seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if ((asoc->str_reset_seq_in - 2) == seq) {
+ /*
+ * two seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static int
+ sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ struct sctp_stream_reset_out_req *sr_req)
+{
+ int chk_length, param_len, ptype;
+ struct sctp_paramhdr pstore;
+ uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
+
+ uint32_t seq;
+ int num_req = 0;
+ int trunc = 0;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ struct sctp_paramhdr *ph;
+ int ret_code = 0;
+ int num_param = 0;
+
+ /* now it may be a reset or a reset-response */
+ chk_length = ntohs(sr_req->ch.chunk_length);
+
+ /* setup for adding the response */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return (ret_code);
+ }
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->no_fr_allowed = 0;
+ chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+strres_nochunk:
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return (ret_code);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = stcb->asoc.primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ offset += sizeof(struct sctp_chunkhdr);
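+ /* walk every reset parameter in the chunk, copying each into a local buffer before dispatching it */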
+ while ((size_t)chk_length >= sizeof(struct sctp_stream_reset_tsn_request)) {
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *) & pstore);
+ if (ph == NULL)
+ break;
+ param_len = ntohs(ph->param_length);
+ if (param_len < (int)sizeof(struct sctp_stream_reset_tsn_request)) {
+ /* bad param */
+ break;
+ }
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, (int)sizeof(cstore)),
+ (uint8_t *) & cstore);
+ ptype = ntohs(ph->param_type);
+ num_param++;
+ if (param_len > (int)sizeof(cstore)) {
+ trunc = 1;
+ } else {
+ trunc = 0;
+ }
+
+ if (num_param > SCTP_MAX_RESET_PARAMS) {
+ /* hit the max of parameters already sorry.. */
+ break;
+ }
+ if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
+ struct sctp_stream_reset_out_request *req_out;
+
+ req_out = (struct sctp_stream_reset_out_request *)ph;
+ num_req++;
+ if (stcb->asoc.stream_reset_outstanding) {
+ seq = ntohl(req_out->response_seq);
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ /* implicit ack */
+ (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_PERFORMED, NULL);
+ }
+ }
+ sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc);
+ } else if (ptype == SCTP_STR_RESET_IN_REQUEST) {
+ struct sctp_stream_reset_in_request *req_in;
+
+ num_req++;
+
+ req_in = (struct sctp_stream_reset_in_request *)ph;
+
+ sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc);
+ } else if (ptype == SCTP_STR_RESET_TSN_REQUEST) {
+ struct sctp_stream_reset_tsn_request *req_tsn;
+
+ num_req++;
+ req_tsn = (struct sctp_stream_reset_tsn_request *)ph;
+
+ if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ /* no more */
+ break;
+ } else if (ptype == SCTP_STR_RESET_RESPONSE) {
+ struct sctp_stream_reset_response *resp;
+ uint32_t result;
+
+ resp = (struct sctp_stream_reset_response *)ph;
+ seq = ntohl(resp->response_seq);
+ result = ntohl(resp->result);
+ if (sctp_handle_stream_reset_response(stcb, seq, result, resp)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ } else {
+ break;
+ }
+ offset += SCTP_SIZE32(param_len);
+ chk_length -= SCTP_SIZE32(param_len);
+ }
+ if (num_req == 0) {
+ /* we have no response, free the stuff */
+ goto strres_nochunk;
+ }
+ /* ok we have a chunk to link in */
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue,
+ chk,
+ sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ return (ret_code);
+}
+
+/*
+ * Handle a router or endpoint's report of a packet loss. There are two
+ * ways to handle this: either we get the whole packet and must dissect it
+ * ourselves (possibly with truncation and/or corruption), or it is a
+ * summary from a middle box that did the dissecting for us.
+ */
+static void
+sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t limit)
+{
+ uint32_t bottle_bw, on_queue;
+ uint16_t trunc_len;
+ unsigned int chlen;
+ unsigned int at;
+ struct sctp_chunk_desc desc;
+ struct sctp_chunkhdr *ch;
+
+ chlen = ntohs(cp->ch.chunk_length);
+ chlen -= sizeof(struct sctp_pktdrop_chunk);
+ /* XXX possible chlen underflow */
+ if (chlen == 0) {
+ ch = NULL;
+ if (cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)
+ SCTP_STAT_INCR(sctps_pdrpbwrpt);
+ } else {
+ ch = (struct sctp_chunkhdr *)(cp->data + sizeof(struct sctphdr));
+ chlen -= sizeof(struct sctphdr);
+ /* XXX possible chlen underflow */
+ memset(&desc, 0, sizeof(desc));
+ }
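+ /* clamp the reported truncated length to the amount of data we were actually given */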
+ trunc_len = (uint16_t) ntohs(cp->trunc_len);
+ if (trunc_len > limit) {
+ trunc_len = limit;
+ }
+ /* now the chunks themselves */
+ while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
+ desc.chunk_type = ch->chunk_type;
+ /* get amount we need to move */
+ at = ntohs(ch->chunk_length);
+ if (at < sizeof(struct sctp_chunkhdr)) {
+ /* corrupt chunk, maybe at the end? */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ if (trunc_len == 0) {
+ /* we are supposed to have all of it */
+ if (at > chlen) {
+ /* corrupt skip it */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ } else {
+ /* is there enough of it left ? */
+ if (desc.chunk_type == SCTP_DATA) {
+ if (chlen < (sizeof(struct sctp_data_chunk) +
+ sizeof(desc.data_bytes))) {
+ break;
+ }
+ } else {
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ break;
+ }
+ }
+ }
+ if (desc.chunk_type == SCTP_DATA) {
+ /* can we get out the tsn? */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbda);
+
+ if (chlen >= (sizeof(struct sctp_data_chunk) + sizeof(uint32_t))) {
+ /* yep */
+ struct sctp_data_chunk *dcp;
+ uint8_t *ddp;
+ unsigned int iii;
+
+ dcp = (struct sctp_data_chunk *)ch;
+ ddp = (uint8_t *) (dcp + 1);
+ for (iii = 0; iii < sizeof(desc.data_bytes); iii++) {
+ desc.data_bytes[iii] = ddp[iii];
+ }
+ desc.tsn_ifany = dcp->dp.tsn;
+ } else {
+ /* nope we are done. */
+ SCTP_STAT_INCR(sctps_pdrpnedat);
+ break;
+ }
+ } else {
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbct);
+ }
+
+ if (process_chunk_drop(stcb, &desc, net, cp->ch.chunk_flags)) {
+ SCTP_STAT_INCR(sctps_pdrppdbrk);
+ break;
+ }
+ if (SCTP_SIZE32(at) > chlen) {
+ break;
+ }
+ chlen -= SCTP_SIZE32(at);
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ /* done, none left */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)((caddr_t)ch + SCTP_SIZE32(at));
+ }
+ /* Now update any rwnd --- possibly */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) == 0) {
+ /* From a peer, we get a rwnd report */
+ uint32_t a_rwnd;
+
+ SCTP_STAT_INCR(sctps_pdrpfehos);
+
+ bottle_bw = ntohl(cp->bottle_bw);
+ on_queue = ntohl(cp->current_onq);
+ if (bottle_bw && on_queue) {
+ /* a rwnd report is in here */
+ if (bottle_bw > on_queue)
+ a_rwnd = bottle_bw - on_queue;
+ else
+ a_rwnd = 0;
+
+ if (a_rwnd == 0)
+ stcb->asoc.peers_rwnd = 0;
+ else {
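+ /* usable peer rwnd is the advertised window minus what we already have in flight */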
+ if (a_rwnd > stcb->asoc.total_flight) {
+ stcb->asoc.peers_rwnd =
+ a_rwnd - stcb->asoc.total_flight;
+ } else {
+ stcb->asoc.peers_rwnd = 0;
+ }
+ if (stcb->asoc.peers_rwnd <
+ stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ stcb->asoc.peers_rwnd = 0;
+ }
+ }
+ }
+ } else {
+ SCTP_STAT_INCR(sctps_pdrpfmbox);
+ }
+
+ /* now middle boxes in sat networks get a cwnd bump */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) &&
+ (stcb->asoc.sat_t3_loss_recovery == 0) &&
+ (stcb->asoc.sat_network)) {
+ /*
+ * This is debatable, but for sat networks it makes sense.
+ * Note if a T3 timer has gone off, we will prohibit any
+ * changes to cwnd until we exit the t3 loss recovery.
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb,
+ net, cp, &bottle_bw, &on_queue);
+ }
+}
+
+/*
+ * Handles all control chunks in a packet.
+ * inputs:
+ * - m: mbuf chain, assumed to still contain the IP/SCTP header
+ * - stcb: the tcb found for this packet
+ * - offset: offset into the mbuf chain to the first chunkhdr
+ * - length: length of the complete packet
+ * outputs:
+ * - length: modified to the remaining length after control processing
+ * - netp: modified to the new sctp_nets after cookie-echo processing
+ * - return NULL to discard the packet (i.e. no asoc, bad packet, ...),
+ *   otherwise return the tcb for this packet
+ */
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static struct sctp_tcb *
+ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
+ uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ uint32_t vtag_in;
+ int num_chunks = 0; /* number of control chunks processed */
+ uint32_t chk_length;
+ int ret;
+ int abort_no_unlock = 0;
+
+ /*
+ * How big should this be, and should it be alloc'd? Let's try the
+ * d-mtu-ceiling for now (2k) and that should hopefully work ...
+ * until we get into jumbo grams and such..
+ */
+ uint8_t chunk_buf[SCTP_CHUNK_BUFFER_SIZE];
+ struct sctp_tcb *locked_tcb = stcb;
+ int got_auth = 0;
+ uint32_t auth_offset = 0, auth_len = 0;
+ int auth_skipped = 0;
+ int asconf_cnt = 0;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
+ iphlen, *offset, length, stcb);
+
+ /* validate chunk header length... */
+ if (ntohs(ch->chunk_length) < sizeof(*ch)) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Invalid header length %d\n",
+ ntohs(ch->chunk_length));
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /*
+ * validate the verification tag
+ */
+ vtag_in = ntohl(sh->v_tag);
+
+ if (locked_tcb) {
+ SCTP_TCB_LOCK_ASSERT(locked_tcb);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Its an INIT of len:%d vtag:%x\n",
+ ntohs(ch->chunk_length), vtag_in);
+ if (vtag_in != 0) {
+ /* protocol error- silently discard... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type != SCTP_COOKIE_ECHO) {
+ /*
+ * If there is no stcb, skip the AUTH chunk and process it
+ * later, after a stcb is found (to validate that the lookup
+ * was valid).
+ */
+ if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
+ (stcb == NULL) && !sctp_auth_disable) {
+ /* save this chunk for later processing */
+ auth_skipped = 1;
+ auth_offset = *offset;
+ auth_len = ntohs(ch->chunk_length);
+
+ /* (temporarily) move past this chunk */
+ *offset += SCTP_SIZE32(auth_len);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ }
+ if (ch == NULL) {
+ /* Help */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (ch->chunk_type == SCTP_COOKIE_ECHO) {
+ goto process_control_chunks;
+ }
+ /*
+ * first check if it's an ASCONF with an unknown src addr;
+ * we need to look inside to find the association
+ */
+ if (ch->chunk_type == SCTP_ASCONF && stcb == NULL) {
+ struct sctp_chunkhdr *asconf_ch = ch;
+ uint32_t asconf_offset = 0, asconf_len = 0;
+
+ /* inp's refcount may be reduced */
+ SCTP_INP_INCR_REF(inp);
+
+ asconf_offset = *offset;
+ do {
+ asconf_len = ntohs(asconf_ch->chunk_length);
+ if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
+ break;
+ stcb = sctp_findassociation_ep_asconf(m, iphlen,
+ *offset, sh, &inp, netp);
+ if (stcb != NULL)
+ break;
+ asconf_offset += SCTP_SIZE32(asconf_len);
+ asconf_ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, asconf_offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ } while (asconf_ch != NULL && asconf_ch->chunk_type == SCTP_ASCONF);
+ if (stcb == NULL) {
+ /*
+ * reduce inp's refcount if not reduced in
+ * sctp_findassociation_ep_asconf().
+ */
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ locked_tcb = stcb;
+ }
+
+ /* now go back and verify any auth chunk to be sure */
+ if (auth_skipped && (stcb != NULL)) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset,
+ auth_len, chunk_buf);
+ got_auth = 1;
+ auth_skipped = 0;
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m,
+ auth_offset)) {
+ /* auth HMAC failed so dump it */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ }
+ if (stcb == NULL) {
+ /* no association, so it's out of the blue... */
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp, NULL,
+ vrf_id);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ asoc = &stcb->asoc;
+ /* ABORT and SHUTDOWN can use either v_tag... */
+ if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
+ (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
+ (ch->chunk_type == SCTP_PACKET_DROPPED)) {
+ if ((vtag_in == asoc->my_vtag) ||
+ ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+ (vtag_in == asoc->peer_vtag))) {
+ /* this is valid */
+ } else {
+ /* drop this packet... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ if (vtag_in != asoc->my_vtag) {
+ /*
+ * this could be a stale SHUTDOWN-ACK or the
+ * peer never got the SHUTDOWN-COMPLETE and
+ * is still hung; we have started a new asoc
+ * but it won't complete until the shutdown
+ * is completed
+ */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp,
+ NULL, vrf_id);
+ return (NULL);
+ }
+ } else {
+ /* for all other chunks, vtag must match */
+ if (vtag_in != asoc->my_vtag) {
+ /* invalid vtag... */
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "invalid vtag: %xh, expect %xh\n",
+ vtag_in, asoc->my_vtag);
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ } /* end if !SCTP_COOKIE_ECHO */
+ /*
+ * process all control chunks...
+ */
+ if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
+ (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* implied cookie-ack.. we must have lost the ack */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
+ *netp);
+ }
+process_control_chunks:
+ while (IS_SCTP_CONTROL(ch)) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->chunk_length);
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_process_control: processing a chunk type=%u, len=%u\n",
+ ch->chunk_type, chk_length);
+ SCTP_LTRACE_CHK(inp, stcb, ch->chunk_type, chk_length);
+ if (chk_length < sizeof(*ch) ||
+ (*offset + (int)chk_length) > length) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_incontrolchunks);
+ /*
+ * INIT-ACK only gets the init ack "header" portion, because
+ * we don't have to process the peer's COOKIE. All others get
+ * a complete chunk.
+ */
+ if ((ch->chunk_type == SCTP_INITIATION_ACK) ||
+ (ch->chunk_type == SCTP_INITIATION)) {
+ /* get an init-ack chunk */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_init_ack_chunk), chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* For cookies and all other chunks. */
+ if (chk_length > sizeof(chunk_buf)) {
+ /*
+ * use just the size of the chunk buffer so
+ * the front part of our chunks fit in
+ * contiguous space up to the chunk buffer
+ * size (508 bytes). For chunks that need to
+ * get more than that they must use the
+ * sctp_m_getptr() function or other means
+ * (e.g. know how to parse mbuf chains).
+ * Cookies do this already.
+ */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ (sizeof(chunk_buf) - 4),
+ chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* We can fit it all */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ chk_length, chunk_buf);
+ if (ch == NULL) {
+ SCTP_PRINTF("sctp_process_control: Can't get the all data....\n");
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ }
+ }
+ num_chunks++;
+ /* Save off the last place we got a control from */
+ if (stcb != NULL) {
+ if (((netp != NULL) && (*netp != NULL)) || (ch->chunk_type == SCTP_ASCONF)) {
+ /*
+ * allow last_control to be NULL if
+ * ASCONF... ASCONF processing will find the
+ * right net later
+ */
+ if ((netp != NULL) && (*netp != NULL))
+ stcb->asoc.last_control_chunk_from = *netp;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB0, ch->chunk_type);
+#endif
+
+ /* check to see if this chunk required auth, but isn't */
+ if ((stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(ch->chunk_type,
+ stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ goto next_chunk;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_INITIATION:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore? */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ /*
+ * collision case where we are
+ * sending to them too
+ */
+ ;
+ } else {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ if ((chk_length > SCTP_LARGEST_INIT_ACCEPTED) ||
+ (num_chunks > 1) ||
+ (sctp_strict_init && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb != NULL) &&
+ (SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+ *offset = length;
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp) {
+ sctp_handle_init(m, iphlen, *offset, sh,
+ (struct sctp_init_chunk *)ch, inp,
+ stcb, *netp, &abort_no_unlock, vrf_id);
+ }
+ if (abort_no_unlock)
+ return (NULL);
+
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_PAD_CHUNK:
+ break;
+ case SCTP_INITIATION_ACK:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ return (NULL);
+ }
+ }
+ if ((num_chunks > 1) ||
+ (sctp_strict_init && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((netp) && (*netp)) {
+ ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
+ (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, vrf_id);
+ } else {
+ ret = -1;
+ }
+ /*
+ * Special case, I must call the output routine to
+ * get the cookie echoed
+ */
+ if (abort_no_unlock)
+ return (NULL);
+
+ if ((stcb) && ret == 0)
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_SELECTIVE_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n");
+ SCTP_STAT_INCR(sctps_recvsacks);
+ {
+ struct sctp_sack_chunk *sack;
+ int abort_now = 0;
+ uint32_t a_rwnd, cum_ack;
+ uint16_t num_seg;
+ int nonce_sum_flag;
+
+ if ((stcb == NULL) || (chk_length < sizeof(struct sctp_sack_chunk))) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on sack chunk, too small\n");
+ ignore_sack:
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /*-
+ * If we have sent a shutdown-ack, we will pay no
+ * attention to a sack sent in to us since
+ * we don't care anymore.
+ */
+ goto ignore_sack;
+ }
+ sack = (struct sctp_sack_chunk *)ch;
+ nonce_sum_flag = ch->chunk_flags & SCTP_SACK_NONCE_SUM;
+ cum_ack = ntohl(sack->sack.cum_tsn_ack);
+ num_seg = ntohs(sack->sack.num_gap_ack_blks);
+ a_rwnd = (uint32_t) ntohl(sack->sack.a_rwnd);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
+ cum_ack,
+ num_seg,
+ a_rwnd
+ );
+ stcb->asoc.seen_a_sack_this_pkt = 1;
+ if ((stcb->asoc.pr_sctp_cnt == 0) &&
+ (num_seg == 0) &&
+ ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) ||
+ (cum_ack == stcb->asoc.last_acked_seq)) &&
+ (stcb->asoc.saw_sack_with_frags == 0) &&
+ (!TAILQ_EMPTY(&stcb->asoc.sent_queue))
+ ) {
+ /*
+ * We have a SIMPLE sack with no prior
+ * segments and data on the sent queue
+ * to be acked. Use the faster path sack
+ * processing. We also allow window
+ * update sacks with no missing segments
+ * to go this way too.
+ */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag,
+ &abort_now);
+ } else {
+ if (netp && *netp)
+ sctp_handle_sack(m, *offset,
+ sack, stcb, *netp, &abort_now, chk_length, a_rwnd);
+ }
+ if (abort_now) {
+ /* ABORT signal from sack processing */
+ *offset = length;
+ return (NULL);
+ }
+ }
+ break;
+ case SCTP_HEARTBEAT_REQUEST:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT\n");
+ if ((stcb) && netp && *netp) {
+ SCTP_STAT_INCR(sctps_recvheartbeat);
+ sctp_send_heartbeat_ack(stcb, m, *offset,
+ chk_length, *netp);
+
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+ break;
+ case SCTP_HEARTBEAT_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT-ACK\n");
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_heartbeat_chunk))) {
+ /* It's not ours */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ SCTP_STAT_INCR(sctps_recvheartbeatack);
+ if (netp && *netp)
+ sctp_handle_heartbeat_ack((struct sctp_heartbeat_chunk *)ch,
+ stcb, *netp);
+ break;
+ case SCTP_ABORT_ASSOCIATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
+ stcb);
+ if ((stcb) && netp && *netp)
+ sctp_handle_abort((struct sctp_abort_chunk *)ch,
+ stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_SHUTDOWN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
+ stcb);
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp && *netp) {
+ int abort_flag = 0;
+
+ sctp_handle_shutdown((struct sctp_shutdown_chunk *)ch,
+ stcb, *netp, &abort_flag);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ }
+ }
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb);
+ if ((stcb) && (netp) && (*netp))
+ sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+
+ case SCTP_OPERATION_ERROR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n");
+ if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) {
+
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_COOKIE_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "SCTP_COOKIE-ECHO, stcb %p\n", stcb);
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ *offset = length;
+ return (NULL);
+ }
+ }
+		/*
+		 * First, are we accepting? We do this again here
+		 * since it is possible that a previous endpoint
+		 * WAS listening, responded to an INIT-ACK and then
+		 * closed. We opened and bound... and are now no
+		 * longer listening.
+		 */
+
+ if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (sctp_abort_if_one_2_one_hits_limit)) {
+ struct mbuf *oper;
+ struct sctp_paramhdr *phdr;
+
+ oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr);
+ phdr = mtod(oper,
+ struct sctp_paramhdr *);
+ phdr->param_type =
+ htons(SCTP_CAUSE_OUT_OF_RESC);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr));
+ }
+ sctp_abort_association(inp, stcb, m,
+ iphlen, sh, oper, vrf_id);
+ }
+ *offset = length;
+ return (NULL);
+ } else {
+ struct mbuf *ret_buf;
+ struct sctp_inpcb *linp;
+
+ if (stcb) {
+ linp = NULL;
+ } else {
+ linp = inp;
+ }
+
+ if (linp) {
+ SCTP_ASOC_CREATE_LOCK(linp);
+ }
+ if (netp) {
+ ret_buf =
+ sctp_handle_cookie_echo(m, iphlen,
+ *offset, sh,
+ (struct sctp_cookie_echo_chunk *)ch,
+ &inp, &stcb, netp,
+ auth_skipped,
+ auth_offset,
+ auth_len,
+ &locked_tcb,
+ vrf_id);
+ } else {
+ ret_buf = NULL;
+ }
+ if (linp) {
+ SCTP_ASOC_CREATE_UNLOCK(linp);
+ }
+ if (ret_buf == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "GAK, null buffer\n");
+ auth_skipped = 0;
+ *offset = length;
+ return (NULL);
+ }
+ /* if AUTH skipped, see if it verified... */
+ if (auth_skipped) {
+ got_auth = 1;
+ auth_skipped = 0;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ /*
+ * Restart the timer if we have
+ * pending data
+ */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb,
+ chk->whoTo);
+ }
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb);
+ if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ }
+ /* He's alive so give him credit */
+ if ((stcb) && netp && *netp) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp);
+ }
+ break;
+ case SCTP_ECN_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-ECHO\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_ecne_chunk))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch,
+ stcb);
+ }
+ break;
+ case SCTP_ECN_CWR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-CWR\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_cwr_chunk))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb);
+ }
+ break;
+ case SCTP_SHUTDOWN_COMPLETE:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb);
+ /* must be first and only chunk */
+ if ((num_chunks > 1) ||
+ (length - *offset > (int)SCTP_SIZE32(chk_length))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ sctp_handle_shutdown_complete((struct sctp_shutdown_complete_chunk *)ch,
+ stcb, *netp);
+ }
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_ASCONF:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
+ /* He's alive so give him credit */
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf(m, *offset,
+ (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
+ asconf_cnt++;
+ }
+ break;
+ case SCTP_ASCONF_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF-ACK\n");
+ if (chk_length < sizeof(struct sctp_asconf_ack_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf_ack(m, *offset,
+ (struct sctp_asconf_ack_chunk *)ch, stcb, *netp, &abort_no_unlock);
+ if (abort_no_unlock)
+ return (NULL);
+ }
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n");
+ if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (stcb) {
+ int abort_flag = 0;
+
+ stcb->asoc.overall_error_count = 0;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ *fwd_tsn_seen = 1;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ sctp_handle_forward_tsn(stcb,
+ (struct sctp_forward_tsn_chunk *)ch, &abort_flag, m, *offset);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ } else {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+
+ }
+ break;
+ case SCTP_STREAM_RESET:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_STREAM_RESET\n");
+ if (((stcb == NULL) || (ch == NULL) || (chk_length < sizeof(struct sctp_stream_reset_tsn_req)))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * hmm, peer should have announced this, but
+ * we will turn it on since he is sending us
+ * a stream reset.
+ */
+ stcb->asoc.peer_supports_strreset = 1;
+ }
+ if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) {
+ /* stop processing */
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_PACKET_DROPPED:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_PACKET_DROPPED\n");
+ /* re-get it all please */
+ if (chk_length < sizeof(struct sctp_pktdrop_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (ch && (stcb) && netp && (*netp)) {
+ sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
+ stcb, *netp,
+ min(chk_length, (sizeof(chunk_buf) - 4)));
+
+ }
+ break;
+
+ case SCTP_AUTHENTICATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
+ if (sctp_auth_disable)
+ goto unknown_chunk;
+
+ if (stcb == NULL) {
+ /* save the first AUTH for later processing */
+ if (auth_skipped == 0) {
+ auth_offset = *offset;
+ auth_len = chk_length;
+ auth_skipped = 1;
+ }
+ /* skip this chunk (temporarily) */
+ goto next_chunk;
+ }
+ if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
+ (chk_length > (sizeof(struct sctp_auth_chunk) +
+ SCTP_AUTH_DIGEST_LEN_MAX))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (got_auth == 1) {
+ /* skip this chunk... it's already auth'd */
+ goto next_chunk;
+ }
+ got_auth = 1;
+ if ((ch == NULL) || sctp_handle_auth(stcb, (struct sctp_auth_chunk *)ch,
+ m, *offset)) {
+ /* auth HMAC failed so dump the packet */
+ *offset = length;
+ return (stcb);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ break;
+
+ default:
+ unknown_chunk:
+ /* it's an unknown chunk! */
+ if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
+ struct mbuf *mm;
+ struct sctp_paramhdr *phd;
+
+ mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mm) {
+ phd = mtod(mm, struct sctp_paramhdr *);
+ /*
+ * We cheat and use param type since
+					 * we did not bother to define an
+ * error cause struct. They are the
+ * same basic format with different
+ * names.
+ */
+ phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length = htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(mm) = sizeof(*phd);
+ SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(mm)) {
+ sctp_queue_op_err(stcb, mm);
+ } else {
+ sctp_m_freem(mm);
+ }
+ }
+ }
+ if ((ch->chunk_type & 0x80) == 0) {
+ /* discard this packet */
+ *offset = length;
+ return (stcb);
+ } /* else skip this bad chunk and continue... */
+ break;
+ } /* switch (ch->chunk_type) */
+
+
+next_chunk:
+ /* get the next chunk */
+ *offset += SCTP_SIZE32(chk_length);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ if (ch == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ } /* while */
+
+ if (asconf_cnt > 0 && stcb != NULL) {
+ sctp_send_asconf_ack(stcb);
+ }
+ return (stcb);
+}
+
+
+/*
+ * Process the ECN bits. We have something set, so we must look to see
+ * whether it is ECN(0), ECN(1) or CE.
+ */
+static void
+sctp_process_ecn_marked_a(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ ;
+ } else if ((ecn_bits & SCTP_ECT1_BIT) == SCTP_ECT1_BIT) {
+ /*
+		 * We only add to the nonce sum for ECT1; ECT0 does not
+		 * change the NS bit (which we have yet to find a way to
+		 * send).
+ */
+
+ /* ECN Nonce stuff */
+ stcb->asoc.receiver_nonce_sum++;
+ stcb->asoc.receiver_nonce_sum &= SCTP_SACK_NONCE_SUM;
+
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+		 * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ } else if ((ecn_bits & SCTP_ECT0_BIT) == SCTP_ECT0_BIT) {
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+		 * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ }
+}
+
+static void
+sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn, uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ /*
+ * we possibly must notify the sender that a congestion
+	 * window reduction is in order. We do this by adding an ECNE
+ * chunk to the output chunk queue. The incoming CWR will
+ * remove this chunk.
+ */
+ if (compare_with_wrap(high_tsn, stcb->asoc.last_echo_tsn,
+ MAX_TSN)) {
+ /* Yep, we need to add a ECNE */
+ sctp_send_ecn_echo(stcb, net, high_tsn);
+ stcb->asoc.last_echo_tsn = high_tsn;
+ }
+ }
+}
+
+#ifdef INVARIANTS
+static void
+sctp_validate_no_locks(struct sctp_inpcb *inp)
+{
+ struct sctp_tcb *stcb;
+
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (mtx_owned(&stcb->tcb_mtx)) {
+ panic("Own lock on stcb at return from input");
+ }
+ }
+}
+
+#endif
+
+/*
+ * common input chunk processing (v4 and v6)
+ */
+void
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
+ int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits, uint32_t vrf_id)
+{
+ /*
+ * Control chunk processing
+ */
+ uint32_t high_tsn;
+ int fwd_tsn_seen = 0, data_processed = 0;
+ struct mbuf *m = *mm;
+ int abort_flag = 0;
+ int un_sent;
+
+ SCTP_STAT_INCR(sctps_recvdatagrams);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 1);
+ sctp_auditing(0, inp, stcb, net);
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d stcb:%p\n",
+ m, iphlen, offset, stcb);
+ if (stcb) {
+ /* always clear this before beginning a packet */
+ stcb->asoc.authenticated = 0;
+ stcb->asoc.seen_a_sack_this_pkt = 0;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
+ stcb, stcb->asoc.state);
+
+ if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ /*-
+ * If we hit here, we had a ref count
+ * up when the assoc was aborted and the
+			 * timer is clearing out the assoc; we should
+			 * NOT respond to any packet. It's OOTB.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ goto out_now;
+ }
+ }
+ if (IS_SCTP_CONTROL(ch)) {
+ /* process the control portion of the SCTP packet */
+ /* sa_ignore NO_NULL_CHK */
+ stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
+ inp, stcb, &net, &fwd_tsn_seen, vrf_id);
+ if (stcb) {
+ /*
+ * This covers us if the cookie-echo was there and
+ * it changes our INP.
+ */
+ inp = stcb->sctp_ep;
+ }
+ } else {
+ /*
+ * no control chunks, so pre-process DATA chunks (these
+ * checks are taken care of by control processing)
+ */
+
+ /*
+ * if DATA only packet, and auth is required, then punt...
+ * can't have authenticated without any AUTH (control)
+ * chunks
+ */
+ if ((stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.local_auth_chunks)) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ if (stcb == NULL) {
+ /* out of the blue DATA chunk */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ goto out_now;
+ }
+ if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
+ /* v_tag mismatch! */
+ SCTP_STAT_INCR(sctps_badvtag);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ }
+
+ if (stcb == NULL) {
+ /*
+ * no valid TCB for this packet, or we found it's a bad
+ * packet while processing control, or we're done with this
+ * packet (done or skip rest of data), so we drop it...
+ */
+ goto out_now;
+ }
+ /*
+ * DATA chunk processing
+ */
+ /* plow through the data chunks while length > offset */
+
+ /*
+ * Rest should be DATA only. Check authentication state if AUTH for
+ * DATA is required.
+ */
+ if ((length > offset) && (stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "Data chunk requires AUTH, skipped\n");
+ goto trigger_send;
+ }
+ if (length > offset) {
+ int retval;
+
+ /*
+ * First check to make sure our state is correct. We would
+ * not get here unless we really did have a tag, so we don't
+ * abort if this happens, just dump the chunk silently.
+ */
+ switch (SCTP_GET_STATE(&stcb->asoc)) {
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+			 * We consider data arriving with a valid tag in
+			 * this state to show us that the cookie-ack was
+			 * lost. Imply it was there.
+ */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, net);
+ break;
+ case SCTP_STATE_COOKIE_WAIT:
+ /*
+ * We consider OOTB any data sent during asoc setup.
+ */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_EMPTY: /* should not happen */
+ case SCTP_STATE_INUSE: /* should not happen */
+ case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ default:
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_OPEN:
+ case SCTP_STATE_SHUTDOWN_SENT:
+ break;
+ }
+ /* take care of ECN, part 1. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_a(stcb, net, ecn_bits);
+ }
+ /* plow through the data chunks while length > offset */
+ retval = sctp_process_data(mm, iphlen, &offset, length, sh,
+ inp, stcb, net, &high_tsn);
+ if (retval == 2) {
+ /*
+ * The association aborted, NO UNLOCK needed since
+ * the association is destroyed.
+ */
+ goto out_now;
+ }
+ data_processed = 1;
+ if (retval == 0) {
+ /* take care of ecn part 2. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_b(stcb, net, high_tsn,
+ ecn_bits);
+ }
+ }
+ /*
+ * Anything important needs to have been m_copy'ed in
+ * process_data
+ */
+ }
+ if ((data_processed == 0) && (fwd_tsn_seen)) {
+ int was_a_gap = 0;
+
+ if (compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN)) {
+ /* there was a gap before this data was processed */
+ was_a_gap = 1;
+ }
+ sctp_sack_check(stcb, 1, was_a_gap, &abort_flag);
+ if (abort_flag) {
+ /* Again, we aborted so NO UNLOCK needed */
+ goto out_now;
+ }
+ }
+ /* trigger send of any chunks in queue... */
+trigger_send:
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 2);
+ sctp_auditing(1, inp, stcb, net);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Check for chunk output prw:%d tqe:%d tf=%d\n",
+ stcb->asoc.peers_rwnd,
+ TAILQ_EMPTY(&stcb->asoc.control_send_queue),
+ stcb->asoc.total_flight);
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue) ||
+ ((un_sent) &&
+ (stcb->asoc.peers_rwnd > 0 ||
+ (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "Calling chunk OUTPUT\n");
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "chunk OUTPUT returns\n");
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 3);
+ sctp_auditing(2, inp, stcb, net);
+#endif
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+#ifdef INVARIANTS
+ sctp_validate_no_locks(inp);
+#endif
+ return;
+}
+
+
+
+void
+sctp_input(i_pak, off)
+ struct mbuf *i_pak;
+ int off;
+
+{
+#ifdef SCTP_MBUF_LOGGING
+ struct mbuf *mat;
+
+#endif
+ struct mbuf *m;
+ int iphlen;
+ uint32_t vrf_id = 0;
+ uint8_t ecn_bits;
+ struct ip *ip;
+ struct sctphdr *sh;
+ struct sctp_inpcb *inp = NULL;
+
+ uint32_t check, calc_check;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_chunkhdr *ch;
+ int refcount_up = 0;
+ int length, mlen, offset;
+
+
+ if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
+ SCTP_RELEASE_PKT(i_pak);
+ return;
+ }
+ mlen = SCTP_HEADER_LEN(i_pak);
+ iphlen = off;
+ m = SCTP_HEADER_TO_CHAIN(i_pak);
+
+ net = NULL;
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+
+
+#ifdef SCTP_MBUF_LOGGING
+ /* Log in any input mbufs */
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ mat = m;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_INPUT);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, mlen);
+#endif
+ /*
+	 * Must take out the iphlen, since mlen expects this (only affects
+	 * the loopback case)
+ */
+ mlen -= iphlen;
+
+ /*
+ * Get IP, SCTP, and first chunk header together in first mbuf.
+ */
+ ip = mtod(m, struct ip *);
+ offset = iphlen + sizeof(*sh) + sizeof(*ch);
+ if (SCTP_BUF_LEN(m) < offset) {
+ if ((m = m_pullup(m, offset)) == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
+
+ /* SCTP does not allow broadcasts or multicasts */
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ goto bad;
+ }
+ if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
+ /*
+		 * We only look at broadcast if it's a front state; all
+		 * others we will not have a tcb for anyway.
+ */
+ goto bad;
+ }
+ /* validate SCTP checksum */
+ check = sh->checksum; /* save incoming checksum */
+ if ((check == 0) && (sctp_no_csum_on_loopback) &&
+ ((ip->ip_src.s_addr == ip->ip_dst.s_addr) ||
+ (SCTP_IS_IT_LOOPBACK(m)))
+ ) {
+ goto sctp_skip_csum_4;
+ }
+ sh->checksum = 0; /* prepare for calc */
+ calc_check = sctp_calculate_sum(m, &mlen, iphlen);
+ if (calc_check != check) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
+ calc_check, check, m, mlen, iphlen);
+
+ stcb = sctp_findassociation_addr(m, iphlen,
+ offset - sizeof(*ch),
+ sh, ch, &inp, &net,
+ vrf_id);
+ if ((inp) && (stcb)) {
+ sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+ } else if ((inp != NULL) && (stcb == NULL)) {
+ refcount_up = 1;
+ }
+ SCTP_STAT_INCR(sctps_badsum);
+ SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+ goto bad;
+ }
+ sh->checksum = calc_check;
+sctp_skip_csum_4:
+ /* destination port of 0 is illegal, based on RFC2960. */
+ if (sh->dest_port == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ /* validate mbuf chain length with IP payload length */
+ if (mlen < (ip->ip_len - iphlen)) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ /*
+	 * Locate pcb and tcb for datagram; sctp_findassociation_addr() wants
+ * IP/SCTP/first chunk header...
+ */
+ stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
+ sh, ch, &inp, &net, vrf_id);
+ /* inp's ref-count increased && stcb locked */
+ if (inp == NULL) {
+ struct sctp_init_chunk *init_chk, chunk_buf;
+
+ SCTP_STAT_INCR(sctps_noport);
+#ifdef ICMP_BANDLIM
+ /*
+ * we use the bandwidth limiting to protect against sending
+ * too many ABORTS all at once. In this case these count the
+ * same as an ICMP message.
+ */
+ if (badport_bandlim(0) < 0)
+ goto bad;
+#endif /* ICMP_BANDLIM */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+	    "Sending an ABORT from packet entry!\n");
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /*
+ * we do a trick here to get the INIT tag, dig in
+ * and get the tag from the INIT and put it in the
+ * common header.
+ */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ iphlen + sizeof(*sh), sizeof(*init_chk),
+ (uint8_t *) & chunk_buf);
+ if (init_chk != NULL)
+ sh->v_tag = init_chk->init.initiate_tag;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id);
+ goto bad;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+ goto bad;
+ }
+ if (ch->chunk_type != SCTP_ABORT_ASSOCIATION)
+ sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id);
+ goto bad;
+ } else if (stcb == NULL) {
+ refcount_up = 1;
+ }
+#ifdef IPSEC
+ /*
+ * I very much doubt any of the IPSEC stuff will work but I have no
+ * idea, so I will leave it in place.
+ */
+ if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+ ipsec4stat.in_polvio++;
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+#endif /* IPSEC */
+
+ /*
+ * common chunk processing
+ */
+ length = ip->ip_len + iphlen;
+ offset -= sizeof(struct sctp_chunkhdr);
+
+ ecn_bits = ip->ip_tos;
+
+ /* sa_ignore NO_NULL_CHK */
+ sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
+ inp, stcb, net, ecn_bits, vrf_id);
+ /* inp's ref-count reduced && stcb unlocked */
+ if (m) {
+ sctp_m_freem(m);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+bad:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return;
+}
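
For readers following the ECN handling in sctp_process_ecn_marked_a()/_b()
above: the classification keys off the two low-order ECN bits of the IP TOS
byte as defined in RFC 3168, and CE is tested first because it sets both
bits. Below is a minimal standalone sketch of that classification; the
constant and function names are local to the example, not the kernel's
SCTP_* macros.

#include <stdint.h>
#include <stdio.h>

#define EX_ECN_MASK	0x03	/* low two bits of the TOS byte */
#define EX_ECT1_BIT	0x01	/* ECT(1) */
#define EX_ECT0_BIT	0x02	/* ECT(0) */
#define EX_CE_BITS	0x03	/* Congestion Experienced */

static const char *
ex_ecn_classify(uint8_t tos)
{
	uint8_t bits = tos & EX_ECN_MASK;

	if (bits == EX_CE_BITS)		/* both bits set: CE */
		return ("CE");
	else if (bits == EX_ECT1_BIT)
		return ("ECT(1)");
	else if (bits == EX_ECT0_BIT)
		return ("ECT(0)");
	return ("Not-ECT");
}

int
main(void)
{
	uint8_t samples[] = { 0x00, 0x01, 0x02, 0x03 };
	size_t i;

	for (i = 0; i < sizeof(samples); i++)
		printf("tos 0x%02x -> %s\n", samples[i], ex_ecn_classify(samples[i]));
	return (0);
}

Built with any C99 compiler, this prints the ECN class for a handful of
sample TOS values.
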
Index: raw_ip.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/raw_ip.c -L sys/netinet/raw_ip.c -u -r1.1.1.1 -r1.2
--- sys/netinet/raw_ip.c
+++ sys/netinet/raw_ip.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
- * $FreeBSD: src/sys/netinet/raw_ip.c,v 1.150.2.3 2005/11/04 18:34:45 maxim Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/raw_ip.c,v 1.180 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
@@ -38,9 +40,9 @@
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -66,14 +68,12 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#endif /*FAST_IPSEC*/
-
#ifdef IPSEC
-#include <netinet6/ipsec.h>
+#include <netipsec/ipsec.h>
#endif /*IPSEC*/
+#include <security/mac/mac_framework.h>
+
struct inpcbhead ripcb;
struct inpcbinfo ripcbinfo;
@@ -104,34 +104,48 @@
void (*ip_rsvp_force_done)(struct socket *);
/*
- * Nominal space allocated to a raw ip socket.
- */
-#define RIPSNDQ 8192
-#define RIPRCVQ 8192
-
-/*
* Raw interface to IP protocol.
*/
/*
* Initialize raw connection block q.
*/
+static void
+rip_zone_change(void *tag)
+{
+
+ uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+rip_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "rawinp");
+ return (0);
+}
+
void
-rip_init()
+rip_init(void)
{
+
INP_INFO_LOCK_INIT(&ripcbinfo, "rip");
LIST_INIT(&ripcb);
- ripcbinfo.listhead = &ripcb;
+ ripcbinfo.ipi_listhead = &ripcb;
/*
* XXX We don't use the hash list for raw IP, but it's easier
* to allocate a one entry hash list than it is to check all
* over the place for hashbase == NULL.
*/
- ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
- ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
+ ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
+ ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+ &ripcbinfo.ipi_porthashmask);
ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
}
static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
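
The rip_init() hunk above shows the pattern this change applies to both the
raw-IP and UDP PCB zones: a UMA init routine that sets up the per-inpcb lock
once per item, a zone cap taken from maxsockets, and an event handler so the
cap follows later changes to kern.ipc.maxsockets. A condensed sketch of the
pattern follows; it only compiles inside the kernel tree, and the proto_*
names are placeholders rather than anything in this diff.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/socketvar.h>	/* maxsockets */
#include <vm/uma.h>
#include <netinet/in_pcb.h>

static struct inpcbinfo proto_pcbinfo;

/* UMA init: runs once per item; the inpcb lock survives free/alloc cycles. */
static int
proto_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *inp = mem;

	INP_LOCK_INIT(inp, "inp", "protoinp");
	return (0);
}

/* Re-apply the zone cap whenever maxsockets is raised at runtime. */
static void
proto_zone_change(void *tag)
{

	uma_zone_set_max(proto_pcbinfo.ipi_zone, maxsockets);
}

void
proto_init(void)
{

	proto_pcbinfo.ipi_zone = uma_zcreate("protocb", sizeof(struct inpcb),
	    NULL, NULL, proto_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_zone_set_max(proto_pcbinfo.ipi_zone, maxsockets);
	EVENTHANDLER_REGISTER(maxsockets_change, proto_zone_change, NULL,
	    EVENTHANDLER_PRI_ANY);
}
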
@@ -143,16 +157,12 @@
INP_LOCK_ASSERT(last);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/* check AH/ESP integrity. */
if (ipsec4_in_reject(n, last)) {
policyfail = 1;
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif /*IPSEC*/
- /* do not inject data to pcb */
}
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
#ifdef MAC
if (!policyfail && mac_check_inpcb_deliver(last, n) != 0)
policyfail = 1;
@@ -166,7 +176,7 @@
so = last->inp_socket;
if ((last->inp_flags & INP_CONTROLOPTS) ||
- (so->so_options & SO_TIMESTAMP))
+ (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
ip_savecontrol(last, &opts, ip, n);
SOCKBUF_LOCK(&so->so_rcv);
if (sbappendaddr_locked(&so->so_rcv,
@@ -316,7 +326,7 @@
ipstat.ips_rawout++;
}
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (inp->inp_flags & INP_ONESBCAST)
flags |= IP_SENDONES;
#ifdef MAC
@@ -341,7 +351,7 @@
* after it gives up privilege, so some caution is required. For options
* passed down to the IP layer via ip_ctloutput(), checks are assumed to be
* performed in ip_ctloutput() and therefore no check occurs here.
- * Unilaterally checking suser() here breaks normal IP socket option
+ * Unilaterally checking priv_check() here breaks normal IP socket option
* operations on raw sockets.
*
* When adding new socket options here, make sure to add access control
@@ -369,7 +379,13 @@
case IP_FW_GET:
case IP_FW_TABLE_GETSIZE:
case IP_FW_TABLE_LIST:
- error = suser(curthread);
+ case IP_FW_NAT_GET_CONFIG:
+ case IP_FW_NAT_GET_LOG:
+ /*
+ * XXXRW: Isn't this checked one layer down? Yes, it
+ * is.
+ */
+ error = priv_check(curthread, PRIV_NETINET_IPFW);
if (error != 0)
return (error);
if (ip_fw_ctl_ptr != NULL)
@@ -379,7 +395,7 @@
break;
case IP_DUMMYNET_GET:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
if (error != 0)
return (error);
if (ip_dn_ctl_ptr != NULL)
@@ -400,7 +416,7 @@
case MRT_API_CONFIG:
case MRT_ADD_BW_UPCALL:
case MRT_DEL_BW_UPCALL:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
@@ -434,7 +450,12 @@
case IP_FW_TABLE_ADD:
case IP_FW_TABLE_DEL:
case IP_FW_TABLE_FLUSH:
- error = suser(curthread);
+ case IP_FW_NAT_CFG:
+ case IP_FW_NAT_DEL:
+ /*
+ * XXXRW: Isn't this checked one layer down?
+ */
+ error = priv_check(curthread, PRIV_NETINET_IPFW);
if (error != 0)
return (error);
if (ip_fw_ctl_ptr != NULL)
@@ -446,7 +467,7 @@
case IP_DUMMYNET_CONFIGURE:
case IP_DUMMYNET_DEL:
case IP_DUMMYNET_FLUSH:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
if (error != 0)
return (error);
if (ip_dn_ctl_ptr != NULL)
@@ -456,14 +477,14 @@
break ;
case IP_RSVP_ON:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_init(so);
break;
case IP_RSVP_OFF:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_done();
@@ -471,7 +492,7 @@
case IP_RSVP_VIF_ON:
case IP_RSVP_VIF_OFF:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_vif ?
@@ -490,7 +511,7 @@
case MRT_API_CONFIG:
case MRT_ADD_BW_UPCALL:
case MRT_DEL_BW_UPCALL:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
@@ -564,12 +585,12 @@
}
}
-u_long rip_sendspace = RIPSNDQ;
-u_long rip_recvspace = RIPRCVQ;
+u_long rip_sendspace = 9216;
+u_long rip_recvspace = 9216;
-SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
&rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
-SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
&rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
static int
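
A small but easy-to-miss fix in the hunk above: rip_sendspace and
rip_recvspace are u_long, yet they were exported through SYSCTL_INT, which
describes a 4-byte object to the sysctl machinery and is therefore wrong on
LP64 platforms where u_long is 8 bytes. Switching to SYSCTL_ULONG keeps the
declared width in sync with the variable. A minimal sketch of the corrected
idiom (kernel context; the example_* names are illustrative):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

/*
 * A u_long tunable must be exported with the matching _ULONG macro;
 * exporting it via SYSCTL_INT would describe only 4 of its 8 bytes on LP64.
 */
static u_long example_sendspace = 9216;
SYSCTL_ULONG(_net_inet_raw, OID_AUTO, example_maxdgram, CTLFLAG_RW,
    &example_sendspace, 0, "Maximum outgoing datagram size (example)");
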
@@ -578,39 +599,24 @@
struct inpcb *inp;
int error;
- /* XXX why not lower? */
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- /* XXX counter, printf */
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
- if (jailed(td->td_ucred) && !jail_allow_raw_sockets) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return (EPERM);
- }
- if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL)) != 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
+
+ error = priv_check(td, PRIV_NETINET_RAW);
+ if (error)
return error;
- }
- if (proto >= IPPROTO_MAX || proto < 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (proto >= IPPROTO_MAX || proto < 0)
return EPROTONOSUPPORT;
- }
-
error = soreserve(so, rip_sendspace, rip_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (error)
return error;
- }
- error = in_pcballoc(so, &ripcbinfo, "rawinp");
+ INP_INFO_WLOCK(&ripcbinfo);
+ error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_p = proto;
@@ -620,12 +626,17 @@
}
static void
-rip_pcbdetach(struct socket *so, struct inpcb *inp)
+rip_detach(struct socket *so)
{
+ struct inpcb *inp;
- INP_INFO_WLOCK_ASSERT(&ripcbinfo);
- INP_LOCK_ASSERT(inp);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("rip_detach: not closed"));
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
if (so == ip_mrouter && ip_mrouter_done)
ip_mrouter_done();
if (ip_rsvp_force_done)
@@ -633,53 +644,68 @@
if (so == ip_rsvpd)
ip_rsvp_done();
in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&ripcbinfo);
}
-static int
-rip_detach(struct socket *so)
+static void
+rip_dodisconnect(struct socket *so, struct inpcb *inp)
+{
+
+ INP_LOCK_ASSERT(inp);
+
+ inp->inp_faddr.s_addr = INADDR_ANY;
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED;
+ SOCK_UNLOCK(so);
+}
+
+static void
+rip_abort(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- /* XXX counter, printf */
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
- rip_pcbdetach(so, inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
-static int
-rip_abort(struct socket *so)
+static void
+rip_close(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL; /* ??? possible? panic instead? */
- }
+ KASSERT(inp != NULL, ("rip_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
- soisdisconnected(so);
- if (so->so_state & SS_NOFDREF)
- rip_pcbdetach(so, inp);
- else
- INP_UNLOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
static int
rip_disconnect(struct socket *so)
{
+ struct inpcb *inp;
+
if ((so->so_state & SS_ISCONNECTED) == 0)
return ENOTCONN;
- return rip_abort(so);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&ripcbinfo);
+ return (0);
}
static int
@@ -705,12 +731,9 @@
ifa_ifwithaddr((struct sockaddr *)addr) == 0))
return EADDRNOTAVAIL;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
inp->inp_laddr = addr->sin_addr;
INP_UNLOCK(inp);
@@ -731,12 +754,9 @@
if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
return EAFNOSUPPORT;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
inp->inp_faddr = addr->sin_addr;
soisconnected(so);
@@ -750,14 +770,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&ripcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -765,32 +780,30 @@
static int
rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
u_long dst;
- int ret;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_send: inp == NULL"));
+ /*
+ * Note: 'dst' reads below are unlocked.
+ */
if (so->so_state & SS_ISCONNECTED) {
if (nam) {
- INP_INFO_WUNLOCK(&ripcbinfo);
m_freem(m);
return EISCONN;
}
- dst = inp->inp_faddr.s_addr;
+ dst = inp->inp_faddr.s_addr; /* Unlocked read. */
} else {
if (nam == NULL) {
- INP_INFO_WUNLOCK(&ripcbinfo);
m_freem(m);
return ENOTCONN;
}
dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
}
- ret = rip_output(m, so, dst);
- INP_INFO_WUNLOCK(&ripcbinfo);
- return ret;
+ return rip_output(m, so, dst);
}
static int
@@ -836,7 +849,7 @@
return ENOMEM;
INP_INFO_RLOCK(&ripcbinfo);
- for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(ripcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -852,6 +865,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -860,8 +874,10 @@
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
@@ -882,27 +898,6 @@
return error;
}
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-rip_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &ripcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-rip_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &ripcbinfo));
-}
-
-
SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
@@ -914,9 +909,10 @@
.pru_control = in_control,
.pru_detach = rip_detach,
.pru_disconnect = rip_disconnect,
- .pru_peeraddr = rip_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = rip_send,
.pru_shutdown = rip_shutdown,
- .pru_sockaddr = rip_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = rip_close,
};
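
The recurring theme of the raw_ip.c change above is replacing blanket
suser() checks with priv_check() against specific named privileges
(PRIV_NETINET_IPFW, PRIV_NETINET_DUMMYNET, PRIV_NETINET_MROUTE), so
privilege policy can distinguish firewall, dummynet and multicast-routing
control instead of requiring full superuser. A hedged sketch of the
resulting shape of such a check, using option names that appear in the
diff; the function itself is illustrative and compiles only in kernel
context.

#include <sys/param.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ip_mroute.h>

/* Illustrative: map privileged raw-IP socket options to named privileges. */
static int
example_priv_gate(struct thread *td, int sopt_name)
{
	int error;

	switch (sopt_name) {
	case IP_FW_TABLE_ADD:		/* firewall control */
		error = priv_check(td, PRIV_NETINET_IPFW);
		break;
	case IP_DUMMYNET_CONFIGURE:	/* dummynet control */
		error = priv_check(td, PRIV_NETINET_DUMMYNET);
		break;
	case MRT_ADD_BW_UPCALL:		/* multicast routing control */
		error = priv_check(td, PRIV_NETINET_MROUTE);
		break;
	default:
		error = 0;		/* unprivileged options pass through */
		break;
	}
	return (error);
}
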
Index: udp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/udp_usrreq.c -L sys/netinet/udp_usrreq.c -u -r1.2 -r1.3
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,23 +28,25 @@
* SUCH DAMAGE.
*
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
- * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.175.2.5 2006/02/14 21:40:21 rwatson Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.218 2007/10/07 20:44:24 silby Exp $");
+
#include "opt_ipfw.h"
-#include "opt_ipsec.h"
#include "opt_inet6.h"
+#include "opt_ipsec.h"
#include "opt_mac.h"
#include <sys/param.h>
-#include <sys/systm.h>
#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -52,6 +55,7 @@
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/systm.h>
#include <vm/uma.h>
@@ -59,8 +63,8 @@
#include <net/route.h>
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#ifdef INET6
@@ -69,88 +73,190 @@
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#endif /*FAST_IPSEC*/
-
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif /*IPSEC*/
+#include <netipsec/ipsec.h>
+#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* UDP protocol implementation.
* Per RFC 768, August, 1980.
*/
-#ifndef COMPAT_42
-static int udpcksum = 1;
+
+/*
+ * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums
+ * removes the only data integrity mechanism for packets; malformed packets
+ * that would otherwise be discarded due to bad checksums are then delivered
+ * and may cause problems (especially for NFS data blocks).
+ */
+static int udp_cksum = 1;
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum,
+ 0, "");
+
+int udp_log_in_vain = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &udp_log_in_vain, 0, "Log all incoming UDP packets");
+
+int udp_blackhole = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &udp_blackhole, 0,
+ "Do not send port unreachables for refused connects");
+
+u_long udp_sendspace = 9216; /* really max datagram size */
+ /* 40 1K datagrams */
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
+ &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
+
+u_long udp_recvspace = 40 * (1024 +
+#ifdef INET6
+ sizeof(struct sockaddr_in6)
#else
-static int udpcksum = 0; /* XXX */
+ sizeof(struct sockaddr_in)
#endif
-SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
- &udpcksum, 0, "");
+ );
-int log_in_vain = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &log_in_vain, 0, "Log all incoming UDP packets");
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
-static int blackhole = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
- &blackhole, 0, "Do not send port unreachables for refused connects");
-
-static int strict_mcast_mship = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
- &strict_mcast_mship, 0, "Only send multicast to member sockets");
-
-struct inpcbhead udb; /* from udp_var.h */
-#define udb6 udb /* for KAME src sync over BSD*'s */
-struct inpcbinfo udbinfo;
+struct inpcbhead udb; /* from udp_var.h */
+struct inpcbinfo udbinfo;
#ifndef UDBHASHSIZE
-#define UDBHASHSIZE 16
+#define UDBHASHSIZE 16
#endif
-struct udpstat udpstat; /* from udp_var.h */
-SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
- &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
-
-static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
- int off, struct sockaddr_in *udp_in);
-
-static int udp_detach(struct socket *so);
-static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
- struct mbuf *, struct thread *);
+struct udpstat udpstat; /* from udp_var.h */
+SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &udpstat,
+ udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+
+static void udp_detach(struct socket *so);
+static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *);
+
+static void
+udp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
+}
+
+static int
+udp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp;
+
+ inp = mem;
+ INP_LOCK_INIT(inp, "inp", "udpinp");
+ return (0);
+}
void
-udp_init()
+udp_init(void)
{
+
INP_INFO_LOCK_INIT(&udbinfo, "udp");
LIST_INIT(&udb);
- udbinfo.listhead = &udb;
- udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
- udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
- &udbinfo.porthashmask);
+ udbinfo.ipi_listhead = &udb;
+ udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &udbinfo.ipi_hashmask);
+ udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &udbinfo.ipi_porthashmask);
udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
- NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * Subroutine of udp_input(), which appends the provided mbuf chain to the
+ * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
+ * contains the source address. If the socket ends up being an IPv6 socket,
+ * udp_append() will convert to a sockaddr_in6 before passing the address
+ * into the socket code.
+ */
+static void
+udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
+ struct sockaddr_in *udp_in)
+{
+ struct sockaddr *append_sa;
+ struct socket *so;
+ struct mbuf *opts = 0;
+#ifdef INET6
+ struct sockaddr_in6 udp_in6;
+#endif
+
+ INP_LOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ /* Check AH/ESP integrity. */
+ if (ipsec4_in_reject(n, inp)) {
+ m_freem(n);
+ ipsec4stat.in_polvio++;
+ return;
+ }
+#endif /* IPSEC */
+#ifdef MAC
+ if (mac_check_inpcb_deliver(inp, n) != 0) {
+ m_freem(n);
+ return;
+ }
+#endif
+ if (inp->inp_flags & INP_CONTROLOPTS ||
+ inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ int savedflags;
+
+ savedflags = inp->inp_flags;
+ inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
+ ip6_savecontrol(inp, n, &opts);
+ inp->inp_flags = savedflags;
+ } else
+#endif
+ ip_savecontrol(inp, &opts, ip, n);
+ }
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ bzero(&udp_in6, sizeof(udp_in6));
+ udp_in6.sin6_len = sizeof(udp_in6);
+ udp_in6.sin6_family = AF_INET6;
+ in6_sin_2_v4mapsin6(udp_in, &udp_in6);
+ append_sa = (struct sockaddr *)&udp_in6;
+ } else
+#endif
+ append_sa = (struct sockaddr *)udp_in;
+ m_adj(n, off);
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ udpstat.udps_fullsock++;
+ } else
+ sorwakeup_locked(so);
}
void
-udp_input(m, off)
- register struct mbuf *m;
- int off;
+udp_input(struct mbuf *m, int off)
{
int iphlen = off;
- register struct ip *ip;
- register struct udphdr *uh;
- register struct inpcb *inp;
+ struct ip *ip;
+ struct udphdr *uh;
+ struct ifnet *ifp;
+ struct inpcb *inp;
int len;
struct ip save_ip;
struct sockaddr_in udp_in;
@@ -158,13 +264,13 @@
struct m_tag *fwd_tag;
#endif
+ ifp = m->m_pkthdr.rcvif;
udpstat.udps_ipackets++;
/*
- * Strip IP options, if any; should skip this,
- * make available to user, and use on returned packets,
- * but we don't yet have a way to check the checksum
- * with options still present.
+ * Strip IP options, if any; should skip this, make available to
+ * user, and use on returned packets, but we don't yet have a way to
+ * check the checksum with options still present.
*/
if (iphlen > sizeof (struct ip)) {
ip_stripoptions(m, (struct mbuf *)0);
@@ -184,13 +290,15 @@
}
uh = (struct udphdr *)((caddr_t)ip + iphlen);
- /* destination port of 0 is illegal, based on RFC768. */
+ /*
+ * Destination port of 0 is illegal, based on RFC768.
+ */
if (uh->uh_dport == 0)
goto badunlocked;
/*
- * Construct sockaddr format source address.
- * Stuff source address and datagram in user buffer.
+ * Construct sockaddr format source address. Stuff source address
+ * and datagram in user buffer.
*/
bzero(&udp_in, sizeof(udp_in));
udp_in.sin_len = sizeof(udp_in);
@@ -199,8 +307,8 @@
udp_in.sin_addr = ip->ip_src;
/*
- * Make mbuf data length reflect UDP length.
- * If not enough data to reflect UDP length, drop.
+ * Make mbuf data length reflect UDP length. If not enough data to
+ * reflect UDP length, drop.
*/
len = ntohs((u_short)uh->uh_ulen);
if (ip->ip_len != len) {
@@ -211,18 +319,22 @@
m_adj(m, len - ip->ip_len);
/* ip->ip_len = len; */
}
+
/*
- * Save a copy of the IP header in case we want restore it
- * for sending an ICMP error message in response.
+	 * Save a copy of the IP header in case we want to restore it for
+ * sending an ICMP error message in response.
*/
- if (!blackhole)
+ if (!udp_blackhole)
save_ip = *ip;
+ else
+ memset(&save_ip, 0, sizeof(save_ip));
/*
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
u_short uh_sum;
+
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
@@ -233,6 +345,7 @@
uh_sum ^= 0xffff;
} else {
char b[9];
+
bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
bzero(((struct ipovly *)ip)->ih_x1, 9);
((struct ipovly *)ip)->ih_len = uh->uh_ulen;
@@ -248,46 +361,33 @@
udpstat.udps_nosum++;
#ifdef IPFIREWALL_FORWARD
- /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
-
if (fwd_tag != NULL) {
struct sockaddr_in *next_hop;
- /* Do the hack. */
+ /*
+ * Do the hack.
+ */
next_hop = (struct sockaddr_in *)(fwd_tag + 1);
ip->ip_dst = next_hop->sin_addr;
uh->uh_dport = ntohs(next_hop->sin_port);
- /* Remove the tag from the packet. We don't need it anymore. */
+
+ /*
+ * Remove the tag from the packet. We don't need it anymore.
+ */
m_tag_delete(m, fwd_tag);
}
#endif
INP_INFO_RLOCK(&udbinfo);
-
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
- /*
- * Deliver a multicast or broadcast datagram to *all* sockets
- * for which the local and remote addresses and ports match
- * those of the incoming datagram. This allows more than
- * one process to receive multi/broadcasts on the same port.
- * (This really ought to be done for unicast datagrams as
- * well, but that would cause problems with existing
- * applications that open both address-specific sockets and
- * a wildcard socket listening to the same port -- they would
- * end up receiving duplicates of every unicast datagram.
- * Those applications open the multiple sockets to overcome an
- * inadequacy of the UDP socket interface, but for backwards
- * compatibility we avoid the problem here rather than
- * fixing the interface. Maybe 4.5BSD will remedy this?)
- */
+ struct ip_moptions *imo;
- /*
- * Locate pcb(s) for datagram.
- * (Algorithm copied from raw_intr().)
- */
last = NULL;
LIST_FOREACH(inp, &udb, inp_list) {
if (inp->inp_lport != uh->uh_dport)
@@ -296,73 +396,111 @@
if ((inp->inp_vflag & INP_IPV4) == 0)
continue;
#endif
- if (inp->inp_laddr.s_addr != INADDR_ANY) {
- if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
- continue;
- }
- if (inp->inp_faddr.s_addr != INADDR_ANY) {
- if (inp->inp_faddr.s_addr !=
- ip->ip_src.s_addr ||
- inp->inp_fport != uh->uh_sport)
- continue;
- }
+ if (inp->inp_laddr.s_addr != INADDR_ANY &&
+ inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+ continue;
+ if (inp->inp_faddr.s_addr != INADDR_ANY &&
+ inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+ continue;
+ /*
+ * XXX: Do not check source port of incoming datagram
+ * unless inp_connect() has been called to bind the
+ * fport part of the 4-tuple; the source could be
+ * trying to talk to us with an ephemeral port.
+ */
+ if (inp->inp_fport != 0 &&
+ inp->inp_fport != uh->uh_sport)
+ continue;
+
INP_LOCK(inp);
/*
- * Check multicast packets to make sure they are only
- * sent to sockets with multicast memberships for the
- * packet's destination address and arrival interface
+ * Handle socket delivery policy for any-source
+ * and source-specific multicast. [RFC3678]
*/
-#define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
-#define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
- if (strict_mcast_mship && inp->inp_moptions != NULL) {
- int mship, foundmship = 0;
-
- for (mship = 0; mship < NMSHIPS(inp); mship++) {
- if (MSHIP(inp, mship)->inm_addr.s_addr
- == ip->ip_dst.s_addr &&
- MSHIP(inp, mship)->inm_ifp
- == m->m_pkthdr.rcvif) {
- foundmship = 1;
- break;
+ imo = inp->inp_moptions;
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ imo != NULL) {
+ struct sockaddr_in sin;
+ struct in_msource *ims;
+ int blocked, mode;
+ size_t idx;
+
+ bzero(&sin, sizeof(struct sockaddr_in));
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_family = AF_INET;
+ sin.sin_addr = ip->ip_dst;
+
+ blocked = 0;
+ idx = imo_match_group(imo, ifp,
+ (struct sockaddr *)&sin);
+ if (idx == -1) {
+ /*
+ * No group membership for this socket.
+ * Do not bump udps_noportbcast, as
+ * this will happen further down.
+ */
+ blocked++;
+ } else {
+ /*
+ * Check for a multicast source filter
+ * entry on this socket for this group.
+ * MCAST_EXCLUDE is the default
+ * behaviour. It means default accept;
+ * entries, if present, denote sources
+ * to be excluded from delivery.
+ */
+ ims = imo_match_source(imo, idx,
+ (struct sockaddr *)&udp_in);
+ mode = imo->imo_mfilters[idx].imf_fmode;
+ if ((ims != NULL &&
+ mode == MCAST_EXCLUDE) ||
+ (ims == NULL &&
+ mode == MCAST_INCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: blocked by"
+ " source filter\n",
+ __func__);
+ }
+#endif
+ udpstat.udps_filtermcast++;
+ blocked++;
}
}
- if (foundmship == 0) {
+ if (blocked != 0) {
INP_UNLOCK(inp);
continue;
}
}
-#undef NMSHIPS
-#undef MSHIP
if (last != NULL) {
struct mbuf *n;
n = m_copy(m, 0, M_COPYALL);
if (n != NULL)
- udp_append(last, ip, n,
- iphlen +
- sizeof(struct udphdr),
- &udp_in);
+ udp_append(last, ip, n, iphlen +
+ sizeof(struct udphdr), &udp_in);
INP_UNLOCK(last);
}
last = inp;
/*
* Don't look for additional matches if this one does
* not have either the SO_REUSEPORT or SO_REUSEADDR
- * socket options set. This heuristic avoids searching
- * through all pcbs in the common case of a non-shared
- * port. It * assumes that an application will never
- * clear these options after setting them.
+ * socket options set. This heuristic avoids
+ * searching through all pcbs in the common case of a
+ * non-shared port. It assumes that an application
+ * will never clear these options after setting them.
*/
- if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ if ((last->inp_socket->so_options &
+ (SO_REUSEPORT|SO_REUSEADDR)) == 0)
break;
}
if (last == NULL) {
/*
- * No matching pcb found; discard datagram.
- * (No need to send an ICMP Port Unreachable
- * for a broadcast or multicast datgram.)
+ * No matching pcb found; discard datagram. (No need
+ * to send an ICMP Port Unreachable for a broadcast
+		 * or multicast datagram.)
*/
udpstat.udps_noportbcast++;
goto badheadlocked;
@@ -373,13 +511,14 @@
INP_INFO_RUNLOCK(&udbinfo);
return;
}
+
/*
* Locate pcb for datagram.
*/
inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+ ip->ip_dst, uh->uh_dport, 1, ifp);
if (inp == NULL) {
- if (log_in_vain) {
+ if (udp_log_in_vain) {
char buf[4*sizeof "123"];
strcpy(buf, inet_ntoa(ip->ip_dst));
@@ -393,7 +532,7 @@
udpstat.udps_noportbcast++;
goto badheadlocked;
}
- if (blackhole)
+ if (udp_blackhole)
goto badheadlocked;
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
goto badheadlocked;
@@ -403,8 +542,11 @@
INP_INFO_RUNLOCK(&udbinfo);
return;
}
+
+ /*
+ * Check the minimum TTL for socket.
+ */
INP_LOCK(inp);
- /* Check the minimum TTL for socket. */
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
goto badheadlocked;
udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
@@ -418,111 +560,27 @@
INP_INFO_RUNLOCK(&udbinfo);
badunlocked:
m_freem(m);
- return;
}
/*
- * Subroutine of udp_input(), which appends the provided mbuf chain to the
- * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
- * contains the source address. If the socket ends up being an IPv6 socket,
- * udp_append() will convert to a sockaddr_in6 before passing the address
- * into the socket code.
- */
-static void
-udp_append(last, ip, n, off, udp_in)
- struct inpcb *last;
- struct ip *ip;
- struct mbuf *n;
- int off;
- struct sockaddr_in *udp_in;
-{
- struct sockaddr *append_sa;
- struct socket *so;
- struct mbuf *opts = 0;
-#ifdef INET6
- struct sockaddr_in6 udp_in6;
-#endif
-
- INP_LOCK_ASSERT(last);
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /* check AH/ESP integrity. */
- if (ipsec4_in_reject(n, last)) {
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif /*IPSEC*/
- m_freem(n);
- return;
- }
-#endif /*IPSEC || FAST_IPSEC*/
-#ifdef MAC
- if (mac_check_inpcb_deliver(last, n) != 0) {
- m_freem(n);
- return;
- }
-#endif
- if (last->inp_flags & INP_CONTROLOPTS ||
- last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
-#ifdef INET6
- if (last->inp_vflag & INP_IPV6) {
- int savedflags;
-
- savedflags = last->inp_flags;
- last->inp_flags &= ~INP_UNMAPPABLEOPTS;
- ip6_savecontrol(last, n, &opts);
- last->inp_flags = savedflags;
- } else
-#endif
- ip_savecontrol(last, &opts, ip, n);
- }
-#ifdef INET6
- if (last->inp_vflag & INP_IPV6) {
- bzero(&udp_in6, sizeof(udp_in6));
- udp_in6.sin6_len = sizeof(udp_in6);
- udp_in6.sin6_family = AF_INET6;
- in6_sin_2_v4mapsin6(udp_in, &udp_in6);
- append_sa = (struct sockaddr *)&udp_in6;
- } else
-#endif
- append_sa = (struct sockaddr *)udp_in;
- m_adj(n, off);
-
- so = last->inp_socket;
- SOCKBUF_LOCK(&so->so_rcv);
- if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
- m_freem(n);
- if (opts)
- m_freem(opts);
- udpstat.udps_fullsock++;
- SOCKBUF_UNLOCK(&so->so_rcv);
- } else
- sorwakeup_locked(so);
-}
-
-/*
- * Notify a udp user of an asynchronous error;
- * just wake up so that he can collect error status.
+ * Notify a udp user of an asynchronous error; just wake up so that they can
+ * collect error status.
*/
struct inpcb *
-udp_notify(inp, errno)
- register struct inpcb *inp;
- int errno;
+udp_notify(struct inpcb *inp, int errno)
{
+
inp->inp_socket->so_error = errno;
sorwakeup(inp->inp_socket);
sowwakeup(inp->inp_socket);
- return inp;
+ return (inp);
}
void
-udp_ctlinput(cmd, sa, vip)
- int cmd;
- struct sockaddr *sa;
- void *vip;
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
struct ip *ip = vip;
struct udphdr *uh;
- struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
struct in_addr faddr;
struct inpcb *inp;
@@ -535,16 +593,18 @@
*/
if (PRC_IS_REDIRECT(cmd))
return;
+
/*
* Hostdead is ugly because it goes linearly through all PCBs.
- * XXX: We never get this from ICMP, otherwise it makes an
- * excellent DoS attack on machines with many connections.
+ *
+ * XXX: We never get this from ICMP, otherwise it makes an excellent
+ * DoS attack on machines with many connections.
*/
if (cmd == PRC_HOSTDEAD)
- ip = 0;
+ ip = NULL;
else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
return;
- if (ip) {
+ if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
INP_INFO_RLOCK(&udbinfo);
inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
@@ -552,13 +612,14 @@
if (inp != NULL) {
INP_LOCK(inp);
if (inp->inp_socket != NULL) {
- (*notify)(inp, inetctlerrmap[cmd]);
+ udp_notify(inp, inetctlerrmap[cmd]);
}
INP_UNLOCK(inp);
}
INP_INFO_RUNLOCK(&udbinfo);
} else
- in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
+ in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd],
+ udp_notify);
}
static int
@@ -570,18 +631,18 @@
struct xinpgen xig;
/*
- * The process of preparing the TCB list is too time-consuming and
+ * The process of preparing the PCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == 0) {
n = udbinfo.ipi_count;
req->oldidx = 2 * (sizeof xig)
+ (n + n/8) * sizeof(struct xinpcb);
- return 0;
+ return (0);
}
if (req->newptr != 0)
- return EPERM;
+ return (EPERM);
/*
* OK, now we're committed to doing something.
@@ -602,14 +663,14 @@
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
- return error;
+ return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == 0)
- return ENOMEM;
+ return (ENOMEM);
INP_INFO_RLOCK(&udbinfo);
- for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -623,6 +684,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -632,16 +694,17 @@
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
xi.xi_inp.inp_gencnt = inp->inp_gencnt;
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
- * Give the user an updated idea of our state.
- * If the generation differs from what we told
- * her before, she knows that something happened
- * while we were processing this request, and it
- * might be necessary to retry.
+ * Give the user an updated idea of our state. If the
+ * generation differs from what we told her before, she knows
+ * that something happened while we were processing this
+ * request, and it might be necessary to retry.
*/
INP_INFO_RLOCK(&udbinfo);
xig.xig_gen = udbinfo.ipi_gencnt;
@@ -651,11 +714,11 @@
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
- return error;
+ return (error);
}
SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
- udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+ udp_pcblist, "S,xinpcb", "List of active UDP sockets");
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
@@ -665,7 +728,7 @@
struct inpcb *inp;
int error;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -694,15 +757,11 @@
udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
static int
-udp_output(inp, m, addr, control, td)
- register struct inpcb *inp;
- struct mbuf *m;
- struct sockaddr *addr;
- struct mbuf *control;
- struct thread *td;
+udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *td)
{
- register struct udpiphdr *ui;
- register int len = m->m_pkthdr.len;
+ struct udpiphdr *ui;
+ int len = m->m_pkthdr.len;
struct in_addr faddr, laddr;
struct cmsghdr *cm;
struct sockaddr_in *sin, src;
@@ -713,34 +772,34 @@
/*
* udp_output() may need to temporarily bind or connect the current
- * inpcb. As such, we don't know up front what inpcb locks we will
- * need. Do any work to decide what is needed up front before
- * acquiring locks.
+ * inpcb. As such, we don't know up front whether we will need the
+ * pcbinfo lock or not. Do any work to decide what is needed up
+ * front before acquiring any locks.
*/
if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
if (control)
m_freem(control);
m_freem(m);
- return EMSGSIZE;
+ return (EMSGSIZE);
}
- src.sin_addr.s_addr = INADDR_ANY;
+ src.sin_family = 0;
if (control != NULL) {
/*
- * XXX: Currently, we assume all the optional information
- * is stored in a single mbuf.
+ * XXX: Currently, we assume all the optional information is
+ * stored in a single mbuf.
*/
if (control->m_next) {
m_freem(control);
m_freem(m);
- return EINVAL;
+ return (EINVAL);
}
for (; control->m_len > 0;
control->m_data += CMSG_ALIGN(cm->cmsg_len),
control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
cm = mtod(control, struct cmsghdr *);
- if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
- cm->cmsg_len > control->m_len) {
+ if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
+ || cm->cmsg_len > control->m_len) {
error = EINVAL;
break;
}
@@ -758,8 +817,10 @@
src.sin_family = AF_INET;
src.sin_len = sizeof(src);
src.sin_port = inp->inp_lport;
- src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
+ src.sin_addr =
+ *(struct in_addr *)CMSG_DATA(cm);
break;
+
default:
error = ENOPROTOOPT;
break;
@@ -771,11 +832,10 @@
}
if (error) {
m_freem(m);
- return error;
+ return (error);
}
- if (src.sin_addr.s_addr != INADDR_ANY ||
- addr != NULL) {
+ if (src.sin_family == AF_INET || addr != NULL) {
INP_INFO_WLOCK(&udbinfo);
unlock_udbinfo = 1;
} else
@@ -786,10 +846,17 @@
mac_create_mbuf_from_inpcb(inp, m);
#endif
+ /*
+ * If the IP_SENDSRCADDR control message was specified, override the
+ * source address for this datagram. Its use is invalidated if the
+ * address thus specified is incomplete or clobbers other inpcbs.
+ */
laddr = inp->inp_laddr;
lport = inp->inp_lport;
- if (src.sin_addr.s_addr != INADDR_ANY) {
- if (lport == 0) {
+ if (src.sin_family == AF_INET) {
+ if ((lport == 0) ||
+ (laddr.s_addr == INADDR_ANY &&
+ src.sin_addr.s_addr == INADDR_ANY)) {
error = EINVAL;
goto release;
}
@@ -802,7 +869,8 @@
if (addr) {
sin = (struct sockaddr_in *)addr;
if (jailed(td->td_ucred))
- prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
+ prison_remote_ip(td->td_ucred, 0,
+ &sin->sin_addr.s_addr);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
error = EISCONN;
goto release;
@@ -852,8 +920,8 @@
m->m_pkthdr.len -= max_linkhdr;
/*
- * Fill in mbuf with extended UDP header
- * and addresses and length put into network format.
+ * Fill in mbuf with extended UDP header and addresses and length put
+ * into network format.
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
@@ -869,6 +937,7 @@
*/
if (inp->inp_flags & INP_DONTFRAG) {
struct ip *ip;
+
ip = (struct ip *)&ui->ui_i;
ip->ip_off |= IP_DF;
}
@@ -878,22 +947,21 @@
ipflags |= IP_ROUTETOIF;
if (inp->inp_socket->so_options & SO_BROADCAST)
ipflags |= IP_ALLOWBROADCAST;
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (inp->inp_flags & INP_ONESBCAST)
ipflags |= IP_SENDONES;
/*
* Set up checksum and output datagram.
*/
- if (udpcksum) {
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (udp_cksum) {
+ if (inp->inp_flags & INP_ONESBCAST)
faddr.s_addr = INADDR_BROADCAST;
ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- } else {
+ } else
ui->ui_sum = 0;
- }
((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
@@ -914,37 +982,22 @@
return (error);
}
-u_long udp_sendspace = 9216; /* really max datagram size */
- /* 40 1K datagrams */
-SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
- &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
-
-u_long udp_recvspace = 40 * (1024 +
-#ifdef INET6
- sizeof(struct sockaddr_in6)
-#else
- sizeof(struct sockaddr_in)
-#endif
- );
-SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
- &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
-
-static int
+static void
udp_abort(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL; /* ??? possible? panic instead? */
- }
+ KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- soisdisconnected(so);
- in_pcbdetach(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
@@ -953,30 +1006,24 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
error = soreserve(so, udp_sendspace, udp_recvspace);
+ if (error)
+ return (error);
+ INP_INFO_WLOCK(&udbinfo);
+ error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
- return error;
- }
- error = in_pcballoc(so, &udbinfo, "udpinp");
- if (error) {
- INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_ttl = ip_defttl;
INP_UNLOCK(inp);
- return 0;
+ return (0);
}
static int
@@ -985,17 +1032,32 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
error = in_pcbbind(inp, nam, td->td_ucred);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
+}
+
+static void
+udp_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_close: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&udbinfo);
}
static int
@@ -1005,17 +1067,14 @@
int error;
struct sockaddr_in *sin;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return EISCONN;
+ return (EISCONN);
}
sin = (struct sockaddr_in *)nam;
if (jailed(td->td_ucred))
@@ -1025,24 +1084,23 @@
soisconnected(so);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
-static int
+static void
udp_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("udp_detach: not disconnected"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
in_pcbdetach(inp);
+ in_pcbfree(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
@@ -1050,35 +1108,35 @@
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
if (inp->inp_faddr.s_addr == INADDR_ANY) {
INP_INFO_WUNLOCK(&udbinfo);
INP_UNLOCK(inp);
- return ENOTCONN;
+ return (ENOTCONN);
}
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED; /* XXX */
+ SOCK_UNLOCK(so);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- so->so_state &= ~SS_ISCONNECTED; /* XXX */
- return 0;
+ return (0);
}
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
inp = sotoinpcb(so);
- return udp_output(inp, m, addr, control, td);
+ KASSERT(inp != NULL, ("udp_send: inp == NULL"));
+ return (udp_output(inp, m, addr, control, td));
}
int
@@ -1086,38 +1144,12 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&udbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
- return 0;
-}
-
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
- * here because in_setsockaddr will call malloc and might block.
- */
-static int
-udp_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &udbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-udp_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &udbinfo));
+ return (0);
}
struct pr_usrreqs udp_usrreqs = {
@@ -1128,9 +1160,11 @@
.pru_control = in_control,
.pru_detach = udp_detach,
.pru_disconnect = udp_disconnect,
- .pru_peeraddr = udp_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = udp_send,
+ .pru_sosend = sosend_dgram,
.pru_shutdown = udp_shutdown,
- .pru_sockaddr = udp_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = udp_close,
};
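
For reference, the delivery-policy check added to udp_input() above (imo_match_group()/imo_match_source(), per RFC 3678) only matters once a receiver has joined a source-specific group from userland. A minimal userland sketch of such a join, not part of this diff and using placeholder group/source/port values, could look like:

/*
 * Illustrative userland sketch (not part of this commit): joining a
 * source-specific IPv4 multicast group with the RFC 3678 API so that
 * the new imo_match_group()/imo_match_source() checks in udp_input()
 * apply.  Group, source and port below are placeholders.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	struct ip_mreq_source mreqs;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s == -1)
		err(1, "socket");

	/* Bind the UDP port the datagrams will arrive on. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(5001);		/* placeholder port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");

	/* Accept group 232.1.1.1 only from source 192.0.2.1 (placeholders). */
	memset(&mreqs, 0, sizeof(mreqs));
	inet_pton(AF_INET, "232.1.1.1", &mreqs.imr_multiaddr);
	inet_pton(AF_INET, "192.0.2.1", &mreqs.imr_sourceaddr);
	mreqs.imr_interface.s_addr = htonl(INADDR_ANY);
	if (setsockopt(s, IPPROTO_IP, IP_ADD_SOURCE_MEMBERSHIP,
	    &mreqs, sizeof(mreqs)) == -1)
		err(1, "IP_ADD_SOURCE_MEMBERSHIP");

	/* ... recvfrom() loop would go here ... */
	close(s);
	return (0);
}

Datagrams from other sources to that group then fall into the MCAST_INCLUDE "blocked" path above and bump udps_filtermcast rather than being delivered.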
--- /dev/null
+++ sys/netinet/tcp_reass.c
@@ -0,0 +1,285 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_reass.c,v 1.353 2007/10/07 20:44:24 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet6/tcp6_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
+/* Initialize TCP reassembly queue */
+static void
+tcp_reass_zone_change(void *tag)
+{
+
+ tcp_reass_maxseg = nmbclusters / 16;
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
+uma_zone_t tcp_reass_zone;
+
+void
+tcp_reass_init(void)
+{
+
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+int
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
+{
+ struct tseg_qent *q;
+ struct tseg_qent *p = NULL;
+ struct tseg_qent *nq;
+ struct tseg_qent *te = NULL;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+
+ INP_LOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
+	 * Call with th==NULL after becoming established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (th == NULL)
+ goto present;
+
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue. Always keep one global queue entry spare to be able to
+ * process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
+ if (te == NULL) {
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ LIST_FOREACH(q, &tp->t_segq, tqe_q) {
+ if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+ break;
+ p = q;
+ }
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if (p != NULL) {
+ int i;
+ /* conversion to int (in i) handles seq wraparound */
+ i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
+ if (i > 0) {
+ if (i >= *tlenp) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += *tlenp;
+ m_freem(m);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ /*
+ * Try to present any queued data
+ * at the left window edge to the user.
+ * This is needed after the 3-WHS
+ * completes.
+ */
+ goto present; /* ??? */
+ }
+ m_adj(m, i);
+ *tlenp -= i;
+ th->th_seq += i;
+ }
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += *tlenp;
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q) {
+ int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
+ if (i <= 0)
+ break;
+ if (i < q->tqe_len) {
+ q->tqe_th->th_seq += i;
+ q->tqe_len -= i;
+ m_adj(q->tqe_m, i);
+ break;
+ }
+
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ m_freem(q->tqe_m);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ q = nq;
+ }
+
+ /* Insert the new segment queue entry into place. */
+ te->tqe_m = m;
+ te->tqe_th = th;
+ te->tqe_len = *tlenp;
+
+ if (p == NULL) {
+ LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+ } else {
+ LIST_INSERT_AFTER(p, te, tqe_q);
+ }
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (!TCPS_HAVEESTABLISHED(tp->t_state))
+ return (0);
+ q = LIST_FIRST(&tp->t_segq);
+ if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+ return (0);
+ SOCKBUF_LOCK(&so->so_rcv);
+ do {
+ tp->rcv_nxt += q->tqe_len;
+ flags = q->tqe_th->th_flags & TH_FIN;
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ m_freem(q->tqe_m);
+ else
+ sbappendstream_locked(&so->so_rcv, q->tqe_m);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ q = nq;
+ } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+ ND6_HINT(tp);
+ sorwakeup_locked(so);
+ return (flags);
+}
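
The queue limits above are exported as sysctls under net.inet.tcp.reass. A small userland sketch (not part of this diff) that reads the current and maximum segment counts through sysctlbyname(3) could look like:

/*
 * Illustrative userland sketch (not part of this commit): reading the
 * reassembly-queue counters exported by tcp_reass.c.  The OID names
 * come straight from the SYSCTL_INT() declarations above.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int maxseg, cursegs;
	size_t len;

	len = sizeof(maxseg);
	if (sysctlbyname("net.inet.tcp.reass.maxsegments", &maxseg, &len,
	    NULL, 0) == -1)
		err(1, "net.inet.tcp.reass.maxsegments");

	len = sizeof(cursegs);
	if (sysctlbyname("net.inet.tcp.reass.cursegments", &cursegs, &len,
	    NULL, 0) == -1)
		err(1, "net.inet.tcp.reass.cursegments");

	printf("reassembly segments in use: %d of %d\n", cursegs, maxseg);
	return (0);
}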
--- /dev/null
+++ sys/netinet/sctp_peeloff.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctp_peeloff.c,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_peeloff.c,v 1.16.4.1 2008/02/02 12:44:13 rwatson Exp $");
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_peeloff.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_auth.h>
+
+
+int
+sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT);
+ return (ENOENT);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /* We are clear to peel this one off */
+ return (0);
+}
+
+int
+sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ n_inp = (struct sctp_inpcb *)so->so_pcb;
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_socket = so;
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_mobility_features = inp->sctp_mobility_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ return (0);
+}
+
+
+struct socket *
+sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
+{
+ struct socket *newso;
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+
+ SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ *error = EFAULT;
+ return (NULL);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ *error = ENOTCONN;
+ return (NULL);
+ }
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ newso = sonewconn(head, SS_ISCONNECTED
+ );
+ if (newso == NULL) {
+ SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
+ *error = ENOMEM;
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+
+ }
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ n_inp = (struct sctp_inpcb *)newso->so_pcb;
+ SOCK_LOCK(head);
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+
+ n_inp->sctp_socket = newso;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
+ n_inp->sctp_ep.auto_close_time = 0;
+ sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
+ SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
+ }
+ /* Turn off any non-blocking semantic. */
+ SCTP_CLEAR_SO_NBIO(newso);
+ newso->so_state |= SS_ISCONNECTED;
+ /* We remove it right away */
+
+#ifdef SCTP_LOCK_LOGGING
+ if (sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ TAILQ_REMOVE(&head->so_comp, newso, so_list);
+ head->so_qlen--;
+ SOCK_UNLOCK(head);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * And now the final hack. We move data in the pending side i.e.
+ * head to the new socket buffer. Let the GRUBBING begin :-0
+ */
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (newso);
+}
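
The kernel-side peeloff above backs the userland sctp_peeloff(2) call on a one-to-many SCTP socket. A minimal userland sketch follows; it is not part of this diff, the port is a placeholder, error handling is kept short, and it assumes the usual <netinet/sctp.h>/<netinet/sctp_uio.h> declarations of sctp_recvmsg() and sctp_peeloff():

/*
 * Illustrative userland sketch (not part of this commit): receive one
 * message on a one-to-many SCTP socket, then peel that association off
 * onto its own descriptor, which ends up in sctp_do_peeloff()/
 * sctp_get_peeloff() above.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	struct sctp_event_subscribe events;
	struct sctp_sndrcvinfo sinfo;
	char buf[1024];
	socklen_t slen;
	ssize_t n;
	int s, afd, flags;

	s = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (s == -1)
		err(1, "socket");

	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(5002);		/* placeholder port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");
	if (listen(s, 1) == -1)
		err(1, "listen");

	/* Ask for per-message sndrcvinfo so the association id is filled in. */
	memset(&events, 0, sizeof(events));
	events.sctp_data_io_event = 1;
	if (setsockopt(s, IPPROTO_SCTP, SCTP_EVENTS, &events,
	    sizeof(events)) == -1)
		err(1, "SCTP_EVENTS");

	memset(&sinfo, 0, sizeof(sinfo));
	flags = 0;
	slen = sizeof(sin);
	n = sctp_recvmsg(s, buf, sizeof(buf), (struct sockaddr *)&sin,
	    &slen, &sinfo, &flags);
	if (n == -1)
		err(1, "sctp_recvmsg");

	/* Give that association its own one-to-one style descriptor. */
	afd = sctp_peeloff(s, sinfo.sinfo_assoc_id);
	if (afd == -1)
		err(1, "sctp_peeloff");

	/* ... use afd like a connected socket ... */
	close(afd);
	close(s);
	return (0);
}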
--- /dev/null
+++ sys/netinet/sctp_os.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_os.h,v 1.9 2007/08/24 00:53:52 rrs Exp $");
+#ifndef __sctp_os_h__
+#define __sctp_os_h__
+
+/*
+ * General kernel memory allocation:
+ * SCTP_MALLOC(element, type, size, name)
+ * SCTP_FREE(element)
+ * Kernel memory allocation for "soname"- memory must be zeroed.
+ * SCTP_MALLOC_SONAME(name, type, size)
+ * SCTP_FREE_SONAME(name)
+ */
+
+/*
+ * Zone(pool) allocation routines: MUST be defined for each OS.
+ * zone = zone/pool pointer.
+ * name = string name of the zone/pool.
+ * size = size of each zone/pool element.
+ * number = number of elements in zone/pool.
+ * type = structure type to allocate
+ *
+ * sctp_zone_t
+ * SCTP_ZONE_INIT(zone, name, size, number)
+ * SCTP_ZONE_GET(zone, type)
+ * SCTP_ZONE_FREE(zone, element)
+ * SCTP_ZONE_DESTROY(zone)
+ */
+
+#include <netinet/sctp_os_bsd.h>
+
+
+
+
+
+/* All os's must implement this address gatherer. If
+ * no VRF's exist, then vrf 0 is the only one and all
+ * addresses and ifn's live here.
+ */
+#define SCTP_DEFAULT_VRF 0
+void sctp_init_vrf_list(int vrfid);
+
+#endif
--- /dev/null
+++ sys/netinet/ip_options.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/ip_options.h,v 1.3 2007/05/11 10:48:30 rwatson Exp $
+ */
+
+#ifndef _NETINET_IP_OPTIONS_H_
+#define _NETINET_IP_OPTIONS_H_
+
+struct ipoptrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+};
+
+struct ipopt_tag {
+ struct m_tag tag; /* m_tag */
+ int ip_nhops;
+ struct ipoptrt ip_srcrt;
+};
+
+extern int ip_doopts; /* process or ignore IP options */
+
+int ip_dooptions(struct mbuf *, int);
+struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+int ip_optcopy(struct ip *, struct ip *);
+int ip_pcbopts(struct inpcb *, int, struct mbuf *);
+void ip_stripoptions(struct mbuf *, struct mbuf *);
+struct mbuf *ip_srcroute(struct mbuf *);
+
+#endif /* !_NETINET_IP_OPTIONS_H_ */
Index: igmp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp.c -L sys/netinet/igmp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp.c
+++ sys/netinet/igmp.c
@@ -31,7 +31,6 @@
* SUCH DAMAGE.
*
* @(#)igmp.c 8.1 (Berkeley) 7/19/93
- * $FreeBSD: src/sys/netinet/igmp.c,v 1.48.2.1 2005/08/24 17:30:44 rwatson Exp $
*/
/*
@@ -45,11 +44,13 @@
* MULTICAST Revision: 3.5.1.4
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/igmp.c,v 1.54 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -65,11 +66,14 @@
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
static struct router_info *find_rti(struct ifnet *ifp);
@@ -81,11 +85,11 @@
igmpstat, "");
/*
- * igmp_mtx protects all mutable global variables in igmp.c, as well as
- * the data fields in struct router_info. In general, a router_info
- * structure will be valid as long as the referencing struct in_multi is
- * valid, so no reference counting is used. We allow unlocked reads of
- * router_info data when accessed via an in_multi read-only.
+ * igmp_mtx protects all mutable global variables in igmp.c, as well as the
+ * data fields in struct router_info. In general, a router_info structure
+ * will be valid as long as the referencing struct in_multi is valid, so no
+ * reference counting is used. We allow unlocked reads of router_info data
+ * when accessed via an in_multi read-only.
*/
static struct mtx igmp_mtx;
static SLIST_HEAD(, router_info) router_info_head;
@@ -122,7 +126,7 @@
igmp_timers_are_running = 0;
/*
- * Construct a Router Alert option to use in outgoing packets
+ * Construct a Router Alert option to use in outgoing packets.
*/
MGET(router_alert, M_DONTWAIT, MT_DATA);
ra = mtod(router_alert, struct ipoption *);
@@ -148,21 +152,20 @@
if (rti->rti_ifp == ifp) {
IGMP_PRINTF(
"[igmp.c, _find_rti] --> found old entry \n");
- return rti;
+ return (rti);
}
}
MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
if (rti == NULL) {
- IGMP_PRINTF( "[igmp.c, _find_rti] --> no memory for entry\n");
- return NULL;
+ IGMP_PRINTF("[igmp.c, _find_rti] --> no memory for entry\n");
+ return (NULL);
}
rti->rti_ifp = ifp;
rti->rti_type = IGMP_V2_ROUTER;
rti->rti_time = 0;
SLIST_INSERT_HEAD(&router_info_head, rti, rti_list);
-
IGMP_PRINTF("[igmp.c, _find_rti] --> created an entry \n");
- return rti;
+ return (rti);
}
void
@@ -186,7 +189,7 @@
igmplen = ip->ip_len;
/*
- * Validate lengths
+ * Validate lengths.
*/
if (igmplen < IGMP_MINLEN) {
++igmpstat.igps_rcv_tooshort;
@@ -201,7 +204,7 @@
}
/*
- * Validate checksum
+ * Validate checksum.
*/
m->m_data += iphlen;
m->m_len -= iphlen;
@@ -223,12 +226,12 @@
* In the IGMPv2 specification, there are 3 states and a flag.
*
* In Non-Member state, we simply don't have a membership record.
- * In Delaying Member state, our timer is running (inm->inm_timer)
- * In Idle Member state, our timer is not running (inm->inm_timer==0)
+ * In Delaying Member state, our timer is running (inm->inm_timer).
+ * In Idle Member state, our timer is not running (inm->inm_timer==0).
*
- * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if
- * we have heard a report from another member, or IGMP_IREPORTEDLAST
- * if I sent the last report.
+ * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if we
+ * have heard a report from another member, or IGMP_IREPORTEDLAST if
+ * I sent the last report.
*/
switch (igmp->igmp_type) {
case IGMP_MEMBERSHIP_QUERY:
@@ -240,8 +243,8 @@
if (igmp->igmp_code == 0) {
/*
* Old router. Remember that the querier on this
- * interface is old, and set the timer to the
- * value in RFC 1112.
+ * interface is old, and set the timer to the value
+ * in RFC 1112.
*/
mtx_lock(&igmp_mtx);
@@ -277,14 +280,14 @@
}
/*
- * - Start the timers in all of our membership records
- * that the query applies to for the interface on
- * which the query arrived excl. those that belong
- * to the "all-hosts" group (224.0.0.1).
- * - Restart any timer that is already running but has
- * a value longer than the requested timeout.
- * - Use the value specified in the query message as
- * the maximum timeout.
+ * - Start the timers in all of our membership records that
+ * the query applies to for the interface on which the
+ * query arrived excl. those that belong to the "all-hosts"
+ * group (224.0.0.1).
+ * - Restart any timer that is already running but has a
+ * value longer than the requested timeout.
+ * - Use the value specified in the query message as the
+ * maximum timeout.
*/
IN_MULTI_LOCK();
IN_FIRST_MULTI(step, inm);
@@ -303,19 +306,19 @@
IN_NEXT_MULTI(step, inm);
}
IN_MULTI_UNLOCK();
-
break;
case IGMP_V1_MEMBERSHIP_REPORT:
case IGMP_V2_MEMBERSHIP_REPORT:
/*
* For fast leave to work, we have to know that we are the
- * last person to send a report for this group. Reports
- * can potentially get looped back if we are a multicast
- * router, so discard reports sourced by me.
+ * last person to send a report for this group. Reports can
+ * potentially get looped back if we are a multicast router,
+ * so discard reports sourced by me.
*/
IFP_TO_IA(ifp, ia);
- if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+ if (ia != NULL &&
+ ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
break;
++igmpstat.igps_rcv_reports;
@@ -338,29 +341,29 @@
* to compensate for the lack of any way for a process to
* determine the arrival interface of an incoming packet.
*/
- if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0)
- if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) {
+ if (ia != NULL)
+ ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ }
/*
- * If we belong to the group being reported, stop
- * our timer for that group.
+ * If we belong to the group being reported, stop our timer
+ * for that group.
*/
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
if (inm != NULL) {
inm->inm_timer = 0;
++igmpstat.igps_rcv_ourreports;
-
inm->inm_state = IGMP_OTHERMEMBER;
}
IN_MULTI_UNLOCK();
-
break;
}
/*
- * Pass all valid IGMP packets up to any process(es) listening
- * on a raw IGMP socket.
+ * Pass all valid IGMP packets up to any process(es) listening on a
+ * raw IGMP socket.
*/
rip_input(m, off);
}
@@ -410,8 +413,8 @@
struct in_multistep step;
/*
- * Quick check to see if any work needs to be done, in order
- * to minimize the overhead of fasttimo processing.
+ * Quick check to see if any work needs to be done, in order to
+ * minimize the overhead of fasttimo processing.
*/
if (!igmp_timers_are_running)
@@ -462,7 +465,7 @@
IN_MULTI_LOCK_ASSERT();
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL)
return;
@@ -501,8 +504,7 @@
imo.imo_multicast_loop = (ip_mrouter != NULL);
/*
- * XXX
- * Do we have to worry about reentrancy here? Don't think so.
+ * XXX: Do we have to worry about reentrancy here? Don't think so.
*/
ip_output(m, router_alert, &igmprt, 0, &imo, NULL);
Index: if_ether.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -L sys/netinet/if_ether.c -L sys/netinet/if_ether.c -u -r1.7 -r1.8
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -27,7 +27,6 @@
* SUCH DAMAGE.
*
* @(#)if_ether.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/if_ether.c,v 1.137.2.12 2006/03/22 07:48:31 glebius Exp $
*/
/*
@@ -36,8 +35,10 @@
* add "inuse/lock" bit (or ref. count) along with valid bit
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/if_ether.c,v 1.162 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_inet.h"
-#include "opt_bdg.h"
#include "opt_mac.h"
#include "opt_carp.h"
@@ -46,7 +47,6 @@
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/socket.h>
@@ -59,7 +59,6 @@
#include <net/netisr.h>
#include <net/if_llc.h>
#include <net/ethernet.h>
-#include <net/bridge.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@@ -72,6 +71,8 @@
#include <netinet/ip_carp.h>
#endif
+#include <security/mac/mac_framework.h>
+
#define SIN(s) ((struct sockaddr_in *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -79,40 +80,34 @@
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
/* timer values */
-static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
- &arpt_prune, 0, "");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
- &arpt_keep, 0, "");
+ &arpt_keep, 0, "ARP entry lifetime in seconds");
#define rt_expire rt_rmx.rmx_expire
struct llinfo_arp {
- LIST_ENTRY(llinfo_arp) la_le;
+ struct callout la_timer;
struct rtentry *la_rt;
struct mbuf *la_hold; /* last packet until resolved/timeout */
u_short la_preempt; /* countdown for pre-expiry arps */
u_short la_asked; /* # requests sent */
};
-static LIST_HEAD(, llinfo_arp) llinfo_arp;
-
static struct ifqueue arpintrq;
static int arp_allocated;
static int arp_maxtries = 5;
static int useloopback = 1; /* use loopback interface for local traffic */
static int arp_proxyall = 0;
-static struct callout arp_callout;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
- &arp_maxtries, 0, "");
+ &arp_maxtries, 0, "ARP resolution attempts before returning error");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
- &useloopback, 0, "");
+ &useloopback, 0, "Use the loopback interface for local traffic");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
- &arp_proxyall, 0, "");
+ &arp_proxyall, 0, "Enable proxy ARP for all suitable requests");
static void arp_init(void);
static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
@@ -127,53 +122,30 @@
#endif
/*
- * Timeout routine. Age arp_tab entries periodically.
+ * Timeout routine.
*/
-/* ARGSUSED */
static void
-arptimer(void * __unused unused)
+arptimer(void *arg)
{
- struct llinfo_arp *la, *ola;
+ struct rtentry *rt = (struct rtentry *)arg;
- RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET]);
- LIST_FOREACH_SAFE(la, &llinfo_arp, la_le, ola) {
- struct rtentry *rt = la->la_rt;
-
- RT_LOCK(rt);
- if (rt->rt_expire && rt->rt_expire <= time_second) {
- struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
-
- KASSERT(sdl->sdl_family == AF_LINK, ("sdl_family %d",
- sdl->sdl_family));
- if (rt->rt_refcnt > 1) {
- sdl->sdl_alen = 0;
- la->la_preempt = la->la_asked = 0;
- RT_UNLOCK(rt);
- continue;
- }
- RT_UNLOCK(rt);
- /*
- * XXX: LIST_REMOVE() is deep inside rtrequest().
- */
- rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0,
- NULL);
- continue;
- }
- RT_UNLOCK(rt);
- }
- RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET]);
+ RT_LOCK_ASSERT(rt);
+ /*
+ * The lock is needed to close a theoretical race
+ * between spontaneous expiry and intentional removal.
+ * We still got an extra reference on rtentry, so can
+ * safely pass pointers to its contents.
+ */
+ RT_UNLOCK(rt);
- callout_reset(&arp_callout, arpt_prune * hz, arptimer, NULL);
+ rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
}
/*
* Parallel to llc_rtrequest.
*/
static void
-arp_rtrequest(req, rt, info)
- int req;
- struct rtentry *rt;
- struct rt_addrinfo *info;
+arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
struct sockaddr *gate;
struct llinfo_arp *la;
@@ -208,7 +180,7 @@
gate = rt->rt_gateway;
SDL(gate)->sdl_type = rt->rt_ifp->if_type;
SDL(gate)->sdl_index = rt->rt_ifp->if_index;
- rt->rt_expire = time_second;
+ rt->rt_expire = time_uptime;
break;
}
/* Announce a new entry if requested. */
@@ -252,8 +224,8 @@
RT_ADDREF(rt);
la->la_rt = rt;
rt->rt_flags |= RTF_LLINFO;
- RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
- LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
+ callout_init_mtx(&la->la_timer, &rt->rt_mtx,
+ CALLOUT_RETURNUNLOCKED);
#ifdef INET
/*
@@ -296,8 +268,10 @@
rt->rt_expire = 0;
bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)),
SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
- if (useloopback)
+ if (useloopback) {
rt->rt_ifp = loif;
+ rt->rt_rmx.rmx_mtu = loif->if_mtu;
+ }
/*
* make sure to set rt->rt_ifa to the interface
@@ -314,13 +288,12 @@
break;
case RTM_DELETE:
- if (la == 0)
+ if (la == NULL) /* XXX: at least CARP does this. */
break;
- RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
- LIST_REMOVE(la, la_le);
- RT_REMREF(rt);
- rt->rt_llinfo = 0;
+ callout_stop(&la->la_timer);
+ rt->rt_llinfo = NULL;
rt->rt_flags &= ~RTF_LLINFO;
+ RT_REMREF(rt);
if (la->la_hold)
m_freem(la->la_hold);
Free((caddr_t)la);
@@ -334,10 +307,8 @@
* - arp header source ethernet address
*/
static void
-arprequest(ifp, sip, tip, enaddr)
- struct ifnet *ifp;
- struct in_addr *sip, *tip;
- u_char *enaddr;
+arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
+ u_char *enaddr)
{
struct mbuf *m;
struct arphdr *ah;
@@ -384,7 +355,7 @@
*/
int
arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
- struct sockaddr *dst, u_char *desten)
+ struct sockaddr *dst, u_char *desten)
{
struct llinfo_arp *la = NULL;
struct rtentry *rt = NULL;
@@ -438,7 +409,7 @@
* Check the address family and length is valid, the address
* is resolved; otherwise, try to resolve.
*/
- if ((rt->rt_expire == 0 || rt->rt_expire > time_second) &&
+ if ((rt->rt_expire == 0 || rt->rt_expire > time_uptime) &&
sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
@@ -448,7 +419,7 @@
* send an ARP request.
*/
if ((rt->rt_expire != 0) &&
- (time_second + la->la_preempt > rt->rt_expire)) {
+ (time_uptime + la->la_preempt > rt->rt_expire)) {
struct in_addr sin =
SIN(rt->rt_ifa->ifa_addr)->sin_addr;
@@ -495,11 +466,12 @@
else
error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
- if (la->la_asked == 0 || rt->rt_expire != time_second) {
+ if (la->la_asked == 0 || rt->rt_expire != time_uptime) {
struct in_addr sin =
SIN(rt->rt_ifa->ifa_addr)->sin_addr;
- rt->rt_expire = time_second;
+ rt->rt_expire = time_uptime;
+ callout_reset(&la->la_timer, hz, arptimer, rt);
la->la_asked++;
RT_UNLOCK(rt);
@@ -587,13 +559,10 @@
static void
-in_arpinput(m)
- struct mbuf *m;
+in_arpinput(struct mbuf *m)
{
struct arphdr *ah;
struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct iso88025_header *th = (struct iso88025_header *)0;
- struct iso88025_sockaddr_dl_data *trld;
struct llinfo_arp *la;
struct rtentry *rt;
struct ifaddr *ifa;
@@ -610,7 +579,7 @@
int carp_match = 0;
#endif
- if (do_bridge || ifp->if_bridge)
+ if (ifp->if_bridge)
bridged = 1;
req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
@@ -634,7 +603,7 @@
* XXX: This is really ugly!
*/
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
- if (((bridged && ia->ia_ifp->if_bridge != NULL) || do_bridge ||
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
@@ -648,7 +617,7 @@
#endif
}
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
- if (((bridged && ia->ia_ifp->if_bridge != NULL) || do_bridge ||
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
@@ -657,7 +626,7 @@
* as a dummy address for the rest of the function.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
- if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
ia = ifatoia(ifa);
goto match;
}
@@ -767,6 +736,9 @@
* routing info.
*/
if (ifp->if_type == IFT_ISO88025) {
+ struct iso88025_header *th = NULL;
+ struct iso88025_sockaddr_dl_data *trld;
+
th = (struct iso88025_header *)m->m_pkthdr.header;
trld = SDL_ISO88025(sdl);
rif_len = TR_RCF_RIFLEN(th->rcf);
@@ -792,8 +764,10 @@
m->m_pkthdr.len += 8;
th->rcf = trld->trld_rcf;
}
- if (rt->rt_expire)
- rt->rt_expire = time_second + arpt_keep;
+ if (rt->rt_expire) {
+ rt->rt_expire = time_uptime + arpt_keep;
+ callout_reset(&la->la_timer, hz * arpt_keep, arptimer, rt);
+ }
la->la_asked = 0;
la->la_preempt = arp_maxtries;
hold = la->la_hold;
@@ -866,13 +840,13 @@
} else {
/*
* Return proxied ARP replies only on the interface
- * or bridge cluster where this network resides.
+ * or bridge cluster where this network resides.
* Otherwise we may conflict with the host we are
* proxying for.
*/
if (rt->rt_ifp != ifp &&
- (rt->rt_ifp->if_bridge != ifp->if_bridge ||
- ifp->if_bridge == NULL)) {
+ (rt->rt_ifp->if_bridge != ifp->if_bridge ||
+ ifp->if_bridge == NULL)) {
RT_UNLOCK(rt);
goto drop;
}
@@ -884,19 +858,18 @@
}
if (itaddr.s_addr == myaddr.s_addr &&
- IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
+ IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
/* RFC 3927 link-local IPv4; always reply by broadcast. */
#ifdef DEBUG_LINKLOCAL
printf("arp: sending reply for link-local addr %s\n",
- inet_ntoa(itaddr));
+ inet_ntoa(itaddr));
#endif
-
m->m_flags |= M_BCAST;
m->m_flags &= ~M_MCAST;
} else {
/* default behaviour; never reply by broadcast. */
m->m_flags &= ~(M_BCAST|M_MCAST);
- }
+ }
(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
ah->ar_op = htons(ARPOP_REPLY);
@@ -917,9 +890,7 @@
* Lookup or enter a new address in arptab.
*/
static struct rtentry *
-arplookup(addr, create, proxy)
- u_long addr;
- int create, proxy;
+arplookup(u_long addr, int create, int proxy)
{
struct rtentry *rt;
struct sockaddr_inarp sin;
@@ -966,9 +937,7 @@
}
void
-arp_ifinit(ifp, ifa)
- struct ifnet *ifp;
- struct ifaddr *ifa;
+arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
@@ -978,10 +947,7 @@
}
void
-arp_ifinit2(ifp, ifa, enaddr)
- struct ifnet *ifp;
- struct ifaddr *ifa;
- u_char *enaddr;
+arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
{
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
@@ -996,9 +962,6 @@
arpintrq.ifq_maxlen = 50;
mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
- LIST_INIT(&llinfo_arp);
- callout_init(&arp_callout, CALLOUT_MPSAFE);
netisr_register(NETISR_ARP, arpintr, &arpintrq, NETISR_MPSAFE);
- callout_reset(&arp_callout, hz, arptimer, NULL);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
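
The if_ether.c hunks above drop the single global ARP scan timer (the removed arp_callout walking llinfo_arp) and instead arm a per-entry callout, la_timer, bound to the route mutex via callout_init_mtx() and re-armed with callout_reset() whenever an entry is created or refreshed. A rough sketch of that callout(9) pattern follows; the names (struct lle_like, lle_expire, lle_start) are made up for illustration only and this is not code from the patch:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>
    #include <sys/callout.h>

    struct lle_like {
            struct mtx      e_mtx;          /* protects this entry */
            struct callout  e_timer;        /* per-entry expiry timer */
    };

    static void
    lle_expire(void *arg)
    {
            struct lle_like *e = arg;

            /* Entered with e_mtx held because of callout_init_mtx(). */
            /* ... expire or refresh the entry here ... */
            mtx_unlock(&e->e_mtx);          /* CALLOUT_RETURNUNLOCKED */
    }

    static void
    lle_start(struct lle_like *e)
    {
            mtx_init(&e->e_mtx, "lle_like", NULL, MTX_DEF);
            /* Tie the callout to the entry lock, as the ARP code now does. */
            callout_init_mtx(&e->e_timer, &e->e_mtx, CALLOUT_RETURNUNLOCKED);
            /* One-second initial shot, like callout_reset(&la->la_timer, hz, ...). */
            callout_reset(&e->e_timer, hz, lle_expire, e);
    }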
Index: if_atm.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/if_atm.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/if_atm.c -L sys/netinet/if_atm.c -u -r1.1.1.1 -r1.2
--- sys/netinet/if_atm.c
+++ sys/netinet/if_atm.c
@@ -32,7 +32,7 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/if_atm.c,v 1.19.2.2 2005/08/31 13:58:28 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/if_atm.c,v 1.21 2005/08/26 15:27:18 glebius Exp $");
/*
* IP <=> ATM address resolution.
Index: tcp_debug.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_debug.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_debug.c -L sys/netinet/tcp_debug.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_debug.c
+++ sys/netinet/tcp_debug.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,9 +28,11 @@
* SUCH DAMAGE.
*
* @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_debug.c,v 1.26 2005/01/07 01:45:45 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_debug.c,v 1.29 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -48,7 +51,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
@@ -67,107 +73,95 @@
#include <netinet/tcp_debug.h>
#ifdef TCPDEBUG
-static int tcpconsdebug = 0;
+static int tcpconsdebug = 0;
#endif
-static struct tcp_debug tcp_debug[TCP_NDEBUG];
-static int tcp_debx;
+/*
+ * Global ring buffer of TCP debugging state. Each entry captures a snapshot
+ * of TCP connection state at any given moment. tcp_debx indexes the
+ * next available slot. There is no explicit export of this data structure;
+ * it will be read via /dev/kmem by debugging tools.
+ */
+static struct tcp_debug tcp_debug[TCP_NDEBUG];
+static int tcp_debx;
/*
- * Tcp debug routines
+ * All global state is protected by tcp_debug_mtx; tcp_trace() is split into
+ * two parts, one of which saves connection and other state into the global
+ * array (locked by tcp_debug_mtx), while the other optionally prints that
+ * state to the console under TCPDEBUG after the mutex has been released.
+ */
+struct mtx tcp_debug_mtx;
+MTX_SYSINIT(tcp_debug_mtx, &tcp_debug_mtx, "tcp_debug_mtx", MTX_DEF);
+
+/*
+ * Save TCP state at a given moment; optionally, both tcpcb and TCP packet
+ * header state will be saved.
*/
void
-tcp_trace(act, ostate, tp, ipgen, th, req)
- short act, ostate;
- struct tcpcb *tp;
- void *ipgen;
- struct tcphdr *th;
- int req;
+tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen,
+ struct tcphdr *th, int req)
{
#ifdef INET6
int isipv6;
#endif /* INET6 */
tcp_seq seq, ack;
int len, flags;
- struct tcp_debug *td = &tcp_debug[tcp_debx++];
+ struct tcp_debug *td;
+ mtx_lock(&tcp_debug_mtx);
+ td = &tcp_debug[tcp_debx++];
+ if (tcp_debx == TCP_NDEBUG)
+ tcp_debx = 0;
+ bzero(td, sizeof(*td));
#ifdef INET6
isipv6 = (ipgen != NULL && ((struct ip *)ipgen)->ip_v == 6) ? 1 : 0;
#endif /* INET6 */
td->td_family =
#ifdef INET6
- (isipv6 != 0) ? AF_INET6 :
+ (isipv6 != 0) ? AF_INET6 :
#endif
- AF_INET;
- if (tcp_debx == TCP_NDEBUG)
- tcp_debx = 0;
+ AF_INET;
td->td_time = iptime();
td->td_act = act;
td->td_ostate = ostate;
td->td_tcb = (caddr_t)tp;
- if (tp)
+ if (tp != NULL)
td->td_cb = *tp;
- else
- bzero((caddr_t)&td->td_cb, sizeof (*tp));
- if (ipgen) {
+ if (ipgen != NULL) {
switch (td->td_family) {
case AF_INET:
- bcopy((caddr_t)ipgen, (caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
+ bcopy(ipgen, &td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
break;
#ifdef INET6
case AF_INET6:
- bcopy((caddr_t)ipgen, (caddr_t)td->td_ip6buf,
- sizeof(td->td_ip6buf));
- bzero((caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
+ bcopy(ipgen, td->td_ip6buf, sizeof(td->td_ip6buf));
break;
#endif
- default:
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
- bzero((caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
- break;
}
- } else {
- bzero((caddr_t)&td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
}
- if (th) {
+ if (th != NULL) {
switch (td->td_family) {
case AF_INET:
td->td_ti.ti_t = *th;
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
break;
#ifdef INET6
case AF_INET6:
td->td_ti6.th = *th;
- bzero((caddr_t)&td->td_ti.ti_t,
- sizeof(td->td_ti.ti_t));
break;
#endif
- default:
- bzero((caddr_t)&td->td_ti.ti_t,
- sizeof(td->td_ti.ti_t));
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
- break;
}
- } else {
- bzero((caddr_t)&td->td_ti.ti_t, sizeof(td->td_ti.ti_t));
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
}
td->td_req = req;
+ mtx_unlock(&tcp_debug_mtx);
#ifdef TCPDEBUG
if (tcpconsdebug == 0)
return;
- if (tp)
+ if (tp != NULL)
printf("%p %s:", tp, tcpstates[ostate]);
else
printf("???????? ");
printf("%s ", tanames[act]);
switch (act) {
-
case TA_INPUT:
case TA_OUTPUT:
case TA_DROP:
@@ -177,9 +171,9 @@
ack = th->th_ack;
len =
#ifdef INET6
- isipv6 ? ((struct ip6_hdr *)ipgen)->ip6_plen :
+ isipv6 ? ((struct ip6_hdr *)ipgen)->ip6_plen :
#endif
- ((struct ip *)ipgen)->ip_len;
+ ((struct ip *)ipgen)->ip_len;
if (act == TA_OUTPUT) {
seq = ntohl(seq);
ack = ntohl(ack);
@@ -212,11 +206,11 @@
printf("<%s>", tcptimers[req>>8]);
break;
}
- if (tp)
+ if (tp != NULL)
printf(" -> %s", tcpstates[tp->t_state]);
/* print out internal state of tp !?! */
printf("\n");
- if (tp == 0)
+ if (tp == NULL)
return;
printf(
"\trcv_(nxt,wnd,up) (%lx,%lx,%lx) snd_(una,nxt,max) (%lx,%lx,%lx)\n",
--- /dev/null
+++ sys/netinet/sctp.h
@@ -0,0 +1,541 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* $KAME: sctp.h,v 1.18 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp.h,v 1.21.2.1 2007/12/09 20:23:47 rrs Exp $");
+
+#ifndef _NETINET_SCTP_H_
+#define _NETINET_SCTP_H_
+
+#include <sys/types.h>
+
+/*
+ * SCTP protocol - RFC2960.
+ */
+struct sctphdr {
+ uint16_t src_port; /* source port */
+ uint16_t dest_port; /* destination port */
+ uint32_t v_tag; /* verification tag of packet */
+ uint32_t checksum; /* Adler32 C-Sum */
+ /* chunks follow... */
+} __attribute__((packed));
+
+/*
+ * SCTP Chunks
+ */
+ struct sctp_chunkhdr {
+ uint8_t chunk_type; /* chunk type */
+ uint8_t chunk_flags; /* chunk flags */
+ uint16_t chunk_length; /* chunk length */
+ /* optional params follow */
+ } __attribute__((packed));
+
+/*
+ * SCTP chunk parameters
+ */
+ struct sctp_paramhdr {
+ uint16_t param_type; /* parameter type */
+ uint16_t param_length; /* parameter length */
+ } __attribute__((packed));
+
+/*
+ * user socket options: socket API defined
+ */
+/*
+ * read-write options
+ */
+#define SCTP_RTOINFO 0x00000001
+#define SCTP_ASSOCINFO 0x00000002
+#define SCTP_INITMSG 0x00000003
+#define SCTP_NODELAY 0x00000004
+#define SCTP_AUTOCLOSE 0x00000005
+#define SCTP_SET_PEER_PRIMARY_ADDR 0x00000006
+#define SCTP_PRIMARY_ADDR 0x00000007
+#define SCTP_ADAPTATION_LAYER 0x00000008
+/* same as above */
+#define SCTP_ADAPTION_LAYER 0x00000008
+#define SCTP_DISABLE_FRAGMENTS 0x00000009
+#define SCTP_PEER_ADDR_PARAMS 0x0000000a
+#define SCTP_DEFAULT_SEND_PARAM 0x0000000b
+/* ancillary data/notification interest options */
+#define SCTP_EVENTS 0x0000000c
+/* Without this applied we will give V4 and V6 addresses on a V6 socket */
+#define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d
+#define SCTP_MAXSEG 0x0000000e
+#define SCTP_DELAYED_SACK 0x0000000f
+#define SCTP_FRAGMENT_INTERLEAVE 0x00000010
+#define SCTP_PARTIAL_DELIVERY_POINT 0x00000011
+/* authentication support */
+#define SCTP_AUTH_CHUNK 0x00000012
+#define SCTP_AUTH_KEY 0x00000013
+#define SCTP_HMAC_IDENT 0x00000014
+#define SCTP_AUTH_ACTIVE_KEY 0x00000015
+#define SCTP_AUTH_DELETE_KEY 0x00000016
+#define SCTP_USE_EXT_RCVINFO 0x00000017
+#define SCTP_AUTO_ASCONF 0x00000018 /* rw */
+#define SCTP_MAXBURST 0x00000019 /* rw */
+#define SCTP_MAX_BURST 0x00000019 /* rw */
+/* assoc level context */
+#define SCTP_CONTEXT 0x0000001a /* rw */
+/* explicit EOR signalling */
+#define SCTP_EXPLICIT_EOR 0x0000001b
+
+/*
+ * read-only options
+ */
+#define SCTP_STATUS 0x00000100
+#define SCTP_GET_PEER_ADDR_INFO 0x00000101
+/* authentication support */
+#define SCTP_PEER_AUTH_CHUNKS 0x00000102
+#define SCTP_LOCAL_AUTH_CHUNKS 0x00000103
+#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
+#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
+
+/*
+ * user socket options: BSD implementation specific
+ */
+/*
+ * Blocking I/O is enabled on any TCP-type socket by default. For the UDP
+ * model, if this is turned on then the socket buffer is shared for send
+ * resources amongst all associations. The default for the UDP model is
+ * that SS_NBIO is set. This means all associations have a separate send
+ * limit, BUT they will NOT ever BLOCK; instead you will get EAGAIN back
+ * if you try to send too much. If you want the blocking semantics you
+ * set this option at the cost of sharing one socket send buffer size amongst
+ * all associations. Peeled-off sockets turn this option off and block. But
+ * since both TCP and peeled-off sockets have only one assoc per socket this
+ * is fine. It probably does NOT make sense to set SS_NBIO on a TCP
+ * model OR peeled-off UDP model, but we do allow you to do so. You just use
+ * the normal syscall to toggle SS_NBIO the way you want.
+ *
+ * Blocking I/O is controlled by the SS_NBIO flag on the socket state so_state
+ * field.
+ */
+
+/* these should probably go into sockets API */
+#define SCTP_RESET_STREAMS 0x00001004 /* wo */
+
+
+/* here on down are more implementation specific */
+#define SCTP_SET_DEBUG_LEVEL 0x00001005
+#define SCTP_CLR_STAT_LOG 0x00001007
+/* CMT ON/OFF socket option */
+#define SCTP_CMT_ON_OFF 0x00001200
+#define SCTP_CMT_USE_DAC 0x00001201
+/* JRS - Pluggable Congestion Control Socket option */
+#define SCTP_PLUGGABLE_CC 0x00001202
+
+/* read only */
+#define SCTP_GET_SNDBUF_USE 0x00001101
+#define SCTP_GET_STAT_LOG 0x00001103
+#define SCTP_PCB_STATUS 0x00001104
+#define SCTP_GET_NONCE_VALUES 0x00001105
+
+
+/* Special hook for dynamically setting primary for all assoc's,
+ * this is a write-only option that requires root privilege.
+ */
+#define SCTP_SET_DYNAMIC_PRIMARY 0x00002001
+
+/* VRF (virtual router feature) and multi-VRF support
+ * options. VRF's provide splits within a router
+ * that give the views of multiple routers. A
+ * standard host, without VRF support, is just
+ * a single VRF. If VRF's are supported then
+ * the transport must be VRF aware. This means
+ * that every socket call coming in must be directed
+ * within the endpoint to one of the VRF's it belongs
+ * to. The endpoint, before binding, may select
+ * the "default" VRF it is in by using a set socket
+ * option with SCTP_VRF_ID. This will also
+ * get propagated to the default VRF. Once the
+ * endpoint binds an address then it CANNOT add
+ * additional VRF's to become a Multi-VRF endpoint.
+ *
+ * Before BINDING additional VRF's can be added with
+ * the SCTP_ADD_VRF_ID call or deleted with
+ * SCTP_DEL_VRF_ID.
+ *
+ * Associations are ALWAYS contained inside a single
+ * VRF. They cannot reside in two (or more) VRF's. Incoming
+ * packets, assuming the router is VRF aware, can always
+ * tell us what VRF they arrived on. A host not supporting
+ * any VRF's will find that the packets always arrived on the
+ * single VRF that the host has.
+ *
+ */
+
+#define SCTP_VRF_ID 0x00003001
+#define SCTP_ADD_VRF_ID 0x00003002
+#define SCTP_GET_VRF_IDS 0x00003003
+#define SCTP_GET_ASOC_VRF 0x00003004
+#define SCTP_DEL_VRF_ID 0x00003005
+
+/*
+ * If you enable packet logging you can get
+ * a poor man's ethereal output in binary
+ * form. Note this is a compile option to
+ * the kernel, SCTP_PACKET_LOGGING, and
+ * without it in your kernel you
+ * will get an EOPNOTSUPP error.
+ */
+#define SCTP_GET_PACKET_LOG 0x00004001
+
+/*
+ * hidden implementation-specific options; these are NOT user visible (should
+ * move out of sctp.h)
+ */
+/* sctp_bindx() flags as hidden socket options */
+#define SCTP_BINDX_ADD_ADDR 0x00008001
+#define SCTP_BINDX_REM_ADDR 0x00008002
+/* Hidden socket option that gets the addresses */
+#define SCTP_GET_PEER_ADDRESSES 0x00008003
+#define SCTP_GET_LOCAL_ADDRESSES 0x00008004
+/* return the total count in bytes needed to hold all local addresses bound */
+#define SCTP_GET_LOCAL_ADDR_SIZE 0x00008005
+/* Return the total count in bytes needed to hold the remote address */
+#define SCTP_GET_REMOTE_ADDR_SIZE 0x00008006
+/* hidden option for connectx */
+#define SCTP_CONNECT_X 0x00008007
+/* hidden option for connectx_delayed, part of sendx */
+#define SCTP_CONNECT_X_DELAYED 0x00008008
+#define SCTP_CONNECT_X_COMPLETE 0x00008009
+/* hidden socket option based sctp_peeloff */
+#define SCTP_PEELOFF 0x0000800a
+/* the real worker for sctp_getaddrlen() */
+#define SCTP_GET_ADDR_LEN 0x0000800b
+/* temporary workaround for Apple listen() issue, no args used */
+#define SCTP_LISTEN_FIX 0x0000800c
+/* Debug things that need to be purged */
+#define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00
+
+/* JRS - Supported congestion control modules for pluggable
+ * congestion control
+ */
+/* Standard TCP Congestion Control */
+#define SCTP_CC_RFC2581 0x00000000
+/* High Speed TCP Congestion Control (Floyd) */
+#define SCTP_CC_HSTCP 0x00000001
+/* HTCP Congestion Control */
+#define SCTP_CC_HTCP 0x00000002
+
+
+/* fragment interleave constants
+ * the setting must be one of these or
+ * EINVAL is returned.
+ */
+#define SCTP_FRAG_LEVEL_0 0x00000000
+#define SCTP_FRAG_LEVEL_1 0x00000001
+#define SCTP_FRAG_LEVEL_2 0x00000002
+
+/*
+ * user state values
+ */
+#define SCTP_CLOSED 0x0000
+#define SCTP_BOUND 0x1000
+#define SCTP_LISTEN 0x2000
+#define SCTP_COOKIE_WAIT 0x0002
+#define SCTP_COOKIE_ECHOED 0x0004
+#define SCTP_ESTABLISHED 0x0008
+#define SCTP_SHUTDOWN_SENT 0x0010
+#define SCTP_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_SHUTDOWN_PENDING 0x0080
+
+/*
+ * SCTP operational error codes (user visible)
+ */
+#define SCTP_CAUSE_NO_ERROR 0x0000
+#define SCTP_CAUSE_INVALID_STREAM 0x0001
+#define SCTP_CAUSE_MISSING_PARAM 0x0002
+#define SCTP_CAUSE_STALE_COOKIE 0x0003
+#define SCTP_CAUSE_OUT_OF_RESC 0x0004
+#define SCTP_CAUSE_UNRESOLVABLE_ADDR 0x0005
+#define SCTP_CAUSE_UNRECOG_CHUNK 0x0006
+#define SCTP_CAUSE_INVALID_PARAM 0x0007
+#define SCTP_CAUSE_UNRECOG_PARAM 0x0008
+#define SCTP_CAUSE_NO_USER_DATA 0x0009
+#define SCTP_CAUSE_COOKIE_IN_SHUTDOWN 0x000a
+#define SCTP_CAUSE_RESTART_W_NEWADDR 0x000b
+#define SCTP_CAUSE_USER_INITIATED_ABT 0x000c
+#define SCTP_CAUSE_PROTOCOL_VIOLATION 0x000d
+
+/* Error causes from RFC5061 */
+#define SCTP_CAUSE_DELETING_LAST_ADDR 0xa0
+#define SCTP_CAUSE_RESOURCE_SHORTAGE 0xa1
+#define SCTP_CAUSE_DELETING_SRC_ADDR 0xa2
+#define SCTP_CAUSE_ILLEGAL_ASCONF_ACK 0xa3
+#define SCTP_CAUSE_REQUEST_REFUSED 0xa4
+
+/* Error causes from RFC4895 */
+#define SCTP_CAUSE_UNSUPPORTED_HMACID 0x0105
+
+/*
+ * error cause parameters (user visible)
+ */
+ struct sctp_error_cause {
+ uint16_t code;
+ uint16_t length;
+ /* optional cause-specific info may follow */
+ } __attribute__((packed));
+
+ struct sctp_error_invalid_stream {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_INVALID_STRE
+ * AM */
+ uint16_t stream_id; /* stream id of the DATA in error */
+ uint16_t reserved;
+ } __attribute__((packed));
+
+ struct sctp_error_missing_param {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_MISSING_PARA
+ * M */
+ uint32_t num_missing_params; /* number of missing
+ * parameters */
+ /* uint16_t param_type's follow */
+ } __attribute__((packed));
+
+ struct sctp_error_stale_cookie {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_STALE_COOKIE
+ * */
+ uint32_t stale_time; /* time in usec of staleness */
+ } __attribute__((packed));
+
+ struct sctp_error_out_of_resource {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_OUT_OF_RESOU
+ * RCES */
+ } __attribute__((packed));
+
+ struct sctp_error_unresolv_addr {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRESOLVABLE
+ * _ADDR */
+
+ } __attribute__((packed));
+
+ struct sctp_error_unrecognized_chunk {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRECOG_CHUN
+ * K */
+ struct sctp_chunkhdr ch; /* header from chunk in error */
+ } __attribute__((packed));
+
+/*
+ * Main SCTP chunk types we place these here so natd and f/w's in user land
+ * can find them.
+ */
+/************0x00 series ***********/
+#define SCTP_DATA 0x00
+#define SCTP_INITIATION 0x01
+#define SCTP_INITIATION_ACK 0x02
+#define SCTP_SELECTIVE_ACK 0x03
+#define SCTP_HEARTBEAT_REQUEST 0x04
+#define SCTP_HEARTBEAT_ACK 0x05
+#define SCTP_ABORT_ASSOCIATION 0x06
+#define SCTP_SHUTDOWN 0x07
+#define SCTP_SHUTDOWN_ACK 0x08
+#define SCTP_OPERATION_ERROR 0x09
+#define SCTP_COOKIE_ECHO 0x0a
+#define SCTP_COOKIE_ACK 0x0b
+#define SCTP_ECN_ECHO 0x0c
+#define SCTP_ECN_CWR 0x0d
+#define SCTP_SHUTDOWN_COMPLETE 0x0e
+/* RFC4895 */
+#define SCTP_AUTHENTICATION 0x0f
+/************0x40 series ***********/
+/************0x80 series ***********/
+/* RFC5061 */
+#define SCTP_ASCONF_ACK 0x80
+/* draft-ietf-stewart-pktdrpsctp */
+#define SCTP_PACKET_DROPPED 0x81
+/* draft-ietf-stewart-strreset-xxx */
+#define SCTP_STREAM_RESET 0x82
+
+/* RFC4820 */
+#define SCTP_PAD_CHUNK 0x84
+/************0xc0 series ***********/
+/* RFC3758 */
+#define SCTP_FORWARD_CUM_TSN 0xc0
+/* RFC5061 */
+#define SCTP_ASCONF 0xc1
+
+
+/* ABORT and SHUTDOWN COMPLETE FLAG */
+#define SCTP_HAD_NO_TCB 0x01
+
+/* Packet dropped flags */
+#define SCTP_FROM_MIDDLE_BOX SCTP_HAD_NO_TCB
+#define SCTP_BADCRC 0x02
+#define SCTP_PACKET_TRUNCATED 0x04
+
+#define SCTP_SAT_NETWORK_MIN 400 /* min ms for RTT to set satellite
+ * time */
+#define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst
+ * in sat */
+
+/* Data Chunk Specific Flags */
+#define SCTP_DATA_FRAG_MASK 0x03
+#define SCTP_DATA_MIDDLE_FRAG 0x00
+#define SCTP_DATA_LAST_FRAG 0x01
+#define SCTP_DATA_FIRST_FRAG 0x02
+#define SCTP_DATA_NOT_FRAG 0x03
+#define SCTP_DATA_UNORDERED 0x04
+
+/* ECN Nonce: SACK Chunk Specific Flags */
+#define SCTP_SACK_NONCE_SUM 0x01
+
+/* CMT DAC algorithm SACK flag */
+#define SCTP_SACK_CMT_DAC 0x80
+
+/*
+ * PCB flags (in sctp_flags bitmask).
+ * Note the features and flags are meant
+ * for use by netstat.
+ */
+#define SCTP_PCB_FLAGS_UDPTYPE 0x00000001
+#define SCTP_PCB_FLAGS_TCPTYPE 0x00000002
+#define SCTP_PCB_FLAGS_BOUNDALL 0x00000004
+#define SCTP_PCB_FLAGS_ACCEPTING 0x00000008
+#define SCTP_PCB_FLAGS_UNBOUND 0x00000010
+#define SCTP_PCB_FLAGS_LISTENING 0x00000020
+#define SCTP_PCB_FLAGS_CLOSE_IP 0x00040000
+#define SCTP_PCB_FLAGS_WAS_CONNECTED 0x00080000
+#define SCTP_PCB_FLAGS_WAS_ABORTED 0x00100000
+/* TCP model support */
+
+#define SCTP_PCB_FLAGS_CONNECTED 0x00200000
+#define SCTP_PCB_FLAGS_IN_TCPPOOL 0x00400000
+#define SCTP_PCB_FLAGS_DONT_WAKE 0x00800000
+#define SCTP_PCB_FLAGS_WAKEOUTPUT 0x01000000
+#define SCTP_PCB_FLAGS_WAKEINPUT 0x02000000
+#define SCTP_PCB_FLAGS_BOUND_V6 0x04000000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x08000000
+#define SCTP_PCB_FLAGS_BLOCKING_IO 0x10000000
+#define SCTP_PCB_FLAGS_SOCKET_GONE 0x20000000
+#define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x40000000
+/* flags to copy to new PCB */
+#define SCTP_PCB_COPY_FLAGS 0x0e000004
+
+
+/*
+ * PCB Features (in sctp_features bitmask)
+ */
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
+#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
+
+/* socket options */
+#define SCTP_PCB_FLAGS_NODELAY 0x00000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
+#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
+#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
+
+/*-
+ * mobility_features parameters (by micchie). Note
+ * these features are applied against the
+ * sctp_mobility_features flags, not the sctp_features
+ * flags.
+ */
+#define SCTP_MOBILITY_BASE 0x00000001
+#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
+#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
+
+
+#define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU
+ * discovery */
+
+#include <netinet/sctp_uio.h>
+
+/* This dictates the size of the packet
+ * collection buffer. This only applies
+ * if SCTP_PACKET_LOGGING is enabled in
+ * your config.
+ */
+#define SCTP_PACKET_LOG_SIZE 65536
+
+/* Maximum delays and such a user can set for options that
+ * take ms.
+ */
+#define SCTP_MAX_SACK_DELAY 500 /* per RFC4960 */
+#define SCTP_MAX_HB_INTERVAL 14400000 /* 4 hours in ms */
+#define SCTP_MAX_COOKIE_LIFE 3600000 /* 1 hour in ms */
+
+
+/* Types of logging/KTR tracing that can be enabled via the
+ * sysctl net.inet.sctp.sctp_logging. You must also enable
+ * SUBSYS tracing.
+ * Note that you must have the SCTP option in the kernel
+ * to enable these as well.
+ */
+#define SCTP_BLK_LOGGING_ENABLE 0x00000001
+#define SCTP_CWND_MONITOR_ENABLE 0x00000002
+#define SCTP_CWND_LOGGING_ENABLE 0x00000004
+#define SCTP_EARLYFR_LOGGING_ENABLE 0x00000010
+#define SCTP_FLIGHT_LOGGING_ENABLE 0x00000020
+#define SCTP_FR_LOGGING_ENABLE 0x00000040
+#define SCTP_LOCK_LOGGING_ENABLE 0x00000080
+#define SCTP_MAP_LOGGING_ENABLE 0x00000100
+#define SCTP_MBCNT_LOGGING_ENABLE 0x00000200
+#define SCTP_MBUF_LOGGING_ENABLE 0x00000400
+#define SCTP_NAGLE_LOGGING_ENABLE 0x00000800
+#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000
+#define SCTP_RTTVAR_LOGGING_ENABLE 0x00002000
+#define SCTP_SACK_LOGGING_ENABLE 0x00004000
+#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000
+#define SCTP_SB_LOGGING_ENABLE 0x00010000
+#define SCTP_STR_LOGGING_ENABLE 0x00020000
+#define SCTP_WAKE_LOGGING_ENABLE 0x00040000
+#define SCTP_LOG_MAXBURST_ENABLE 0x00080000
+#define SCTP_LOG_RWND_ENABLE 0x00100000
+#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000
+#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000
+#define SCTP_LTRACE_ERROR_ENABLE 0x00800000
+#define SCTP_LAST_PACKET_TRACING 0x01000000
+#define SCTP_THRESHOLD_LOGGING 0x02000000
+#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000
+#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000
+
+
+
+#endif /* !_NETINET_SCTP_H_ */
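
Most of the read-write options declared in this header are consumed as plain int values through setsockopt(2)/getsockopt(2) at level IPPROTO_SCTP. The following is only a minimal userland sketch of that usage, assuming a kernel built with SCTP support and the header installed as <netinet/sctp.h>; it is not part of the imported sources:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>
    #include <err.h>
    #include <unistd.h>

    int
    main(void)
    {
            int sd, on = 1, secs = 30;

            /* One-to-many ("UDP model") SCTP socket. */
            sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
            if (sd == -1)
                    err(1, "socket");

            /* SCTP_NODELAY: send chunks without waiting to bundle. */
            if (setsockopt(sd, IPPROTO_SCTP, SCTP_NODELAY,
                &on, sizeof(on)) == -1)
                    warn("SCTP_NODELAY");

            /* SCTP_AUTOCLOSE: idle associations close after 30 seconds. */
            if (setsockopt(sd, IPPROTO_SCTP, SCTP_AUTOCLOSE,
                &secs, sizeof(secs)) == -1)
                    warn("SCTP_AUTOCLOSE");

            close(sd);
            return (0);
    }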
Index: in_gif.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_gif.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_gif.c -L sys/netinet/in_gif.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_gif.c
+++ sys/netinet/in_gif.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/in_gif.c,v 1.31.2.3 2006/01/31 15:56:46 glebius Exp $ */
/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */
/*-
@@ -30,6 +29,9 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_gif.c,v 1.38 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_mrouting.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -68,8 +70,6 @@
#include <net/if_gif.h>
-#include <net/net_osdep.h>
-
static int gif_validate4(const struct ip *, struct gif_softc *,
struct ifnet *);
@@ -90,10 +90,7 @@
&ip_gif_ttl, 0, "");
int
-in_gif_output(ifp, family, m)
- struct ifnet *ifp;
- int family;
- struct mbuf *m;
+in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
{
struct gif_softc *sc = ifp->if_softc;
struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
@@ -240,9 +237,7 @@
}
void
-in_gif_input(m, off)
- struct mbuf *m;
- int off;
+in_gif_input(struct mbuf *m, int off)
{
struct ifnet *gifp = NULL;
struct gif_softc *sc;
@@ -336,10 +331,7 @@
* validate outer address.
*/
static int
-gif_validate4(ip, sc, ifp)
- const struct ip *ip;
- struct gif_softc *sc;
- struct ifnet *ifp;
+gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
{
struct sockaddr_in *src, *dst;
struct in_ifaddr *ia4;
@@ -384,10 +376,10 @@
(u_int32_t)ntohl(sin.sin_addr.s_addr));
#endif
if (rt)
- rtfree(rt);
+ RTFREE_LOCKED(rt);
return 0;
}
- rtfree(rt);
+ RTFREE_LOCKED(rt);
}
return 32 * 2;
@@ -398,11 +390,7 @@
* matched the physical addr family. see gif_encapcheck().
*/
int
-gif_encapcheck4(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip ip;
struct gif_softc *sc;
@@ -419,8 +407,7 @@
}
int
-in_gif_attach(sc)
- struct gif_softc *sc;
+in_gif_attach(struct gif_softc *sc)
{
sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
&in_gif_protosw, sc);
@@ -430,8 +417,7 @@
}
int
-in_gif_detach(sc)
- struct gif_softc *sc;
+in_gif_detach(struct gif_softc *sc)
{
int error;
--- /dev/null
+++ sys/netinet/sctp_asconf.c
@@ -0,0 +1,3062 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.c,v 1.24 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_asconf.c,v 1.34 2007/10/01 03:22:28 rrs Exp $");
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_timer.h>
+
+/*
+ * debug flags:
+ * SCTP_DEBUG_ASCONF1: protocol info, general info and errors
+ * SCTP_DEBUG_ASCONF2: detailed info
+ */
+#ifdef SCTP_DEBUG
+#endif /* SCTP_DEBUG */
+
+
+static void
+sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
+{
+ struct ip *iph;
+ struct sockaddr_in *sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+
+#endif
+
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* IPv4 source */
+ sin = (struct sockaddr_in *)sa;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = 0;
+ sin->sin_addr.s_addr = iph->ip_src.s_addr;
+ return;
+ }
+#ifdef INET6
+ else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* IPv6 source */
+ struct ip6_hdr *ip6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6->sin6_addr = ip6->ip6_src;
+ return;
+ }
+#endif /* INET6 */
+ else
+ return;
+}
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ *
+ * An ASCONF parameter queue exists per asoc which holds the pending address
+ * operations. Lists are updated upon receipt of ASCONF-ACK.
+ *
+ * A restricted_addrs list exists per assoc to hold local addresses that are
+ * not (yet) usable by the assoc as a source address. These addresses are
+ * either pending an ASCONF operation (and exist on the ASCONF parameter
+ * queue), or they are permanently restricted (the peer has returned an
+ * ERROR indication to an ASCONF(ADD), or the peer does not support ASCONF).
+ *
+ * Deleted addresses are always immediately removed from the lists as they will
+ * (shortly) no longer exist in the kernel. We send ASCONFs as a courtesy,
+ * only if allowed.
+ */
+
+/*
+ * ASCONF parameter processing.
+ * response_required: set if a reply is required (e.g. SUCCESS_REPORT).
+ * returns an mbuf with an "error" response parameter, or NULL ("success") if ok.
+ * FIX: allocating this many mbufs on the fly is pretty inefficient...
+ */
+static struct mbuf *
+sctp_asconf_success_response(uint32_t id)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+
+ m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_success_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_SUCCESS_REPORT);
+ aph->ph.param_length = sizeof(struct sctp_asconf_paramhdr);
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv,
+ uint16_t tlv_length)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_error_cause *error;
+ uint8_t *tlv;
+
+ m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
+ tlv_length +
+ sizeof(struct sctp_error_cause)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ error = (struct sctp_error_cause *)(aph + 1);
+
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_ERROR_CAUSE_IND);
+ error->code = htons(cause);
+ error->length = tlv_length + sizeof(struct sctp_error_cause);
+ aph->ph.param_length = error->length +
+ sizeof(struct sctp_asconf_paramhdr);
+
+ if (aph->ph.param_length > MLEN) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: tlv_length (%xh) too big\n",
+ tlv_length);
+ sctp_m_freem(m_reply); /* discard */
+ return NULL;
+ }
+ if (error_tlv != NULL) {
+ tlv = (uint8_t *) (error + 1);
+ memcpy(tlv, error_tlv, tlv_length);
+ }
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ error->length = htons(error->length);
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+#ifdef INET6
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! */
+ /* FIX ME: currently sends back an invalid param error */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, aparam_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: v6 disabled- skipping ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return m_reply;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ } /* end switch */
+
+ /* if 0.0.0.0/::0, add the source address instead */
+ if (zero_address && sctp_nat_friendly) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* add the address */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE,
+ SCTP_ADDR_DYNAMIC_ADDED) != 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: error adding address\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_RESOURCE_SHORTAGE, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_ADD_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+ NULL, SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, NULL);
+ }
+
+ return m_reply;
+}
+
+static int
+sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
+{
+ struct sctp_nets *src_net, *net;
+
+ /* make sure the source address exists as a destination net */
+ src_net = sctp_findnet(stcb, src);
+ if (src_net == NULL) {
+ /* not found */
+ return -1;
+ }
+ /* delete all destination addresses except the source */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net != src_net) {
+ /* delete this address */
+ sctp_remove_net(stcb, net);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_del_remote_addrs_except: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1,
+ (struct sockaddr *)&net->ro._l_addr);
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0,
+ (struct sockaddr *)&net->ro._l_addr, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ return 0;
+}
+
+static struct mbuf *
+sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+ int result;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ /* get the source IP address for src and 0.0.0.0/::0 delete checks */
+ sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* make sure the source address is not being deleted */
+ if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+ /* trying to delete the source address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_SRC_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+ /* if deleting 0.0.0.0/::0, delete all addresses except src addr */
+ if (zero_address && sctp_nat_friendly) {
+ result = sctp_asconf_del_remote_addrs_except(stcb,
+ (struct sockaddr *)&sa_source);
+
+ if (result) {
+ /* src address did not exist? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: src addr does not exist?\n");
+ /* what error to reply with?? */
+ m_reply =
+ sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_REQUEST_REFUSED, (uint8_t *) aph,
+ aparam_length);
+ } else if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ return m_reply;
+ }
+ /* delete the address */
+ result = sctp_del_remote_addr(stcb, sa);
+ /*
+ * note if result == -2, the address doesn't exist in the asoc but
+ * since it's being deleted anyway, we just ack the delete -- but
+ * this probably means something has already gone awry
+ */
+ if (result == -1) {
+ /* only one address in the asoc */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete last IP addr!\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_LAST_ADDR, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ }
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_set_primary(struct mbuf *m,
+ struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* if 0.0.0.0/::0, use the source address instead */
+ if (zero_address && sctp_nat_friendly) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* set the primary address */
+ if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: primary address set\n");
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_SET_PRIMARY, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /*
+ * Mobility adaptation. Ideally, upon reception of a SET
+ * PRIMARY with DELETE IP ADDRESS of the previous primary
+ * destination, unacknowledged DATA is retransmitted
+ * immediately to the new primary destination for seamless
+ * handover. If the destination is UNCONFIRMED and marked
+ * REQ_PRIM, the retransmission occurs upon reception of
+ * the HEARTBEAT-ACK. (See sctp_handle_heartbeat_ack in
+ * sctp_input.c.) Also, on a change of the primary
+ * destination, it is better that all subsequent new DATA,
+ * along with already queued DATA, is transmitted to the new
+ * primary destination. (by micchie)
+ */
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED) &&
+ (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) == 0) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_deleted_prim(stcb,
+ stcb->asoc.primary_destination);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ } else {
+ /* couldn't set the requested primary address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: set primary failed!\n");
+ /* must have been an invalid address, so report */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ }
+
+ return m_reply;
+}
+
+/*
+ * handles an ASCONF chunk.
+ * if all parameters are processed ok, send a plain (empty) ASCONF-ACK
+ */
+void
+sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+ struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
+ int first)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ struct mbuf *n, *m_ack, *m_result, *m_tail;
+ struct sctp_asconf_ack_chunk *ack_cp;
+ struct sctp_asconf_paramhdr *aph, *ack_aph;
+ struct sctp_ipv6addr_param *p_addr;
+ unsigned int asconf_limit;
+ int error = 0; /* did an error occur? */
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_asconf_ack *ack, *ack_next;
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ if (compare_with_wrap(asoc->asconf_seq_in, serial_num, MAX_SEQ) ||
+ serial_num == asoc->asconf_seq_in) {
+ /* got a duplicate ASCONF */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: got duplicate serial number = %xh\n",
+ serial_num);
+ return;
+ } else if (serial_num != (asoc->asconf_seq_in + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: incorrect serial number = %xh (expected next = %xh)\n",
+ serial_num, asoc->asconf_seq_in + 1);
+ return;
+ }
+ /* it's the expected "next" sequence number, so process it */
+ asoc->asconf_seq_in = serial_num; /* update sequence */
+ /* get length of all the param's in the ASCONF */
+ asconf_limit = offset + ntohs(cp->ch.chunk_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: asconf_limit=%u, sequence=%xh\n",
+ asconf_limit, serial_num);
+
+ if (first) {
+ /* delete old cache */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: Now processing firstASCONF. Try to delte old cache\n");
+
+ ack = TAILQ_FIRST(&stcb->asoc.asconf_ack_sent);
+ while (ack != NULL) {
+ ack_next = TAILQ_NEXT(ack, next);
+ if (ack->serial_number == serial_num)
+ break;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: delete old(%u) < first(%u)\n",
+ ack->serial_number, serial_num);
+ TAILQ_REMOVE(&stcb->asoc.asconf_ack_sent, ack, next);
+ if (ack->data != NULL) {
+ sctp_m_freem(ack->data);
+ }
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asconf_ack, ack);
+ ack = ack_next;
+ }
+ }
+ m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (m_ack == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get mbuf!\n");
+ return;
+ }
+ m_tail = m_ack; /* current reply chain's tail */
+
+ /* fill in ASCONF-ACK header */
+ ack_cp = mtod(m_ack, struct sctp_asconf_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_ASCONF_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->serial_number = htonl(serial_num);
+ /* set initial lengths (eg. just an ASCONF-ACK), ntohx at the end! */
+ SCTP_BUF_LEN(m_ack) = sizeof(struct sctp_asconf_ack_chunk);
+ ack_cp->ch.chunk_length = sizeof(struct sctp_asconf_ack_chunk);
+
+ /* skip the lookup address parameter */
+ offset += sizeof(struct sctp_asconf_chunk);
+ p_addr = (struct sctp_ipv6addr_param *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *) & aparam_buf);
+ if (p_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get lookup addr!\n");
+ /* respond with a missing/invalid mandatory parameter error */
+ return;
+ }
+ /* param_length is already validated in process_control... */
+ offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
+
+ /* get pointer to first asconf param in ASCONF-ACK */
+ ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
+ if (ack_aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
+ return;
+ }
+ /* get pointer to first asconf param in ASCONF */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Empty ASCONF received?\n");
+ goto send_reply;
+ }
+ /* process through all parameters */
+ while (aph != NULL) {
+ unsigned int param_length, param_type;
+
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (offset + param_length > asconf_limit) {
+ /* parameter goes beyond end of chunk! */
+ sctp_m_freem(m_ack);
+ return;
+ }
+ m_result = NULL;
+
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) larger than buffer size!\n", param_length);
+ sctp_m_freem(m_ack);
+ return;
+ }
+ if (param_length <= sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) too short\n", param_length);
+ sctp_m_freem(m_ack);
+ /* the reply chain was freed; stop processing rather than reuse it */
+ return;
+ }
+ /* get the entire parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: couldn't get entire param\n");
+ sctp_m_freem(m_ack);
+ return;
+ }
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_ERROR_CAUSE_IND:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_set_primary(m, aph,
+ stcb, error);
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_ULP_ADAPTATION:
+ /* FIX */
+ break;
+ default:
+ if ((param_type & 0x8000) == 0) {
+ /* Been told to STOP at this param */
+ asconf_limit = offset;
+ /*
+ * FIX FIX - We need to call
+ * sctp_arethere_unrecognized_parameters()
+ * to get an operr and send it for any
+ * param's with the 0x4000 bit set OR do it
+ * here ourselves... note we still must STOP
+ * if the 0x8000 bit is clear.
+ */
+ }
+ /* unknown/invalid param type */
+ break;
+ } /* switch */
+
+ /* add any (error) result to the reply mbuf chain */
+ if (m_result != NULL) {
+ SCTP_BUF_NEXT(m_tail) = m_result;
+ m_tail = m_result;
+ /* update lengths, make sure it's aligned too */
+ SCTP_BUF_LEN(m_result) = SCTP_SIZE32(SCTP_BUF_LEN(m_result));
+ ack_cp->ch.chunk_length += SCTP_BUF_LEN(m_result);
+ /* set flag to force success reports */
+ error = 1;
+ }
+ offset += SCTP_SIZE32(param_length);
+ /* update remaining ASCONF message length to process */
+ if (offset >= asconf_limit) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ /* get pointer to next asconf param */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr),
+ (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: can't get asconf param hdr!\n");
+ /* FIX ME - add error here... */
+ }
+ }
+
+send_reply:
+ ack_cp->ch.chunk_length = htons(ack_cp->ch.chunk_length);
+ /* save the ASCONF-ACK reply */
+ ack = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_asconf_ack,
+ struct sctp_asconf_ack);
+ if (ack == NULL) {
+ sctp_m_freem(m_ack);
+ return;
+ }
+ ack->serial_number = serial_num;
+ ack->last_sent_to = NULL;
+ ack->data = m_ack;
+ n = m_ack;
+ while (n) {
+ ack->len += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_ack_sent, ack, next);
+
+ /* see if last_control_chunk_from is set properly (use IP src addr) */
+ if (stcb->asoc.last_control_chunk_from == NULL) {
+ /*
+ * this could happen if the source address was just newly
+ * added
+ */
+ struct ip *iph;
+ struct sctphdr *sh;
+ struct sockaddr_storage from_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
+ /* pullup already done, IP options already stripped */
+ iph = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)iph + sizeof(*iph));
+ if (iph->ip_v == IPVERSION) {
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /* Get the scopes set properly in the sin6 addrs */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6, ip6_use_defzone);
+ } else {
+ /* unknown address type */
+ from = NULL;
+ }
+ if (from != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, from);
+ /* look up the from address */
+ stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, from);
+#ifdef SCTP_DEBUG
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n");
+#endif
+ }
+ }
+}
+
+/*
+ * does the address match? returns 0 if not, 1 if so
+ */
+static uint32_t
+sctp_asconf_addr_match(struct sctp_asconf_addr *aa, struct sockaddr *sa)
+{
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 sa address */
+ /* XXX scopeid */
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr)) == 0)) {
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+ if (sa->sa_family == AF_INET) {
+ /* IPv4 sa address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV4_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr)) == 0)) {
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Cleanup for non-responded/OP ERR'd ASCONF
+ */
+void
+sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* mark peer as ASCONF incapable */
+ stcb->asoc.peer_supports_asconf = 0;
+ /*
+ * clear out any existing asconfs going out
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_2);
+ stcb->asoc.asconf_seq_out++;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+}
+
+/*
+ * cleanup any cached source addresses that may be topologically
+ * incorrect after a new address has been added to this interface.
+ */
+static void
+sctp_asconf_nets_cleanup(struct sctp_tcb *stcb, struct sctp_ifn *ifn)
+{
+ struct sctp_nets *net;
+
+ /*
+ * Ideally, we want to only clear cached routes and source addresses
+ * that are topologically incorrect. But since there is no easy way
+ * to know whether the newly added address on the ifn would cause a
+ * routing change (i.e. a new egress interface would be chosen)
+ * without doing a new routing lookup and source address selection,
+ * we will (for now) just flush any cached route using a different
+ * ifn (and cached source addrs) and let output re-choose them
+ * during the next send on that net.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /*
+ * clear any cached route (and cached source address) if the
+ * route's interface is NOT the same as the address change.
+ * If it's the same interface, just clear the cached source
+ * address.
+ */
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro) &&
+ SCTP_GET_IF_INDEX_FROM_ROUTE(&net->ro) != ifn->ifn_index) {
+ /* clear any cached route */
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* clear any cached source address */
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+}
+
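+/*
+ * move any queued chunks still addressed to the deleted primary over to the
+ * given alternate destination, transferring the net reference counts.
+ */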
+void
+sctp_move_chunks_from_deleted_prim(struct sctp_tcb *stcb, struct sctp_nets *dst)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+
+ if (dst->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ return;
+ }
+ if (stcb->asoc.deleted_primary == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+
+ /*
+ * now go through all the streams, checking for chunks sent to our
+ * bad network.
+ */
+ TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
+ /* now clean up any chunks here */
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->net == asoc->deleted_primary) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = dst;
+ atomic_add_int(&dst->ref_count, 1);
+ }
+ }
+ }
+ /* Now check the pending queue */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == asoc->deleted_primary) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = dst;
+ atomic_add_int(&dst->ref_count, 1);
+ }
+ }
+
+}
+
+
+void
+sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
+{
+ int error;
+
+ if (dstnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ return;
+ }
+ if (stcb->asoc.deleted_primary == NULL) {
+ return;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "assoc_immediate_retrans: Deleted primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Current Primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ error = sctp_t3rxt_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ if (error) {
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, stcb->sctp_ep, stcb->asoc.deleted_primary);
+#endif
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)) {
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+ return;
+}
+
+static int
+ sctp_asconf_queue_mgmt(struct sctp_tcb *, struct sctp_ifa *, uint16_t);
+
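+/*
+ * mark all unacked chunks queued to the given net for immediate
+ * retransmission, reset its congestion state, and push them out now
+ * (used by the mobility fast-handoff handling below).
+ */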
+void
+sctp_net_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->error_count = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ }
+ }
+ }
+ if (net->marked_retrans) {
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ }
+}
+
+static void
+sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
+{
+ struct sctp_nets *net;
+ int addrnum, changed;
+
+ /*
+ * If the number of local valid addresses is 1, the valid address is
+ * probably the newly added address. If several valid addresses exist
+ * in this association, a source address may not be changed.
+ * Additionally, they can be configured on the same interface as
+ * "alias" addresses. (by micchie)
+ */
+ addrnum = sctp_local_addr_count(stcb);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "p_check_react(): %d local addresses\n",
+ addrnum);
+ if (addrnum == 1) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* also, a SET PRIMARY may already have been sent */
+ }
+ return;
+ }
+ /* Multiple local addresses exist in the association. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /*
+ * Check whether the nexthop corresponds to the new address.
+ * If the new address corresponds to the current nexthop, the
+ * path will be changed. If the new address does NOT
+ * correspond to the current nexthop, the path will not be
+ * changed.
+ */
+ SCTP_RTALLOC((sctp_route_t *) & net->ro,
+ stcb->sctp_ep->def_vrf_id);
+ if (net->ro.ro_rt == NULL)
+ continue;
+
+ changed = 0;
+ if (net->ro._l_addr.sa.sa_family == AF_INET) {
+ if (sctp_v4src_match_nexthop(newifa, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ if (sctp_v6src_match_nexthop(
+ &newifa->address.sin6, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+ /*
+ * if the newly added address does not affect the routing
+ * information, we skip it.
+ */
+ if (changed == 0)
+ continue;
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* Send SET PRIMARY for this new address */
+ if (net == stcb->asoc.primary_destination) {
+ (void)sctp_asconf_queue_mgmt(stcb, newifa,
+ SCTP_SET_PRIM_ADDR);
+ }
+ }
+}
+
+/*
+ * process an ADD/DELETE IP ack from peer.
+ * addr: corresponding sctp_ifa to the address being added/deleted.
+ * type: SCTP_ADD_IP_ADDRESS or SCTP_DEL_IP_ADDRESS.
+ * flag: 1=success, 0=failure.
+ */
+static void
+sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr,
+ uint16_t type, uint32_t flag)
+{
+ /*
+ * do the necessary asoc list work- if we get a failure indication,
+ * leave the address on the assoc's restricted list. If we get a
+ * success indication, remove the address from the restricted list.
+ */
+ /*
+ * Note: this will only occur for ADD_IP_ADDRESS, since
+ * DEL_IP_ADDRESS is never actually added to the list...
+ */
+ if (flag) {
+ /* success case, so remove from the restricted list */
+ sctp_del_local_addr_restricted(stcb, addr);
+
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_path_check_and_react(stcb, addr);
+ return;
+ }
+ /* clear any cached/topologically incorrect source addresses */
+ sctp_asconf_nets_cleanup(stcb, addr->ifn_p);
+ }
+ /* else, leave it on the list */
+}
+
+/*
+ * add an asconf add/delete/set primary IP address parameter to the queue.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if queued, -1 if not queued/removed.
+ * NOTE: if adding, but a delete for the same address is already scheduled
+ * (and not yet sent out), simply remove it from queue. Same for deleting
+ * an address already scheduled for add. If a duplicate operation is found,
+ * ignore the new one.
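+ * For example: if an unsent delete for an address is still queued and an add
+ * for that same address is requested, the queued delete is simply removed and
+ * -1 is returned without queueing anything new.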
+ */
+static int
+sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ struct sctp_asconf_addr *aa, *aa_next;
+ struct sockaddr *sa;
+
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, &ifa->address.sa) == 0)
+ continue;
+ /* is the request already in queue (sent or not) */
+ if (aa->ap.aph.ph.param_type == type) {
+ return (-1);
+ }
+ /* is the opposite request already in queue, and not sent */
+ if ((aa->sent == 0) && (type == SCTP_ADD_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS)) {
+ /* add requested, delete already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: add removes queued entry\n");
+ return (-1);
+ }
+ if ((aa->sent == 0) && (type == SCTP_DEL_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS)) {
+ /* delete requested, add already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the aa->ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: delete removes queued entry\n");
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "asconf_queue_mgmt: failed to get memory!\n");
+ return (-1);
+ }
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = type;
+ aa->ifa = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin6;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin;
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ /*
+ * if we are deleting an address, it should go out last; otherwise,
+ * add it to the front of the pending queue
+ */
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ /* add goes to the front of the queue */
+ TAILQ_INSERT_HEAD(&stcb->asoc.asconf_queue, aa, next);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else {
+ /* delete and set primary goes to the back of the queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+#ifdef SCTP_DEBUG
+ if (sctp_debug_on & SCTP_DEBUG_ASCONF2) {
+ if (type == SCTP_DEL_IP_ADDRESS) {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ }
+ }
+#endif
+ }
+
+ return (0);
+}
+
+
+/*
+ * add an asconf operation for the given ifa and type.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if completed, -1 if not completed, 1 if immediate send is
+ * advisable.
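+ * (A return of 1 only happens when a pending "delete last address" was queued
+ * together with this add, i.e. the sole local address is being replaced, so
+ * the caller should send the ASCONF out immediately.)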
+ */
+static int
+sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ uint32_t status;
+ int pending_delete_queued = 0;
+
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /*
+ * if this is deleting the last address from the assoc, mark it as
+ * pending.
+ */
+ if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
+ (sctp_local_addr_count(stcb) < 2)) {
+ /* set the pending delete info only */
+ stcb->asoc.asconf_del_pending = 1;
+ stcb->asoc.asconf_addr_del_pending = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_add: mark delete last address pending\n");
+ return (-1);
+ }
+ /*
+ * if this is an add, and there is a delete also pending (i.e. the
+ * last local address is being changed), queue the pending delete
+ * too.
+ */
+ if ((type == SCTP_ADD_IP_ADDRESS) && stcb->asoc.asconf_del_pending) {
+ /* queue in the pending delete */
+ if (sctp_asconf_queue_mgmt(stcb,
+ stcb->asoc.asconf_addr_del_pending,
+ SCTP_DEL_IP_ADDRESS) == 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_add: queing pending delete\n");
+ pending_delete_queued = 1;
+ /* clear out the pending delete info */
+ stcb->asoc.asconf_del_pending = 0;
+ sctp_free_ifa(stcb->asoc.asconf_addr_del_pending);
+ stcb->asoc.asconf_addr_del_pending = NULL;
+ }
+ }
+ /* queue an asconf parameter */
+ status = sctp_asconf_queue_mgmt(stcb, ifa, type);
+
+ if (pending_delete_queued && (status == 0)) {
+ struct sctp_nets *net;
+
+ /*
+ * since we know that the only/last address is now being
+ * changed in this case, reset the cwnd/rto on all nets to
+ * start as a new address and path. Also clear the error
+ * counts to give the assoc the best chance to complete the
+ * address change.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb,
+ net);
+ net->RTO = 0;
+ net->error_count = 0;
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_ASCONF,
+ __LINE__);
+ }
+ /* queue in an advisory set primary too */
+ (void)sctp_asconf_queue_mgmt(stcb, ifa, SCTP_SET_PRIM_ADDR);
+ /* let caller know we should send this out immediately */
+ status = 1;
+ }
+ return (status);
+}
+
+/*-
+ * add an asconf delete IP address parameter to the queue by sockaddr and
+ * possibly with no sctp_ifa available. This is only called by the routine
+ * that checks the addresses in an INIT-ACK against the current address list.
+ * returns 0 if completed, non-zero if not completed.
+ * NOTE: if an add is already scheduled (and not yet sent out), simply
+ * remove it from queue. If a duplicate operation is found, ignore the
+ * new one.
+ */
+static int
+sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t vrf_id;
+
+ if (stcb == NULL) {
+ return (-1);
+ }
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, sa) == 0)
+ continue;
+ /* is the request already in queue (sent or not) */
+ if (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ return (-1);
+ }
+ /* is the opposite request already in queue, and not sent */
+ if (aa->sent == 1)
+ continue;
+ if (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS) {
+ /* add already queued, so remove existing entry */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the entry */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_queue_sa_delete: failed to get memory!\n");
+ return (-1);
+ }
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS;
+ aa->ifa = ifa;
+ if (ifa)
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (sa->sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ if (ifa)
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ /* delete goes to the back of the queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+
+ /* sa_ignore MEMLEAK {memory is put on the tailq} */
+ return (0);
+}
+
+/*
+ * find a specific asconf param on our "sent" queue
+ */
+static struct sctp_asconf_addr *
+sctp_asconf_find_param(struct sctp_tcb *stcb, uint32_t correlation_id)
+{
+ struct sctp_asconf_addr *aa;
+
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ if (aa->ap.aph.correlation_id == correlation_id &&
+ aa->sent == 1) {
+ /* found it */
+ return (aa);
+ }
+ }
+ /* didn't find it */
+ return (NULL);
+}
+
+/*
+ * process an SCTP_ERROR_CAUSE_IND for a ASCONF-ACK parameter and do
+ * notifications based on the error response
+ */
+static void
+sctp_asconf_process_error(struct sctp_tcb *stcb,
+ struct sctp_asconf_paramhdr *aph)
+{
+ struct sctp_error_cause *eh;
+ struct sctp_paramhdr *ph;
+ uint16_t param_type;
+ uint16_t error_code;
+
+ eh = (struct sctp_error_cause *)(aph + 1);
+ ph = (struct sctp_paramhdr *)(eh + 1);
+ /* validate lengths */
+ if (htons(eh->length) + sizeof(struct sctp_error_cause) >
+ htons(aph->ph.param_length)) {
+ /* invalid error cause length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: cause element too long\n");
+ return;
+ }
+ if (htons(ph->param_length) + sizeof(struct sctp_paramhdr) >
+ htons(eh->length)) {
+ /* invalid included TLV length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: included TLV too long\n");
+ return;
+ }
+ /* which error code ? */
+ error_code = ntohs(eh->code);
+ param_type = ntohs(aph->ph.param_type);
+ /* FIX: this should go back up the REMOTE_ERROR ULP notify */
+ switch (error_code) {
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ /* we allow ourselves to "try again" for this error */
+ break;
+ default:
+ /* peer can't handle it... */
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * process an asconf queue param.
+ * aparam: parameter to process, will be removed from the queue.
+ * flag: 1=success case, 0=failure case
+ */
+static void
+sctp_asconf_process_param_ack(struct sctp_tcb *stcb,
+ struct sctp_asconf_addr *aparam, uint32_t flag)
+{
+ uint16_t param_type;
+
+ /* process this param */
+ param_type = aparam->ap.aph.ph.param_type;
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: added IP address\n");
+ sctp_asconf_addr_mgmt_ack(stcb, aparam->ifa, param_type, flag);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: deleted IP address\n");
+ /* nothing really to do... lists already updated */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ /* nothing to do... peer may start using this addr */
+ if (flag == 0)
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ /* should NEVER happen */
+ break;
+ }
+
+ /* remove the param and free it */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aparam, next);
+ if (aparam->ifa)
+ sctp_free_ifa(aparam->ifa);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+}
+
+/*
+ * cleanup from a bad asconf ack parameter
+ */
+static void
+sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+{
+ /* assume peer doesn't really know how to do asconfs */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* XXX we could free the pending queue here */
+}
+
+void
+sctp_handle_asconf_ack(struct mbuf *m, int offset,
+ struct sctp_asconf_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ uint16_t ack_length;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t last_error_id = 0; /* last error correlation id */
+ uint32_t id;
+ struct sctp_asconf_addr *ap;
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_ack_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf_ack: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ /*
+ * NOTE: we may want to handle this differently- currently, we will
+ * abort when we get an ack for the expected serial number + 1 (eg.
+ * we didn't send it), process an ack normally if it is the expected
+ * serial number, and re-send the previous ack for *ALL* other
+ * serial numbers
+ */
+
+ /*
+ * if the serial number is the next expected, but I didn't send it,
+ * abort the asoc, since someone probably just hijacked us...
+ */
+ if (serial_num == (asoc->asconf_seq_out + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_CAUSE_ILLEGAL_ASCONF_ACK, NULL, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return;
+ }
+ if (serial_num != asoc->asconf_seq_out) {
+ /* got a duplicate/unexpected ASCONF-ACK */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got duplicate/unexpected serial number = %xh (expected = %xh)\n",
+ serial_num, asoc->asconf_seq_out);
+ return;
+ }
+ if (stcb->asoc.asconf_sent == 0) {
+ /* got an unexpected ASCONF-ACK for serial not in flight */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got serial number = %xh but not in flight\n",
+ serial_num);
+ /* nothing to do... duplicate ACK received */
+ return;
+ }
+ /* stop our timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+
+ /* process the ASCONF-ACK contents */
+ ack_length = ntohs(cp->ch.chunk_length) -
+ sizeof(struct sctp_asconf_ack_chunk);
+ offset += sizeof(struct sctp_asconf_ack_chunk);
+ /* process through all parameters */
+ while (ack_length >= sizeof(struct sctp_asconf_paramhdr)) {
+ unsigned int param_length, param_type;
+
+ /* get pointer to next asconf parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr), aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (param_length > ack_length) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ if (param_length < sizeof(struct sctp_paramhdr)) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* get the complete parameter... */
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "param length (%u) larger than buffer size!\n", param_length);
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* correlation_id is transparent to peer, no ntohl needed */
+ id = aph->correlation_id;
+
+ switch (param_type) {
+ case SCTP_ERROR_CAUSE_IND:
+ last_error_id = id;
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, failed flag */
+ sctp_asconf_process_param_ack(stcb, ap, 0);
+ /* process the error response */
+ sctp_asconf_process_error(stcb, aph);
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, success flag */
+ sctp_asconf_process_param_ack(stcb, ap, 1);
+ break;
+ default:
+ break;
+ } /* switch */
+
+ /* update remaining ASCONF-ACK message length to process */
+ ack_length -= SCTP_SIZE32(param_length);
+ if (ack_length <= 0) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ offset += SCTP_SIZE32(param_length);
+ } /* while */
+
+ /*
+ * if there are any "sent" params still on the queue, these are
+ * implicitly "success", or "failed" (if we got an error back) ...
+ * so process these appropriately
+ *
+ * we assume that the correlation_id's are monotonically increasing
+ * beginning from 1 and that we don't have *that* many outstanding
+ * at any given time
+ */
+ if (last_error_id == 0)
+ last_error_id--; /* unsigned wrap: set to "max" value */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ if (aa->sent == 1) {
+ /*
+ * implicitly successful or failed: if correlation_id
+ * < last_error_id, then success; else, failure
+ */
+ if (aa->ap.aph.correlation_id < last_error_id)
+ sctp_asconf_process_param_ack(stcb, aa, 1);
+ else
+ sctp_asconf_process_param_ack(stcb, aa, 0);
+ } else {
+ /*
+ * since we always process in order (FIFO queue) if
+ * we reach one that hasn't been sent, the rest
+ * should not have been sent either. so, we're
+ * done...
+ */
+ break;
+ }
+ }
+
+ /* update the next sequence number to use */
+ asoc->asconf_seq_out++;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+ /* clear the sent flag to allow new ASCONFs */
+ asoc->asconf_sent = 0;
+ if (!TAILQ_EMPTY(&stcb->asoc.asconf_queue)) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ /* we have more params, so restart our timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep,
+ stcb, net);
+#else
+ /* we have more params, so send out more */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+}
+
+static uint32_t
+sctp_is_scopeid_in_nets(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sockaddr_in6 *sin6, *net6;
+ struct sctp_nets *net;
+
+ if (sa->sa_family != AF_INET6) {
+ /* wrong family */
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) == 0) {
+ /* not link local address */
+ return (0);
+ }
+ /* hunt through our destination nets list for this scope_id */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (((struct sockaddr *)(&net->ro._l_addr))->sa_family !=
+ AF_INET6)
+ continue;
+ net6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ if (IN6_IS_ADDR_LINKLOCAL(&net6->sin6_addr) == 0)
+ continue;
+ if (sctp_is_same_scope(sin6, net6)) {
+ /* found one */
+ return (1);
+ }
+ }
+ /* didn't find one */
+ return (0);
+}
+
+/*
+ * address management functions
+ */
+static void
+sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_ifa *ifa, uint16_t type, int addr_locked)
+{
+ int status;
+
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* subset bound, no ASCONF allowed case, so ignore */
+ return;
+ }
+ /*
+ * note: we know this is not the subset-bound, no-ASCONF case; i.e.
+ * this is either boundall or subset bound with ASCONF allowed
+ */
+
+ /* first, make sure it's a good address family */
+ if (ifa->address.sa.sa_family != AF_INET6 &&
+ ifa->address.sa.sa_family != AF_INET) {
+ return;
+ }
+ /* make sure we're "allowed" to add this type of addr */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0)
+ return;
+ /* is the v6 addr really valid ? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return;
+ }
+ }
+ /* put this address on the "pending/do not use yet" list */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ /*
+ * check address scope; if the address is out of scope, don't queue
+ * anything... note: this would leave the address on both inp and
+ * asoc lists
+ */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ return;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ return;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ return;
+ }
+ }
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ return;
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ /* invalid if we are a v6 only endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6))
+ return;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* we skip unspecified addresses */
+ return;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ return;
+ }
+ } else {
+ /* else, not AF_INET or AF_INET6, so skip */
+ return;
+ }
+
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* does the peer do asconf? */
+ if (stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+
+ /*
+ * if queued ok, and in the open state, send out the
+ * ASCONF. If in the non-open state, these will be
+ * sent when the state goes open.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ addr_locked);
+#endif
+ }
+ }
+ }
+}
+
+
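+/*
+ * endpoint check used by the asconf iterator: returns 1 once every address on
+ * the work list has been found invalid for this endpoint's bindings, 0
+ * otherwise.
+ */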
+int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int type;
+ int cnt_invalid = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ type = l->action;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* invalid if we are a v6 only endpoint */
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6)) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else {
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ }
+ return (0);
+}
+
+static int
+sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *laddr, *nladdr, *l;
+
+ /* Only for the specific case of not bound-all */
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ laddr->action = 0;
+ break;
+ }
+ }
+ } else if (l->action == SCTP_DEL_IP_ADDRESS) {
+ laddr = LIST_FIRST(&inp->sctp_addr_list);
+ while (laddr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ /* remove only after all guys are done */
+ if (laddr->ifa == ifa) {
+ sctp_del_local_addr_ep(inp, ifa);
+ }
+ laddr = nladdr;
+ }
+ }
+ }
+ return (0);
+}
+
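+/*
+ * per-association worker for the asconf iterator: queues an ASCONF
+ * add/delete/set-primary for each applicable address on the work list and, if
+ * anything was queued while the association is open, sends the ASCONF out.
+ */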
+void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int cnt_invalid = 0;
+ int type, status;
+ int num_queued = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ type = l->action;
+
+ /* address's vrf_id must be the vrf_id of the assoc */
+ if (ifa->vrf_id != stcb->asoc.vrf_id) {
+ continue;
+ }
+ /* Same checks again for assoc */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ struct sockaddr_in6 *sin6;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ continue;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ continue;
+ }
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* invalid if we are a v6 only endpoint */
+ struct in6pcb *inp6;
+ struct sockaddr_in *sin;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ /* invalid if we are a v6 only endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6))
+ continue;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6)) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+ } else {
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ /* prevent this address from being used as a source */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ if (net->ro._s_addr == ifa) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ rt = net->ro.ro_rt;
+ if (rt) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ /*
+ * Now that we deleted our src address,
+ * should we not also now reset the
+ * cwnd/rto to start as if it's a new
+ * address?
+ */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->RTO = 0;
+
+ }
+ }
+ } else if (type == SCTP_SET_PRIM_ADDR) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* must validate the ifa is in the ep */
+ if (sctp_is_addr_in_ep(stcb->sctp_ep, ifa) == 0) {
+ continue;
+ }
+ } else {
+ /* Need to check scopes for this guy */
+ if (sctp_is_address_in_scope(ifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ }
+ }
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+ /*
+ * if queued ok, and in the open state, update the
+ * count of queued params. If in the non-open
+ * state, these get sent when the assoc goes open.
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (status >= 0) {
+ num_queued++;
+ }
+ }
+ }
+ }
+ /*
+ * If we have queued params in the open state, send out an ASCONF.
+ */
+ if (num_queued > 0) {
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+ }
+}
+
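+/*
+ * iterator completion: clears the defer-use flag on added addresses, then
+ * releases each ifa reference and laddr entry on the work list and frees the
+ * iterator itself.
+ */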
+void
+sctp_asconf_iterator_end(void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l, *l_next;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ l = LIST_FIRST(&asc->list_of_work);
+ while (l != NULL) {
+ l_next = LIST_NEXT(l, sctp_nxt_addr);
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ /* Clear the defer use flag */
+ ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ sctp_free_ifa(ifa);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, l);
+ SCTP_DECR_LADDR_COUNT();
+ l = l_next;
+ }
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+}
+
+/*
+ * sa is the sockaddr to ask the peer to set primary to.
+ * returns: 0 = completed, -1 = error
+ */
+int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ uint32_t vrf_id;
+ struct sctp_ifa *ifa;
+
+ /* find the ifa for the desired set primary */
+ vrf_id = stcb->asoc.vrf_id;
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa, SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "set_primary_ip_address_sa: queued on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ } else {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return (-1);
+ }
+ return (0);
+}
+
+void
+sctp_set_primary_ip_address(struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+
+ /* go through all our PCB's */
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ struct sctp_tcb *stcb;
+
+ /* process for all associations for this endpoint */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa,
+ SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ } /* for each stcb */
+ } /* for each inp */
+}
+
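+/*
+ * find a usable (in-scope, unrestricted) local address to offer as the ASCONF
+ * lookup address in the bound-all case.
+ */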
+static struct sockaddr *
+sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (stcb->asoc.loopback_scope == 0 &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_ifa->address.sa.sa_family == AF_INET &&
+ stcb->asoc.ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))
+ continue;
+
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ /* found a valid local v4 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6 &&
+ stcb->asoc.ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.local_scope == 0 &&
+ IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ continue;
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
+ continue;
+
+ /* found a valid local v6 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ }
+ }
+ }
+ /* no valid addresses found */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
+static struct sockaddr *
+sctp_find_valid_localaddr_ep(struct sctp_tcb *stcb)
+{
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ /* is the address restricted ? */
+ if (sctp_is_addr_restricted(stcb, laddr->ifa))
+ continue;
+
+ /* found a valid local address to use */
+ return (&laddr->ifa->address.sa);
+ }
+ /* no valid addresses found */
+ return (NULL);
+}
+
+/*
+ * builds an ASCONF chunk from queued ASCONF params.
+ * returns NULL on error (no mbuf, no ASCONF params queued, etc).
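+ * The chunk is assembled as two mbufs: one for the chunk header plus the
+ * (optional) lookup address, and a cluster for the ASCONF parameters; the two
+ * are chained together just before returning.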
+ */
+struct mbuf *
+sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked)
+{
+ struct mbuf *m_asconf, *m_asconf_chk;
+ struct sctp_asconf_addr *aa;
+ struct sctp_asconf_chunk *acp;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr_param *aap;
+ uint32_t p_length;
+ uint32_t correlation_id = 1; /* 0 is reserved... */
+ caddr_t ptr, lookup_ptr;
+ uint8_t lookup_used = 0;
+
+ /* are there any asconf params to send? */
+ if (TAILQ_EMPTY(&stcb->asoc.asconf_queue)) {
+ return (NULL);
+ }
+ /* can't send a new one if there is one in flight already */
+ if (stcb->asoc.asconf_sent > 0) {
+ return (NULL);
+ }
+ /*
+ * get a chunk header mbuf and a cluster for the asconf params since
+ * it's simpler to fill in the asconf chunk header lookup address on
+ * the fly
+ */
+ m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf_chk == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get chunk mbuf!\n");
+ return (NULL);
+ }
+ m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get mbuf!\n");
+ sctp_m_freem(m_asconf_chk);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(m_asconf_chk) = sizeof(struct sctp_asconf_chunk);
+ SCTP_BUF_LEN(m_asconf) = 0;
+ acp = mtod(m_asconf_chk, struct sctp_asconf_chunk *);
+ bzero(acp, sizeof(struct sctp_asconf_chunk));
+ /* save pointers to lookup address and asconf params */
+ lookup_ptr = (caddr_t)(acp + 1); /* after the header */
+ ptr = mtod(m_asconf, caddr_t); /* beginning of cluster */
+
+ /* fill in chunk header info */
+ acp->ch.chunk_type = SCTP_ASCONF;
+ acp->ch.chunk_flags = 0;
+ acp->serial_number = htonl(stcb->asoc.asconf_seq_out);
+
+ /* add parameters... up to smallest MTU allowed */
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ /* get the parameter length */
+ p_length = SCTP_SIZE32(aa->ap.aph.ph.param_length);
+ /* will it fit in current chunk? */
+ if (SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) {
+ /* won't fit, so we're done with this chunk */
+ break;
+ }
+ /* assign (and store) a correlation id */
+ aa->ap.aph.correlation_id = correlation_id++;
+
+ /*
+ * fill in the address if we're doing a delete; this is a simple
+ * way for us to fill in the correlation address, which
+ * should only be used by the peer if we're deleting our
+ * source address and adding a new address (e.g. renumbering
+ * case)
+ */
+ if (lookup_used == 0 &&
+ aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ lookup->ph.param_type =
+ htons(aa->ap.addrp.ph.param_type);
+ if (aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) {
+ /* copy IPv6 address */
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ } else {
+ /* copy IPv4 address */
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, &aa->ap.addrp.addr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ }
+ /* copy into current space */
+ memcpy(ptr, &aa->ap, p_length);
+
+ /* convert elements to network byte order and update lengths */
+ aph = (struct sctp_asconf_paramhdr *)ptr;
+ aap = (struct sctp_asconf_addr_param *)ptr;
+ /* correlation_id is transparent to peer, no htonl needed */
+ aph->ph.param_type = htons(aph->ph.param_type);
+ aph->ph.param_length = htons(aph->ph.param_length);
+ aap->addrp.ph.param_type = htons(aap->addrp.ph.param_type);
+ aap->addrp.ph.param_length = htons(aap->addrp.ph.param_length);
+
+ SCTP_BUF_LEN(m_asconf) += SCTP_SIZE32(p_length);
+ ptr += SCTP_SIZE32(p_length);
+
+ /*
+ * these params are removed from the pending list upon
+ * getting an ASCONF-ACK back from the peer; just set the flag
+ */
+ aa->sent = 1;
+ }
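+ /*
+ * any params that did not fit remain queued with sent == 0 and will go
+ * out in a later ASCONF once this one has been acknowledged
+ */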
+ /* check to see if the lookup addr has been populated yet */
+ if (lookup_used == 0) {
+ /* NOTE: if the address param is optional, can skip this... */
+ /* add any valid (existing) address... */
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+ struct sockaddr *found_addr;
+ caddr_t addr_ptr;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)
+ found_addr = sctp_find_valid_localaddr(stcb,
+ addr_locked);
+ else
+ found_addr = sctp_find_valid_localaddr_ep(stcb);
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ if (found_addr != NULL) {
+ if (found_addr->sa_family == AF_INET6) {
+ /* copy IPv6 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV6_ADDRESS);
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in6 *)
+ found_addr)->sin6_addr;
+ } else {
+ /* copy IPv4 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV4_ADDRESS);
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in *)
+ found_addr)->sin_addr;
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, addr_ptr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ } else {
+ /* uh oh... don't have any address?? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: no lookup addr!\n");
+ /* for now, we send an IPv4 address of 0.0.0.0 */
+ lookup->ph.param_type = htons(SCTP_IPV4_ADDRESS);
+ lookup->ph.param_length = htons(SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param)));
+ bzero(lookup->addr, sizeof(struct in_addr));
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param));
+ lookup_used = 1;
+ }
+ }
+ /* chain it all together */
+ SCTP_BUF_NEXT(m_asconf_chk) = m_asconf;
+ *retlen = SCTP_BUF_LEN(m_asconf_chk) + SCTP_BUF_LEN(m_asconf);
+ acp->ch.chunk_length = ntohs(*retlen);
+
+ /* update "sent" flag */
+ stcb->asoc.asconf_sent++;
+
+ return (m_asconf_chk);
+}
+
+/*
+ * section to handle address changes before an association is up, e.g. changes
+ * during INIT/INIT-ACK/COOKIE-ECHO handshake
+ */
+
+/*
+ * processes the (local) addresses in the INIT-ACK chunk
+ */
+static void
+sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
+ unsigned int offset, unsigned int length)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr *sa;
+ uint32_t vrf_id;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
+ if (stcb == NULL) /* Un-needed check for SA */
+ return;
+
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ return;
+ }
+ /* init the addresses */
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = stcb->rport;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_port = stcb->rport;
+
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+ if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *a6p;
+
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ a6p == NULL) {
+ return;
+ }
+ memcpy(&sin6.sin6_addr, a6p->addr,
+ sizeof(struct in6_addr));
+ sa = (struct sockaddr *)&sin6;
+ } else if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *a4p;
+
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ a4p == NULL) {
+ return;
+ }
+ sin.sin_addr.s_addr = a4p->addr;
+ sa = (struct sockaddr *)&sin;
+ } else {
+ goto next_addr;
+ }
+
+ /* see if this address really (still) exists */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+ SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa == NULL) {
+ /* address doesn't exist anymore */
+ int status;
+
+ /* are ASCONFs allowed ? */
+ if ((sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an ASCONF DEL_IP_ADDRESS */
+ status = sctp_asconf_queue_sa_delete(stcb, sa);
+ /*
+ * if queued ok, and in correct state, send
+ * out the ASCONF.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ }
+next_addr:
+ /*
+ * Sanity check: Make sure the length isn't 0, otherwise
+ * we'll be stuck in this loop for a long time...
+ */
+ if (SCTP_SIZE32(plen) == 0) {
+ SCTP_PRINTF("process_initack_addrs: bad len (%d) type=%xh\n",
+ plen, ptype);
+ return;
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if ((offset + sizeof(struct sctp_paramhdr)) > length)
+ return;
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ } /* while */
+}
+
+/* FIX ME: need to verify return result for v6 address type if v6 disabled */
+/*
+ * checks to see if a specific address is in the initack address list returns
+ * 1 if found, 0 if not
+ */
+static uint32_t
+sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset,
+ uint32_t length, struct sockaddr *sa)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in *sin;
+ struct sctp_ipv4addr_param *a4p;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *a6p;
+ struct sockaddr_in6 sin6_tmp;
+
+#endif /* INET6 */
+
+ if (
+#ifdef INET6
+ (sa->sa_family != AF_INET6) &&
+#endif /* INET6 */
+ (sa->sa_family != AF_INET))
+ return (0);
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "find_initack_addr: starting search for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "find_initack_addr: invalid offset?\n");
+ return (0);
+ }
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+#ifdef INET6
+ if (ptype == SCTP_IPV6_ADDRESS && sa->sa_family == AF_INET6) {
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ (ph == NULL) ||
+ (a6p == NULL)) {
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ /* create a copy and clear scope */
+ memcpy(&sin6_tmp, sin6,
+ sizeof(struct sockaddr_in6));
+ sin6 = &sin6_tmp;
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ if (memcmp(&sin6->sin6_addr, a6p->addr,
+ sizeof(struct in6_addr)) == 0) {
+ /* found it */
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ sa->sa_family == AF_INET) {
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m,
+ offset, sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ (ph == NULL) ||
+ (a4p == NULL)) {
+ return (0);
+ }
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr == a4p->addr) {
+ /* found it */
+ return (1);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ return (0);
+ ph = (struct sctp_paramhdr *)
+ sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ } /* while */
+ /* not found! */
+ return (0);
+}
+
+/*
+ * makes sure that the current endpoint local addr list is consistent with
+ * the new association (e.g. subset bound, asconf allowed); adds addresses as
+ * necessary
+ */
+static void
+sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr)
+{
+ struct sctp_laddr *laddr;
+
+ /* go through the endpoint list */
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ /* be paranoid and validate the laddr */
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "check_addr_list_ep: laddr->ifa is NULL");
+ continue;
+ }
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "check_addr_list_ep: laddr->ifa->ifa_addr is NULL");
+ continue;
+ }
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&laddr->ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &laddr->ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, laddr->ifa,
+ SCTP_ADD_IP_ADDRESS, SCTP_ADDR_NOT_LOCKED);
+ }
+ }
+}
+
+/*
+ * makes sure that the current kernel address list is consistent with the new
+ * association (with all addrs bound); adds addresses as necessary
+ */
+static void
+sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t vrf_id;
+
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ return;
+ }
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return;
+ }
+ /* go through all our known interfaces */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (loopback_scope == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* skip loopback interface */
+ continue;
+ }
+ /* go through each interface address */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&sctp_ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &sctp_ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb,
+ sctp_ifa, SCTP_ADD_IP_ADDRESS,
+ SCTP_ADDR_LOCKED);
+ }
+ } /* end foreach ifa */
+ } /* end foreach ifn */
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*
+ * validates an init-ack chunk (from a cookie-echo) with current addresses;
+ * adds addresses from the init-ack into our local address list, if needed;
+ * queues asconf adds/deletes addresses as needed and makes appropriate list
+ * changes for source address selection.
+ * m, offset: points to the start of the address list in an init-ack chunk
+ * length: total length of the address params only
+ * init_addr: address where my INIT-ACK was sent from
+ */
+void
+sctp_check_address_list(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ /* process the local addresses in the initack */
+ sctp_process_initack_addresses(stcb, m, offset, length);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* bound all case */
+ sctp_check_address_list_all(stcb, m, offset, length, init_addr,
+ local_scope, site_scope, ipv4_scope, loopback_scope);
+ } else {
+ /* subset bound case */
+ if (sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* asconf's allowed */
+ sctp_check_address_list_ep(stcb, m, offset, length,
+ init_addr);
+ }
+ /* else, no asconfs allowed, so what we sent is what we get */
+ }
+}
+
+/*
+ * sctp_bindx() support
+ */
+uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
+ uint32_t type, uint32_t vrf_id, struct sctp_ifa *sctp_ifap)
+{
+ struct sctp_ifa *ifa;
+
+ if (sa->sa_len == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ if (sctp_ifap) {
+ ifa = sctp_ifap;
+ } else if (type == SCTP_ADD_IP_ADDRESS) {
+ /* For an add the address MUST be on the system */
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ /* For a delete we need to find it in the inp */
+ ifa = sctp_find_ifa_in_ep(inp, sa, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ ifa = NULL;
+ }
+ if (ifa != NULL) {
+ /* add this address */
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *wi;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator),
+ SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr,
+ struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ sctp_add_local_addr_ep(inp, ifa, type);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_laddr *laddr;
+
+ if (inp->laddr_count < 2) {
+ /* can't delete the last local address */
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, wi);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (ifa == laddr->ifa) {
+ /* Mark in the delete */
+ laddr->action = type;
+ }
+ }
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 1;
+ SCTP_INCR_LADDR_COUNT();
+ wi->ifa = ifa;
+ wi->action = type;
+ atomic_add_int(&ifa->refcount, 1);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ sctp_asconf_iterator_ep_end,
+ SCTP_PCB_ANY_FLAGS,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, inp, 0);
+ } else {
+ /* invalid address! */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
Index: ip_encap.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_encap.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_encap.c -L sys/netinet/ip_encap.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_encap.c
+++ sys/netinet/ip_encap.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.20 2005/01/07 01:45:44 imp Exp $ */
/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */
/*-
@@ -57,6 +56,9 @@
*/
/* XXX is M_NETADDR correct? */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_encap.c,v 1.24 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_mrouting.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -87,11 +89,9 @@
#include <machine/stdarg.h>
-#include <net/net_osdep.h>
-
#include <sys/kernel.h>
#include <sys/malloc.h>
-static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
static void encap_add(struct encaptab *);
static int mask_match(const struct encaptab *, const struct sockaddr *,
@@ -110,15 +110,13 @@
* it's referenced by KAME pieces in netinet6.
*/
void
-encap_init()
+encap_init(void)
{
}
#ifdef INET
void
-encap4_input(m, off)
- struct mbuf *m;
- int off;
+encap4_input(struct mbuf *m, int off)
{
struct ip *ip;
int proto;
@@ -203,10 +201,7 @@
#ifdef INET6
int
-encap6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp;
- int proto;
+encap6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ip6_hdr *ip6;
@@ -274,8 +269,7 @@
/*lint -sem(encap_add, custodial(1)) */
static void
-encap_add(ep)
- struct encaptab *ep;
+encap_add(struct encaptab *ep)
{
mtx_assert(&encapmtx, MA_OWNED);
@@ -288,13 +282,9 @@
* Return value will be necessary as input (cookie) for encap_detach().
*/
const struct encaptab *
-encap_attach(af, proto, sp, sm, dp, dm, psw, arg)
- int af;
- int proto;
- const struct sockaddr *sp, *sm;
- const struct sockaddr *dp, *dm;
- const struct protosw *psw;
- void *arg;
+encap_attach(int af, int proto, const struct sockaddr *sp,
+ const struct sockaddr *sm, const struct sockaddr *dp,
+ const struct sockaddr *dm, const struct protosw *psw, void *arg)
{
struct encaptab *ep;
@@ -348,12 +338,9 @@
}
const struct encaptab *
-encap_attach_func(af, proto, func, psw, arg)
- int af;
- int proto;
- int (*func)(const struct mbuf *, int, int, void *);
- const struct protosw *psw;
- void *arg;
+encap_attach_func(int af, int proto,
+ int (*func)(const struct mbuf *, int, int, void *),
+ const struct protosw *psw, void *arg)
{
struct encaptab *ep;
@@ -379,8 +366,7 @@
}
int
-encap_detach(cookie)
- const struct encaptab *cookie;
+encap_detach(const struct encaptab *cookie)
{
const struct encaptab *ep = cookie;
struct encaptab *p;
@@ -400,10 +386,8 @@
}
static int
-mask_match(ep, sp, dp)
- const struct encaptab *ep;
- const struct sockaddr *sp;
- const struct sockaddr *dp;
+mask_match(const struct encaptab *ep, const struct sockaddr *sp,
+ const struct sockaddr *dp)
{
struct sockaddr_storage s;
struct sockaddr_storage d;
@@ -453,9 +437,7 @@
}
static void
-encap_fillarg(m, ep)
- struct mbuf *m;
- const struct encaptab *ep;
+encap_fillarg(struct mbuf *m, const struct encaptab *ep)
{
struct m_tag *tag;
@@ -467,8 +449,7 @@
}
void *
-encap_getarg(m)
- struct mbuf *m;
+encap_getarg(struct mbuf *m)
{
void *p = NULL;
struct m_tag *tag;
--- /dev/null
+++ sys/netinet/sctp_constants.h
@@ -0,0 +1,1115 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_constants.h,v 1.17 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_constants.h,v 1.32.2.2 2007/11/06 02:48:03 rrs Exp $");
+
+#ifndef __sctp_constants_h__
+#define __sctp_constants_h__
+
+/* Number of packets to get before sack sent by default */
+#define SCTP_DEFAULT_SACK_FREQ 2
+
+/* Address limit - This variable is calculated
+ * based on a 65535 byte max ip packet. We take out 100 bytes
+ * for the cookie, 40 bytes for a v6 header and 32
+ * bytes for the init structure. A second init structure
+ * for the init-ack and then finally a third one for the
+ * embedded init. This yields 100+40+(3 * 32) = 236 bytes.
+ * This leaves 65299 bytes for addresses. We throw out the 299 bytes.
+ * Now whatever we send in the INIT() we need to allow to get back in the
+ * INIT-ACK plus all the values from INIT and INIT-ACK
+ * listed in the cookie. Plus we need some overhead for
+ * maybe copied parameters in the COOKIE. If we
+ * allow 1080 addresses, and each side has 1080 V6 addresses
+ * that will be 21600 bytes. In the INIT-ACK we will
+ * see the INIT-ACK 21600 + 43200 in the cookie. This leaves
+ * about 500 bytes slack for misc things in the cookie.
+ */
+#define SCTP_ADDRESS_LIMIT 1080
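
A quick arithmetic sketch of the budget described in the comment above (illustrative only; the 20 bytes per IPv6 address parameter -- a 4-byte TLV header plus a 16-byte address -- and the EX_* names are assumptions, not part of the file):

    /* fixed overhead: cookie + v6 header + three init structures */
    #define EX_OVERHEAD        (100 + 40 + (3 * 32))               /* 236   */
    #define EX_ADDR_BUDGET     (65535 - EX_OVERHEAD)               /* 65299 */
    #define EX_INIT_ADDR_BYTES (SCTP_ADDRESS_LIMIT * 20)           /* 21600 */
    #define EX_COOKIE_ECHOED   (EX_INIT_ADDR_BYTES + 43200)        /* 64800 */
    #define EX_SLACK           (EX_ADDR_BUDGET - EX_COOKIE_ECHOED) /* ~499  */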
+
+/* We need at least 2k of space for ourselves; inits
+ * larger than that, let's abort.
+ */
+#define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
+
+/* Number of addresses where we just skip the counting */
+#define SCTP_COUNT_LIMIT 40
+
+#define SCTP_ZERO_COPY_TICK_DELAY (((100 * hz) + 999) / 1000)
+#define SCTP_ZERO_COPY_SENDQ_TICK_DELAY (((100 * hz) + 999) / 1000)
+
+/* Number of ticks to delay before running
+ * iterator on an address change.
+ */
+#define SCTP_ADDRESS_TICK_DELAY 2
+
+#define SCTP_VERSION_STRING "KAME-BSD 1.1"
+/* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */
+#define SCTP_AUDIT_SIZE 256
+
+#define SCTP_USE_THREAD_BASED_ITERATOR 1
+
+#define SCTP_KTRHEAD_NAME "sctp_iterator"
+#define SCTP_KTHREAD_PAGES 2
+
+
+/* If you support Multi-VRF, how big to
+ * make the initial array of VRFs.
+ */
+#define SCTP_DEFAULT_VRF_SIZE 4
+
+/* constants for rto calc */
+#define sctp_align_safe_nocopy 0
+#define sctp_align_unsafe_makecopy 1
+
+/* JRS - Values defined for the HTCP algorithm */
+#define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */
+#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
+#define BETA_MAX 102 /* 0.8 with shift << 7 */
+
+/* Places that CWND log can happen from */
+#define SCTP_CWND_LOG_FROM_FR 1
+#define SCTP_CWND_LOG_FROM_RTX 2
+#define SCTP_CWND_LOG_FROM_BRST 3
+#define SCTP_CWND_LOG_FROM_SS 4
+#define SCTP_CWND_LOG_FROM_CA 5
+#define SCTP_CWND_LOG_FROM_SAT 6
+#define SCTP_BLOCK_LOG_INTO_BLK 7
+#define SCTP_BLOCK_LOG_OUTOF_BLK 8
+#define SCTP_BLOCK_LOG_CHECK 9
+#define SCTP_STR_LOG_FROM_INTO_STRD 10
+#define SCTP_STR_LOG_FROM_IMMED_DEL 11
+#define SCTP_STR_LOG_FROM_INSERT_HD 12
+#define SCTP_STR_LOG_FROM_INSERT_MD 13
+#define SCTP_STR_LOG_FROM_INSERT_TL 14
+#define SCTP_STR_LOG_FROM_MARK_TSN 15
+#define SCTP_STR_LOG_FROM_EXPRS_DEL 16
+#define SCTP_FR_LOG_BIGGEST_TSNS 17
+#define SCTP_FR_LOG_STRIKE_TEST 18
+#define SCTP_FR_LOG_STRIKE_CHUNK 19
+#define SCTP_FR_T3_TIMEOUT 20
+#define SCTP_MAP_PREPARE_SLIDE 21
+#define SCTP_MAP_SLIDE_FROM 22
+#define SCTP_MAP_SLIDE_RESULT 23
+#define SCTP_MAP_SLIDE_CLEARED 24
+#define SCTP_MAP_SLIDE_NONE 25
+#define SCTP_FR_T3_MARK_TIME 26
+#define SCTP_FR_T3_MARKED 27
+#define SCTP_FR_T3_STOPPED 28
+#define SCTP_FR_MARKED 30
+#define SCTP_CWND_LOG_NOADV_SS 31
+#define SCTP_CWND_LOG_NOADV_CA 32
+#define SCTP_MAX_BURST_APPLIED 33
+#define SCTP_MAX_IFP_APPLIED 34
+#define SCTP_MAX_BURST_ERROR_STOP 35
+#define SCTP_INCREASE_PEER_RWND 36
+#define SCTP_DECREASE_PEER_RWND 37
+#define SCTP_SET_PEER_RWND_VIA_SACK 38
+#define SCTP_LOG_MBCNT_INCREASE 39
+#define SCTP_LOG_MBCNT_DECREASE 40
+#define SCTP_LOG_MBCNT_CHKSET 41
+#define SCTP_LOG_NEW_SACK 42
+#define SCTP_LOG_TSN_ACKED 43
+#define SCTP_LOG_TSN_REVOKED 44
+#define SCTP_LOG_LOCK_TCB 45
+#define SCTP_LOG_LOCK_INP 46
+#define SCTP_LOG_LOCK_SOCK 47
+#define SCTP_LOG_LOCK_SOCKBUF_R 48
+#define SCTP_LOG_LOCK_SOCKBUF_S 49
+#define SCTP_LOG_LOCK_CREATE 50
+#define SCTP_LOG_INITIAL_RTT 51
+#define SCTP_LOG_RTTVAR 52
+#define SCTP_LOG_SBALLOC 53
+#define SCTP_LOG_SBFREE 54
+#define SCTP_LOG_SBRESULT 55
+#define SCTP_FR_DUPED 56
+#define SCTP_FR_MARKED_EARLY 57
+#define SCTP_FR_CWND_REPORT 58
+#define SCTP_FR_CWND_REPORT_START 59
+#define SCTP_FR_CWND_REPORT_STOP 60
+#define SCTP_CWND_LOG_FROM_SEND 61
+#define SCTP_CWND_INITIALIZATION 62
+#define SCTP_CWND_LOG_FROM_T3 63
+#define SCTP_CWND_LOG_FROM_SACK 64
+#define SCTP_CWND_LOG_NO_CUMACK 65
+#define SCTP_CWND_LOG_FROM_RESEND 66
+#define SCTP_FR_LOG_CHECK_STRIKE 67
+#define SCTP_SEND_NOW_COMPLETES 68
+#define SCTP_CWND_LOG_FILL_OUTQ_CALLED 69
+#define SCTP_CWND_LOG_FILL_OUTQ_FILLS 70
+#define SCTP_LOG_FREE_SENT 71
+#define SCTP_NAGLE_APPLIED 72
+#define SCTP_NAGLE_SKIPPED 73
+#define SCTP_WAKESND_FROM_SACK 74
+#define SCTP_WAKESND_FROM_FWDTSN 75
+#define SCTP_NOWAKE_FROM_SACK 76
+#define SCTP_CWNDLOG_PRESEND 77
+#define SCTP_CWNDLOG_ENDSEND 78
+#define SCTP_AT_END_OF_SACK 79
+#define SCTP_REASON_FOR_SC 80
+#define SCTP_BLOCK_LOG_INTO_BLKA 81
+#define SCTP_ENTER_USER_RECV 82
+#define SCTP_USER_RECV_SACKS 83
+#define SCTP_SORECV_BLOCKSA 84
+#define SCTP_SORECV_BLOCKSB 85
+#define SCTP_SORECV_DONE 86
+#define SCTP_SACK_RWND_UPDATE 87
+#define SCTP_SORECV_ENTER 88
+#define SCTP_SORECV_ENTERPL 89
+#define SCTP_MBUF_INPUT 90
+#define SCTP_MBUF_IALLOC 91
+#define SCTP_MBUF_IFREE 92
+#define SCTP_MBUF_ICOPY 93
+#define SCTP_SORCV_FREECTL 94
+#define SCTP_SORCV_DOESCPY 95
+#define SCTP_SORCV_DOESLCK 96
+#define SCTP_SORCV_DOESADJ 97
+#define SCTP_SORCV_BOTWHILE 98
+#define SCTP_SORCV_PASSBF 99
+#define SCTP_SORCV_ADJD 100
+#define SCTP_UNKNOWN_MAX 101
+#define SCTP_RANDY_STUFF 102
+#define SCTP_RANDY_STUFF1 103
+#define SCTP_STRMOUT_LOG_ASSIGN 104
+#define SCTP_STRMOUT_LOG_SEND 105
+#define SCTP_FLIGHT_LOG_DOWN_CA 106
+#define SCTP_FLIGHT_LOG_UP 107
+#define SCTP_FLIGHT_LOG_DOWN_GAP 108
+#define SCTP_FLIGHT_LOG_DOWN_RSND 109
+#define SCTP_FLIGHT_LOG_UP_RSND 110
+#define SCTP_FLIGHT_LOG_DOWN_RSND_TO 111
+#define SCTP_FLIGHT_LOG_DOWN_WP 112
+#define SCTP_FLIGHT_LOG_UP_REVOKE 113
+#define SCTP_FLIGHT_LOG_DOWN_PDRP 114
+#define SCTP_FLIGHT_LOG_DOWN_PMTU 115
+#define SCTP_SACK_LOG_NORMAL 116
+#define SCTP_SACK_LOG_EXPRESS 117
+#define SCTP_MAP_TSN_ENTERS 118
+#define SCTP_THRESHOLD_CLEAR 119
+#define SCTP_THRESHOLD_INCR 120
+
+#define SCTP_LOG_MAX_TYPES 121
+/*
+ * To turn on various logging, you must first enable 'options KTR' and
+ * you might want to bump the entries 'options KTR_ENTRIES=80000'.
+ * To get something to log you define one of the logging defines.
+ * (see LINT).
+ *
+ * This gets the compile in place, but you still need to turn the
+ * logging flag on too in the sysctl (see in sctp.h).
+ */
+
+#define SCTP_LOG_EVENT_UNKNOWN 0
+#define SCTP_LOG_EVENT_CWND 1
+#define SCTP_LOG_EVENT_BLOCK 2
+#define SCTP_LOG_EVENT_STRM 3
+#define SCTP_LOG_EVENT_FR 4
+#define SCTP_LOG_EVENT_MAP 5
+#define SCTP_LOG_EVENT_MAXBURST 6
+#define SCTP_LOG_EVENT_RWND 7
+#define SCTP_LOG_EVENT_MBCNT 8
+#define SCTP_LOG_EVENT_SACK 9
+#define SCTP_LOG_LOCK_EVENT 10
+#define SCTP_LOG_EVENT_RTT 11
+#define SCTP_LOG_EVENT_SB 12
+#define SCTP_LOG_EVENT_NAGLE 13
+#define SCTP_LOG_EVENT_WAKE 14
+#define SCTP_LOG_MISC_EVENT 15
+#define SCTP_LOG_EVENT_CLOSE 16
+#define SCTP_LOG_EVENT_MBUF 17
+#define SCTP_LOG_CHUNK_PROC 18
+#define SCTP_LOG_ERROR_RET 19
+
+#define SCTP_LOG_MAX_EVENT 20
+
+#define SCTP_LOCK_UNKNOWN 2
+
+
+/* number of associations by default for zone allocation */
+#define SCTP_MAX_NUM_OF_ASOC 40000
+/* how many addresses per assoc remote and local */
+#define SCTP_SCALE_FOR_ADDR 2
+
+/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_AUTO_ASCONF)
+#define SCTP_DEFAULT_AUTO_ASCONF 0
+#else
+#define SCTP_DEFAULT_AUTO_ASCONF 1
+#endif
+
+/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_MOBILITY_BASE)
+#define SCTP_DEFAULT_MOBILITY_BASE 0
+#else
+#define SCTP_DEFAULT_MOBILITY_BASE 0
+#endif
+
+/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
+#else
+#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
+#endif
+
+/*
+ * Threshold for rwnd updates, we have to read (sb_hiwat >>
+ * SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
+ * window update sack. When we look, we compare the last rwnd we sent vs the
+ * current rwnd. It too must be greater than this value. Using 3 divides the
+ * hiwat by 8, so for 200k rwnd we need to read 24k. For a 64k rwnd we need
+ * to read 8k. This seems about right.. I hope :-D.. we do set a
+ * min of an MTU on it so if the rwnd is really small we will insist
+ * on a full MTU of 1500 bytes.
+ */
+#define SCTP_RWND_HIWAT_SHIFT 3
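
A minimal sketch of how the shift turns sb_hiwat into a read threshold (example_rwnd_update_due is a hypothetical helper, not part of the stack; the real check also compares the last advertised rwnd against the current one):

    static int
    example_rwnd_update_due(uint32_t sb_hiwat, uint32_t bytes_read, uint32_t mtu)
    {
    	/* e.g. a 64k hiwat with shift 3 gives an 8k read threshold */
    	uint32_t thresh = sb_hiwat >> SCTP_RWND_HIWAT_SHIFT;

    	if (thresh < mtu)
    		thresh = mtu;	/* never less than a full MTU */
    	return (bytes_read >= thresh);
    }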
+
+/* How much of the rwnd must the
+ * message be taking up to start partial delivery.
+ * We calculate this by shifting the hi_water (recv_win)
+ * left the following .. set to 1, when a message holds
+ * 1/2 the rwnd. If we set it to 2 when a message holds
+ * 1/4 the rwnd...etc..
+ */
+
+#define SCTP_PARTIAL_DELIVERY_SHIFT 1
+
+/* Minimum number of bytes read by user before we
+ * consider doing a rwnd update
+ */
+#define SCTP_MIN_READ_BEFORE_CONSIDERING 3000
+
+/*
+ * default HMAC for cookies, etc... use one of the AUTH HMAC id's
+ * SCTP_HMAC is the HMAC_ID to use
+ * SCTP_SIGNATURE_SIZE is the digest length
+ */
+#define SCTP_HMAC SCTP_AUTH_HMAC_ID_SHA1
+#define SCTP_SIGNATURE_SIZE SCTP_AUTH_DIGEST_LEN_SHA1
+#define SCTP_SIGNATURE_ALOC_SIZE SCTP_SIGNATURE_SIZE
+
+/* DEFINE HERE WHAT CRC YOU WANT TO USE */
+#define SCTP_USECRC_RFC2960 1
+/* #define SCTP_USECRC_FLETCHER 1 */
+/* #define SCTP_USECRC_SSHCRC32 1 */
+/* #define SCTP_USECRC_FASTCRC32 1 */
+/* #define SCTP_USECRC_CRC32 1 */
+/* #define SCTP_USECRC_TCP32 1 */
+/* #define SCTP_USECRC_CRC16SMAL 1 */
+/* #define SCTP_USECRC_CRC16 1 */
+/* #define SCTP_USECRC_MODADLER 1 */
+
+#ifndef SCTP_ADLER32_BASE
+#define SCTP_ADLER32_BASE 65521
+#endif
+
+/*
+ * the SCTP protocol signature this includes the version number encoded in
+ * the last 4 bits of the signature.
+ */
+#define PROTO_SIGNATURE_A 0x30000000
+#define SCTP_VERSION_NUMBER 0x3
+
+#define MAX_TSN 0xffffffff
+#define MAX_SEQ 0xffff
+
+/* how many executions every N ticks */
+#define SCTP_ITERATOR_MAX_AT_ONCE 20
+
+/* number of clock ticks between iterator executions */
+#define SCTP_ITERATOR_TICKS 1
+
+/*
+ * option: If you comment out the following you will receive the old behavior
+ * of obeying cwnd for the fast retransmit algorithm. With this defined a FR
+ * happens right away without waiting for the flightsize to drop below the
+ * cwnd value (which is reduced by the FR to 1/2 the inflight packets).
+ */
+#define SCTP_IGNORE_CWND_ON_FR 1
+
+/*
+ * Adds implementors guide behavior to only use newest highest update in SACK
+ * gap ack's to figure out if you need to stroke a chunk for FR.
+ */
+#define SCTP_NO_FR_UNLESS_SEGMENT_SMALLER 1
+
+/* default max I can burst out after a fast retransmit */
+#define SCTP_DEF_MAX_BURST 4
+/* IP hdr (20/40) + 12+2+2 (enet) + sctp common 12 */
+#define SCTP_FIRST_MBUF_RESV 68
+/* Packet transmit states in the sent field */
+#define SCTP_DATAGRAM_UNSENT 0
+#define SCTP_DATAGRAM_SENT 1
+#define SCTP_DATAGRAM_RESEND1 2 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND2 3 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND 4
+#define SCTP_DATAGRAM_ACKED 10010
+#define SCTP_DATAGRAM_INBOUND 10011
+#define SCTP_READY_TO_TRANSMIT 10012
+#define SCTP_DATAGRAM_MARKED 20010
+#define SCTP_FORWARD_TSN_SKIP 30010
+
+/* chunk output send from locations */
+#define SCTP_OUTPUT_FROM_USR_SEND 0
+#define SCTP_OUTPUT_FROM_T3 1
+#define SCTP_OUTPUT_FROM_INPUT_ERROR 2
+#define SCTP_OUTPUT_FROM_CONTROL_PROC 3
+#define SCTP_OUTPUT_FROM_SACK_TMR 4
+#define SCTP_OUTPUT_FROM_SHUT_TMR 5
+#define SCTP_OUTPUT_FROM_HB_TMR 6
+#define SCTP_OUTPUT_FROM_SHUT_ACK_TMR 7
+#define SCTP_OUTPUT_FROM_ASCONF_TMR 8
+#define SCTP_OUTPUT_FROM_STRRST_TMR 9
+#define SCTP_OUTPUT_FROM_AUTOCLOSE_TMR 10
+#define SCTP_OUTPUT_FROM_EARLY_FR_TMR 11
+#define SCTP_OUTPUT_FROM_STRRST_REQ 12
+#define SCTP_OUTPUT_FROM_USR_RCVD 13
+#define SCTP_OUTPUT_FROM_COOKIE_ACK 14
+#define SCTP_OUTPUT_FROM_DRAIN 15
+#define SCTP_OUTPUT_FROM_CLOSING 16
+/* SCTP chunk types are moved to sctp.h for application (NAT, FW) use */
+
+/* align to 32-bit sizes */
+#define SCTP_SIZE32(x) ((((x)+3) >> 2) << 2)
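
For example, the macro pads any TLV length up to the next 4-byte boundary:

    /* SCTP_SIZE32(4) == 4, SCTP_SIZE32(5) == 8, SCTP_SIZE32(9) == 12:
     * ((x + 3) >> 2) << 2 rounds a parameter length up to a multiple of 4. */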
+
+#define IS_SCTP_CONTROL(a) ((a)->chunk_type != SCTP_DATA)
+#define IS_SCTP_DATA(a) ((a)->chunk_type == SCTP_DATA)
+
+
+/* SCTP parameter types */
+/*************0x0000 series*************/
+#define SCTP_HEARTBEAT_INFO 0x0001
+#define SCTP_IPV4_ADDRESS 0x0005
+#define SCTP_IPV6_ADDRESS 0x0006
+#define SCTP_STATE_COOKIE 0x0007
+#define SCTP_UNRECOG_PARAM 0x0008
+#define SCTP_COOKIE_PRESERVE 0x0009
+#define SCTP_HOSTNAME_ADDRESS 0x000b
+#define SCTP_SUPPORTED_ADDRTYPE 0x000c
+
+/* draft-ietf-stewart-strreset-xxx */
+#define SCTP_STR_RESET_OUT_REQUEST 0x000d
+#define SCTP_STR_RESET_IN_REQUEST 0x000e
+#define SCTP_STR_RESET_TSN_REQUEST 0x000f
+#define SCTP_STR_RESET_RESPONSE 0x0010
+
+#define SCTP_MAX_RESET_PARAMS 2
+#define SCTP_STREAM_RESET_TSN_DELTA 0x1000
+
+/*************0x4000 series*************/
+
+/*************0x8000 series*************/
+#define SCTP_ECN_CAPABLE 0x8000
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+#define SCTP_ECN_NONCE_SUPPORTED 0x8001
+/* draft-ietf-tsvwg-auth-xxx */
+#define SCTP_RANDOM 0x8002
+#define SCTP_CHUNK_LIST 0x8003
+#define SCTP_HMAC_LIST 0x8004
+/*
+ * draft-ietf-tsvwg-addip-sctp-xx: param=0x8008, len=0xNNNN, followed by
+ * one byte per supported chunk type extension.
+ *
+ * Where each byte is a chunk type extension supported. For example, to support
+ * all chunks one would have (in hex):
+ *
+ * 80 08 00 09 C0 C1 80 81 82 00 00 00
+ *
+ * Has the parameter. C0 = PR-SCTP (RFC3758), C1/80 = ASCONF (addip draft),
+ * 81 = Packet Drop, 82 = Stream Reset, 83 = Authentication
+ */
+#define SCTP_SUPPORTED_CHUNK_EXT 0x8008
+
+/*************0xC000 series*************/
+#define SCTP_PRSCTP_SUPPORTED 0xc000
+/* draft-ietf-tsvwg-addip-sctp */
+#define SCTP_ADD_IP_ADDRESS 0xc001
+#define SCTP_DEL_IP_ADDRESS 0xc002
+#define SCTP_ERROR_CAUSE_IND 0xc003
+#define SCTP_SET_PRIM_ADDR 0xc004
+#define SCTP_SUCCESS_REPORT 0xc005
+#define SCTP_ULP_ADAPTATION 0xc006
+
+/* Notification error codes */
+#define SCTP_NOTIFY_DATAGRAM_UNSENT 0x0001
+#define SCTP_NOTIFY_DATAGRAM_SENT 0x0002
+#define SCTP_FAILED_THRESHOLD 0x0004
+#define SCTP_HEARTBEAT_SUCCESS 0x0008
+#define SCTP_RESPONSE_TO_USER_REQ 0x0010
+#define SCTP_INTERNAL_ERROR 0x0020
+#define SCTP_SHUTDOWN_GUARD_EXPIRES 0x0040
+#define SCTP_RECEIVED_SACK 0x0080
+#define SCTP_PEER_FAULTY 0x0100
+#define SCTP_ICMP_REFUSED 0x0200
+
+/* bits for TOS field */
+#define SCTP_ECT0_BIT 0x02
+#define SCTP_ECT1_BIT 0x01
+#define SCTP_CE_BITS 0x03
+
+/* below turns off above */
+#define SCTP_FLEXIBLE_ADDRESS 0x20
+#define SCTP_NO_HEARTBEAT 0x40
+
+/* mask to get sticky */
+#define SCTP_STICKY_OPTIONS_MASK 0x0c
+
+
+/*
+ * SCTP states for internal state machine XXX (should match "user" values)
+ */
+#define SCTP_STATE_EMPTY 0x0000
+#define SCTP_STATE_INUSE 0x0001
+#define SCTP_STATE_COOKIE_WAIT 0x0002
+#define SCTP_STATE_COOKIE_ECHOED 0x0004
+#define SCTP_STATE_OPEN 0x0008
+#define SCTP_STATE_SHUTDOWN_SENT 0x0010
+#define SCTP_STATE_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_STATE_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_STATE_SHUTDOWN_PENDING 0x0080
+#define SCTP_STATE_CLOSED_SOCKET 0x0100
+#define SCTP_STATE_ABOUT_TO_BE_FREED 0x0200
+#define SCTP_STATE_PARTIAL_MSG_LEFT 0x0400
+#define SCTP_STATE_WAS_ABORTED 0x0800
+#define SCTP_STATE_MASK 0x007f
+
+#define SCTP_GET_STATE(asoc) ((asoc)->state & SCTP_STATE_MASK)
+#define SCTP_SET_STATE(asoc, newstate) ((asoc)->state = ((asoc)->state & ~SCTP_STATE_MASK) | newstate)
+#define SCTP_CLEAR_SUBSTATE(asoc, substate) ((asoc)->state &= ~substate)
+#define SCTP_ADD_SUBSTATE(asoc, substate) ((asoc)->state |= substate)
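
A small usage sketch (the asoc pointer is hypothetical): substate bits above the 0x007f mask survive a state change.

    /* SCTP_SET_STATE(asoc, SCTP_STATE_OPEN) clears only the bits under
     * SCTP_STATE_MASK and ORs in 0x0008, so a substate flag such as
     * SCTP_STATE_ABOUT_TO_BE_FREED (0x0200) is preserved;
     * SCTP_GET_STATE(asoc) afterwards returns SCTP_STATE_OPEN. */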
+
+/* SCTP reachability state for each address */
+#define SCTP_ADDR_REACHABLE 0x001
+#define SCTP_ADDR_NOT_REACHABLE 0x002
+#define SCTP_ADDR_NOHB 0x004
+#define SCTP_ADDR_BEING_DELETED 0x008
+#define SCTP_ADDR_NOT_IN_ASSOC 0x010
+#define SCTP_ADDR_WAS_PRIMARY 0x020
+#define SCTP_ADDR_SWITCH_PRIMARY 0x040
+#define SCTP_ADDR_OUT_OF_SCOPE 0x080
+#define SCTP_ADDR_DOUBLE_SWITCH 0x100
+#define SCTP_ADDR_UNCONFIRMED 0x200
+#define SCTP_ADDR_REQ_PRIMARY 0x400
+/* JRS 5/13/07 - Added potentially failed state for CMT PF */
+#define SCTP_ADDR_PF 0x800
+#define SCTP_REACHABLE_MASK 0x203
+
+/* bound address types (e.g. valid address types to allow) */
+#define SCTP_BOUND_V6 0x01
+#define SCTP_BOUND_V4 0x02
+
+/*
+ * what is the default number of mbufs in a chain I allow before switching to
+ * a cluster
+ */
+#define SCTP_DEFAULT_MBUFS_IN_CHAIN 5
+
+/* How long a cookie lives in milli-seconds */
+#define SCTP_DEFAULT_COOKIE_LIFE 60000
+
+/* resource limit of streams */
+#define MAX_SCTP_STREAMS 2048
+
+/* Maximum the mapping array will grow to (TSN mapping array) */
+#define SCTP_MAPPING_ARRAY 512
+
+/* size of the initial malloc on the mapping array */
+#define SCTP_INITIAL_MAPPING_ARRAY 16
+/* how much we grow the mapping array each call */
+#define SCTP_MAPPING_ARRAY_INCR 32
+
+/*
+ * Here we define the timer types used by the implementation as arguments in
+ * the set/get timer type calls.
+ */
+#define SCTP_TIMER_INIT 0
+#define SCTP_TIMER_RECV 1
+#define SCTP_TIMER_SEND 2
+#define SCTP_TIMER_HEARTBEAT 3
+#define SCTP_TIMER_PMTU 4
+#define SCTP_TIMER_MAXSHUTDOWN 5
+#define SCTP_TIMER_SIGNATURE 6
+/*
+ * number of timer types in the base SCTP structure used in the set/get and
+ * has the base default.
+ */
+#define SCTP_NUM_TMRS 7
+
+/* timer types */
+#define SCTP_TIMER_TYPE_NONE 0
+#define SCTP_TIMER_TYPE_SEND 1
+#define SCTP_TIMER_TYPE_INIT 2
+#define SCTP_TIMER_TYPE_RECV 3
+#define SCTP_TIMER_TYPE_SHUTDOWN 4
+#define SCTP_TIMER_TYPE_HEARTBEAT 5
+#define SCTP_TIMER_TYPE_COOKIE 6
+#define SCTP_TIMER_TYPE_NEWCOOKIE 7
+#define SCTP_TIMER_TYPE_PATHMTURAISE 8
+#define SCTP_TIMER_TYPE_SHUTDOWNACK 9
+#define SCTP_TIMER_TYPE_ASCONF 10
+#define SCTP_TIMER_TYPE_SHUTDOWNGUARD 11
+#define SCTP_TIMER_TYPE_AUTOCLOSE 12
+#define SCTP_TIMER_TYPE_EVENTWAKE 13
+#define SCTP_TIMER_TYPE_STRRESET 14
+#define SCTP_TIMER_TYPE_INPKILL 15
+#define SCTP_TIMER_TYPE_ITERATOR 16
+#define SCTP_TIMER_TYPE_EARLYFR 17
+#define SCTP_TIMER_TYPE_ASOCKILL 18
+#define SCTP_TIMER_TYPE_ADDR_WQ 19
+#define SCTP_TIMER_TYPE_ZERO_COPY 20
+#define SCTP_TIMER_TYPE_ZCOPY_SENDQ 21
+#define SCTP_TIMER_TYPE_PRIM_DELETED 22
+/* add new timers here - and increment LAST */
+#define SCTP_TIMER_TYPE_LAST 23
+
+#define SCTP_IS_TIMER_TYPE_VALID(t) (((t) > SCTP_TIMER_TYPE_NONE) && \
+ ((t) < SCTP_TIMER_TYPE_LAST))
+
+
+
+/*
+ * Number of ticks before the delayed soxwakeup() event is sent AFTER
+ * the accept() call
+ */
+#define SCTP_EVENTWAKEUP_WAIT_TICKS 3000
+
+/*
+ * Of course we really don't collect stale cookies, being folks of discerning
+ * taste. However we do count them, and if we get too many before the
+ * association comes up.. we give up. Below is the constant that dictates when
+ * we give it up...this is an implementation dependent treatment. In ours we
+ * do not ask for an extension of time, but just retry this many times...
+ */
+#define SCTP_MAX_STALE_COOKIES_I_COLLECT 10
+
+/* max number of TSN's dup'd that I will hold */
+#define SCTP_MAX_DUP_TSNS 20
+
+/*
+ * Here we define the types used when setting the retry amounts.
+ */
+/* constants for type of set */
+#define SCTP_MAXATTEMPT_INIT 2
+#define SCTP_MAXATTEMPT_SEND 3
+
+/* Maximum TSN's we will summarize in a drop report */
+#define SCTP_MAX_DROP_REPORT 16
+
+/* How many drop re-attempts we make on INIT/COOKIE-ECHO */
+#define SCTP_RETRY_DROPPED_THRESH 4
+
+/*
+ * And the max we will keep a history of in the tcb which MUST be lower than
+ * 256.
+ */
+#define SCTP_MAX_DROP_SAVE_REPORT 16
+
+/*
+ * Here we define the default timers and the default number of attempts we
+ * make for each respective side (send/init).
+ */
+
+/*
+ * Maximum number of chunks a single association can have on it. Note that
+ * this is a squishy number since the count can run over this if the user
+ * sends a large message down .. the fragmented chunks don't count until
+ * AFTER the message is on queue.. it would be the next send that blocks
+ * things. This number will get tuned up at boot in the sctp_init and use the
+ * number of clusters as a base. This way high bandwidth environments will
+ * not get impacted by the lower bandwidth sending a bunch of 1 byte chunks
+ */
+#define SCTP_ASOC_MAX_CHUNKS_ON_QUEUE 512
+
+
+/* The conversion from time to ticks and vice versa is done by rounding
+ * upwards. This way we can test in the code the time to be positive and
+ * know that this corresponds to a positive number of ticks.
+ */
+#define MSEC_TO_TICKS(x) ((hz == 1000) ? x : ((((x) * hz) + 999) / 1000))
+#define TICKS_TO_MSEC(x) ((hz == 1000) ? x : ((((x) * 1000) + (hz - 1)) / hz))
+
+#define SEC_TO_TICKS(x) ((x) * hz)
+#define TICKS_TO_SEC(x) (((x) + (hz - 1)) / hz)
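
A worked example of the round-up behaviour, assuming hz = 100 (10 ms ticks):

    /* MSEC_TO_TICKS(250) = ((250 * 100) + 999) / 1000 = 25 ticks
     * MSEC_TO_TICKS(5)   = ((5 * 100) + 999) / 1000   = 1 tick (never 0)
     * TICKS_TO_MSEC(1)   = ((1 * 1000) + 99) / 100    = 10 ms
     * SEC_TO_TICKS(3)    = 300, TICKS_TO_SEC(301)     = 4 (rounds up) */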
+
+/*
+ * Basically the minimum amount of time before I do an early FR. Making this
+ * value too low will cause duplicate retransmissions.
+ */
+#define SCTP_MINFR_MSEC_TIMER 250
+/* The floor this value is allowed to fall to when starting a timer. */
+#define SCTP_MINFR_MSEC_FLOOR 20
+
+/* init timer def = 1 sec */
+#define SCTP_INIT_SEC 1
+
+/* send timer def = 1 second */
+#define SCTP_SEND_SEC 1
+
+/* recv timer def = 200ms */
+#define SCTP_RECV_MSEC 200
+
+/* 30 seconds + RTO (in ms) */
+#define SCTP_HB_DEFAULT_MSEC 30000
+
+/* Max time I will wait for Shutdown to complete */
+#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
+
+
+/*
+ * This is how long a secret lives, NOT how long a cookie lives: how many
+ * ticks the current secret will live.
+ */
+#define SCTP_DEFAULT_SECRET_LIFE_SEC 3600
+
+#define SCTP_RTO_UPPER_BOUND (60000) /* 60 sec in ms */
+#define SCTP_RTO_UPPER_BOUND_SEC 60 /* for the init timer */
+#define SCTP_RTO_LOWER_BOUND (1000) /* 1 sec in ms */
+#define SCTP_RTO_INITIAL (3000) /* 3 sec in ms */
+
+
+#define SCTP_INP_KILL_TIMEOUT 20 /* number of ms to retry kill of inpcb */
+#define SCTP_ASOC_KILL_TIMEOUT 10 /* number of ms to retry kill of asoc */
+
+#define SCTP_DEF_MAX_INIT 8
+#define SCTP_DEF_MAX_SEND 10
+#define SCTP_DEF_MAX_PATH_RTX 5
+
+#define SCTP_DEF_PMTU_RAISE_SEC 600 /* 10 min between raise attempts */
+#define SCTP_DEF_PMTU_MIN 600
+
+
+#define SCTP_MSEC_IN_A_SEC 1000
+#define SCTP_USEC_IN_A_SEC 1000000
+#define SCTP_NSEC_IN_A_SEC 1000000000
+
+#define SCTP_MAX_OUTSTANDING_DG 10000
+
+/* How many streams I request initially by default */
+#define SCTP_OSTREAM_INITIAL 10
+
+/*
+ * How many smallest_mtu's need to increase before a window update sack is
+ * sent (should be a power of 2).
+ */
+#define SCTP_SEG_TO_RWND_UPD 32
+/* Send window update (incr * this > hiwat). Should be a power of 2 */
+#define SCTP_SCALE_OF_RWND_TO_UPD 4
+#define SCTP_MINIMAL_RWND (4096) /* minimal rwnd */
+
+#define SCTP_ADDRMAX 24
+
+/* SCTP DEBUG Switch parameters */
+#define SCTP_DEBUG_TIMER1 0x00000001
+#define SCTP_DEBUG_TIMER2 0x00000002 /* unused */
+#define SCTP_DEBUG_TIMER3 0x00000004 /* unused */
+#define SCTP_DEBUG_TIMER4 0x00000008
+#define SCTP_DEBUG_OUTPUT1 0x00000010
+#define SCTP_DEBUG_OUTPUT2 0x00000020
+#define SCTP_DEBUG_OUTPUT3 0x00000040
+#define SCTP_DEBUG_OUTPUT4 0x00000080
+#define SCTP_DEBUG_UTIL1 0x00000100
+#define SCTP_DEBUG_UTIL2 0x00000200 /* unused */
+#define SCTP_DEBUG_AUTH1 0x00000400
+#define SCTP_DEBUG_AUTH2 0x00000800 /* unused */
+#define SCTP_DEBUG_INPUT1 0x00001000
+#define SCTP_DEBUG_INPUT2 0x00002000
+#define SCTP_DEBUG_INPUT3 0x00004000
+#define SCTP_DEBUG_INPUT4 0x00008000 /* unused */
+#define SCTP_DEBUG_ASCONF1 0x00010000
+#define SCTP_DEBUG_ASCONF2 0x00020000
+#define SCTP_DEBUG_OUTPUT5 0x00040000 /* unused */
+#define SCTP_DEBUG_XXX 0x00080000 /* unused */
+#define SCTP_DEBUG_PCB1 0x00100000
+#define SCTP_DEBUG_PCB2 0x00200000 /* unused */
+#define SCTP_DEBUG_PCB3 0x00400000
+#define SCTP_DEBUG_PCB4 0x00800000
+#define SCTP_DEBUG_INDATA1 0x01000000
+#define SCTP_DEBUG_INDATA2 0x02000000 /* unused */
+#define SCTP_DEBUG_INDATA3 0x04000000 /* unused */
+#define SCTP_DEBUG_INDATA4 0x08000000 /* unused */
+#define SCTP_DEBUG_USRREQ1 0x10000000 /* unused */
+#define SCTP_DEBUG_USRREQ2 0x20000000 /* unused */
+#define SCTP_DEBUG_PEEL1 0x40000000
+#define SCTP_DEBUG_XXXXX 0x80000000 /* unused */
+#define SCTP_DEBUG_ALL 0x7ff3ffff
+#define SCTP_DEBUG_NOISY 0x00040000
+
+/* What sender needs to see to avoid SWS or we consider peer's rwnd 0 */
+#define SCTP_SWS_SENDER_DEF 1420
+
+/*
+ * SWS is scaled to the sb_hiwat of the socket. A value of 2 is hiwat/4, 1
+ * would be hiwat/2 etc.
+ */
+/* What receiver needs to see in sockbuf or we tell peer it's 1 */
+#define SCTP_SWS_RECEIVER_DEF 3000
+
+#define SCTP_INITIAL_CWND 4380
+
+#define SCTP_DEFAULT_MTU 1500 /* emergency default MTU */
+/* amount peer is obligated to have in rwnd or I will abort */
+#define SCTP_MIN_RWND 1500
+
+#define SCTP_WINDOW_MIN 1500 /* smallest rwnd can be */
+#define SCTP_WINDOW_MAX 1048576 /* biggest I can grow rwnd to. My playing
+ * around suggests a value greater than 64k
+ * does not do much; I guess via the kernel
+ * limitations on the stream/socket. */
+
+/* I can handle a 1meg re-assembly */
+#define SCTP_DEFAULT_MAXMSGREASM 1048576
+
+#define SCTP_DEFAULT_MAXSEGMENT 65535
+
+#define SCTP_CHUNK_BUFFER_SIZE 512
+#define SCTP_PARAM_BUFFER_SIZE 512
+
+/* small chunk store for looking at chunk_list in auth */
+#define SCTP_SMALL_CHUNK_STORE 260
+
+#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
+#define SCTP_HOW_MANY_SECRETS 2 /* how many secrets I keep */
+
+#define SCTP_NUMBER_OF_SECRETS 8 /* or 8 * 4 = 32 octets */
+#define SCTP_SECRET_SIZE 32 /* number of octets in 256 bits */
+
+
+/*
+ * SCTP upper layer notifications
+ */
+#define SCTP_NOTIFY_ASSOC_UP 1
+#define SCTP_NOTIFY_ASSOC_DOWN 2
+#define SCTP_NOTIFY_INTERFACE_DOWN 3
+#define SCTP_NOTIFY_INTERFACE_UP 4
+#define SCTP_NOTIFY_DG_FAIL 5
+#define SCTP_NOTIFY_STRDATA_ERR 6
+#define SCTP_NOTIFY_ASSOC_ABORTED 7
+#define SCTP_NOTIFY_PEER_OPENED_STREAM 8
+#define SCTP_NOTIFY_STREAM_OPENED_OK 9
+#define SCTP_NOTIFY_ASSOC_RESTART 10
+#define SCTP_NOTIFY_HB_RESP 11
+#define SCTP_NOTIFY_ASCONF_SUCCESS 12
+#define SCTP_NOTIFY_ASCONF_FAILED 13
+#define SCTP_NOTIFY_PEER_SHUTDOWN 14
+#define SCTP_NOTIFY_ASCONF_ADD_IP 15
+#define SCTP_NOTIFY_ASCONF_DELETE_IP 16
+#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 17
+#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 18
+#define SCTP_NOTIFY_INTERFACE_CONFIRMED 20
+#define SCTP_NOTIFY_STR_RESET_RECV 21
+#define SCTP_NOTIFY_STR_RESET_SEND 22
+#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 23
+#define SCTP_NOTIFY_STR_RESET_FAILED_IN 24
+#define SCTP_NOTIFY_AUTH_NEW_KEY 25
+#define SCTP_NOTIFY_AUTH_KEY_CONFLICT 26
+#define SCTP_NOTIFY_SPECIAL_SP_FAIL 27
+#define SCTP_NOTIFY_MAX 27
+
+/* This is the value for messages that are NOT completely
+ * copied down where we will start to split the message.
+ * So, with our default, we split only if the piece we
+ * want to take will fill up a full MTU (assuming
+ * a 1500 byte MTU).
+ */
+#define SCTP_DEFAULT_SPLIT_POINT_MIN 2904
+
+/* ABORT CODES and other tell-tale location
+ * codes are generated by adding the below
+ * to the instance id.
+ */
+
+/* File defines */
+#define SCTP_FROM_SCTP_INPUT 0x10000000
+#define SCTP_FROM_SCTP_PCB 0x20000000
+#define SCTP_FROM_SCTP_INDATA 0x30000000
+#define SCTP_FROM_SCTP_TIMER 0x40000000
+#define SCTP_FROM_SCTP_USRREQ 0x50000000
+#define SCTP_FROM_SCTPUTIL 0x60000000
+#define SCTP_FROM_SCTP6_USRREQ 0x70000000
+#define SCTP_FROM_SCTP_ASCONF 0x80000000
+#define SCTP_FROM_SCTP_OUTPUT 0x90000000
+#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
+#define SCTP_FROM_SCTP_PANDA 0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+
+/* Location ID's */
+#define SCTP_LOC_1 0x00000001
+#define SCTP_LOC_2 0x00000002
+#define SCTP_LOC_3 0x00000003
+#define SCTP_LOC_4 0x00000004
+#define SCTP_LOC_5 0x00000005
+#define SCTP_LOC_6 0x00000006
+#define SCTP_LOC_7 0x00000007
+#define SCTP_LOC_8 0x00000008
+#define SCTP_LOC_9 0x00000009
+#define SCTP_LOC_10 0x0000000a
+#define SCTP_LOC_11 0x0000000b
+#define SCTP_LOC_12 0x0000000c
+#define SCTP_LOC_13 0x0000000d
+#define SCTP_LOC_14 0x0000000e
+#define SCTP_LOC_15 0x0000000f
+#define SCTP_LOC_16 0x00000010
+#define SCTP_LOC_17 0x00000011
+#define SCTP_LOC_18 0x00000012
+#define SCTP_LOC_19 0x00000013
+#define SCTP_LOC_20 0x00000014
+#define SCTP_LOC_21 0x00000015
+#define SCTP_LOC_22 0x00000016
+#define SCTP_LOC_23 0x00000017
+#define SCTP_LOC_24 0x00000018
+#define SCTP_LOC_25 0x00000019
+#define SCTP_LOC_26 0x0000001a
+#define SCTP_LOC_27 0x0000001b
+#define SCTP_LOC_28 0x0000001c
+#define SCTP_LOC_29 0x0000001d
+#define SCTP_LOC_30 0x0000001e
+#define SCTP_LOC_31 0x0000001f
+#define SCTP_LOC_32 0x00000020
+#define SCTP_LOC_33 0x00000021
+
+
+/* Free assoc codes */
+#define SCTP_NORMAL_PROC 0
+#define SCTP_PCBFREE_NOFORCE 1
+#define SCTP_PCBFREE_FORCE 2
+
+/* From codes for adding addresses */
+#define SCTP_ADDR_IS_CONFIRMED 8
+#define SCTP_ADDR_DYNAMIC_ADDED 6
+#define SCTP_IN_COOKIE_PROC 100
+#define SCTP_ALLOC_ASOC 1
+#define SCTP_LOAD_ADDR_2 2
+#define SCTP_LOAD_ADDR_3 3
+#define SCTP_LOAD_ADDR_4 4
+#define SCTP_LOAD_ADDR_5 5
+
+#define SCTP_DONOT_SETSCOPE 0
+#define SCTP_DO_SETSCOPE 1
+
+
+/* This value determines the default for when
+ * we try to add more on the send queue, if
+ * there is room. This prevents us from cycling
+ * into the copy_resume routine too often if
+ * we have not got enough space to add a decent
+ * enough size message. Note that if we have enough
+ * space to complete the message copy we will always
+ * add to the message, no matter what the size. It's
+ * only when we reach the point that we have some left
+ * to add, and there is only room for part of it, that we
+ * will use this threshold. It's also a sysctl.
+ */
+#define SCTP_DEFAULT_ADD_MORE 1452
+
+#ifndef SCTP_PCBHASHSIZE
+/* default number of association hash buckets in each endpoint */
+#define SCTP_PCBHASHSIZE 256
+#endif
+#ifndef SCTP_TCBHASHSIZE
+#define SCTP_TCBHASHSIZE 1024
+#endif
+
+#ifndef SCTP_CHUNKQUEUE_SCALE
+#define SCTP_CHUNKQUEUE_SCALE 10
+#endif
+
+/* clock variance is 1 ms */
+#define SCTP_CLOCK_GRANULARITY 1
+#define IP_HDR_SIZE 40 /* we use the size of an IPv6 header here; this
+ * detracts a small amount for ipv4 but it
+ * simplifies the ipv6 addition */
+
+/* Argument magic number for sctp_inpcb_free() */
+
+/* third argument */
+#define SCTP_CALLED_DIRECTLY_NOCMPSET 0
+#define SCTP_CALLED_AFTER_CMPSET_OFCLOSE 1
+
+/* second argument */
+#define SCTP_FREE_SHOULD_USE_ABORT 1
+#define SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE 0
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132 /* the Official IANA number :-) */
+#endif /* !IPPROTO_SCTP */
+
+#define SCTP_MAX_DATA_BUNDLING 256
+#define SCTP_MAX_CONTROL_BUNDLING 20
+
+/* modular comparison */
+/* True if a > b (mod = M) */
+#define compare_with_wrap(a, b, M) (((a > b) && ((a - b) < ((M >> 1) + 1))) || \
+ ((b > a) && ((b - a) > ((M >> 1) + 1))))
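
Worked examples of the wrap-aware comparison with M = MAX_TSN (values picked only for illustration):

    /* compare_with_wrap(10, 5, MAX_TSN)         -> true  (10 is newer, gap small)
     * compare_with_wrap(5, 0xfffffffe, MAX_TSN) -> true  (5 is newer: the TSN
     *                                              space wrapped past 0xffffffff)
     * compare_with_wrap(5, 10, MAX_TSN)         -> false */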
+
+
+/* Mapping array manipulation routines */
+#define SCTP_IS_TSN_PRESENT(arry, gap) ((arry[(gap >> 3)] >> (gap & 0x07)) & 0x01)
+#define SCTP_SET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] |= (0x01 << ((gap & 0x07))))
+#define SCTP_UNSET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] &= ((~(0x01 << ((gap & 0x07)))) & 0xff))
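
For instance, with gap = 10 the macros touch byte 10 >> 3 = 1, bit 10 & 0x07 = 2 of the array:

    /* SCTP_SET_TSN_PRESENT(map, 10)   -> map[1] |= 0x04
     * SCTP_IS_TSN_PRESENT(map, 10)    -> (map[1] >> 2) & 0x01
     * SCTP_UNSET_TSN_PRESENT(map, 10) -> map[1] &= ~0x04 (masked to 8 bits) */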
+
+
+#define SCTP_RETRAN_DONE -1
+#define SCTP_RETRAN_EXIT -2
+
+/*
+ * This value defines the number of vtag block time wait entries per list
+ * element. Each entry will take 2 4-byte ints (and of course the overhead
+ * of the next pointer as well). Using 15 as an example will yield ((8 *
+ * 15) + 8) or 128 bytes of overhead for each timewait block that gets
+ * initialized. Increasing it to 31 would yield 256 bytes per block.
+ */
+#define SCTP_NUMBER_IN_VTAG_BLOCK 15
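
Spelling out the overhead figure from the comment (each entry is two 4-byte ints):

    /* (8 bytes/entry * 15 entries) + 8 bytes of next pointer = 128 bytes;
     * with 31 entries: (8 * 31) + 8 = 256 bytes per timewait block. */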
+/*
+ * If we use the STACK option, we have an array of this size head pointers.
+ * This array is mod'd with the size to find which bucket and then all
+ * entries must be searched to see if the tag is in timed wait. If so we
+ * reject it.
+ */
+#define SCTP_STACK_VTAG_HASH_SIZE 31
+#define SCTP_STACK_VTAG_HASH_SIZE_A 32
+
+
+/*
+ * If we use the per-endpoint model then we do not have a hash table of
+ * entries but instead have a single head pointer and we must crawl through
+ * the entire list.
+ */
+
+/*
+ * Number of seconds of time wait for a vtag.
+ */
+#define SCTP_TIME_WAIT 60
+
+/* This time wait is the same as the default cookie life
+ * since we now enter a tag every time we send a cookie.
+ * We want this shorter to avoid vtag depletion.
+ */
+#define SCTP_TIME_WAIT_SHORT 60
+
+/* The system retains a cache of free chunks to
+ * cut down on calls to the memory allocation system. There
+ * is a per-association limit of free items and an overall
+ * system limit. If either one gets hit then the resource
+ * stops being cached.
+ */
+
+#define SCTP_DEF_ASOC_RESC_LIMIT 10
+#define SCTP_DEF_SYSTEM_RESC_LIMIT 1000
+
+/*-
+ * defines for socket lock states.
+ * Used by __APPLE__ and SCTP_SO_LOCK_TESTING
+ */
+#define SCTP_SO_LOCKED 1
+#define SCTP_SO_NOT_LOCKED 0
+
+
+#define SCTP_HOLDS_LOCK 1
+#define SCTP_NOT_LOCKED 0
+
+/*-
+ * For address locks, do we hold the lock?
+ */
+#define SCTP_ADDR_LOCKED 1
+#define SCTP_ADDR_NOT_LOCKED 0
+
+#define IN4_ISPRIVATE_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 10) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 172) && \
+ (((uint8_t *)&(a)->s_addr)[1] >= 16) && \
+ (((uint8_t *)&(a)->s_addr)[1] <= 32)) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 192) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 168)))
+
+#define IN4_ISLOOPBACK_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[2] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[3] == 1))
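
A hedged usage sketch of the address-class macros (addresses chosen only for illustration; the bytes of s_addr are examined in network order, so the tests are endian-safe):

    struct in_addr ex_priv = { .s_addr = htonl(0x0a000001) };	/* 10.0.0.1  */
    struct in_addr ex_loop = { .s_addr = htonl(0x7f000001) };	/* 127.0.0.1 */
    /* IN4_ISPRIVATE_ADDRESS(&ex_priv)  -> non-zero (10/8 private range)
     * IN4_ISLOOPBACK_ADDRESS(&ex_loop) -> non-zero (127.0.0.1) */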
+
+
+#if defined(_KERNEL)
+
+#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
+#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x))
+/*#if defined(__FreeBSD__) || defined(__APPLE__)*/
+/*#define SCTP_GETTIME_TIMEVAL(x) { \*/
+/* (x)->tv_sec = ticks / 1000; \*/
+/* (x)->tv_usec = (ticks % 1000) * 1000; \*/
+/*}*/
+
+/*#else*/
+/*#define SCTP_GETTIME_TIMEVAL(x) (microtime(x))*/
+/*#endif __FreeBSD__ */
+
+#define sctp_sowwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sowwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ SOCKBUF_UNLOCK(&((so)->so_snd)); \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup_locked(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ } else { \
+ sorwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ SOCKBUF_UNLOCK(&((so)->so_rcv)); \
+ } else { \
+ sorwakeup_locked(so); \
+ } \
+} while (0)
+
+#endif /* _KERNEL */
+#endif
--- /dev/null
+++ sys/netinet/sctp_indata.c
@@ -0,0 +1,5605 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.c,v 1.36 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_indata.c,v 1.45.2.2.2.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+
+
+/*
+ * NOTES: On the outbound side of things I need to check the sack timer to
+ * see if I should generate a sack into the chunk queue (if I have data to
+ * send, that is, and will be sending it) .. for bundling.
+ *
+ * The callback in sctp_usrreq.c will get called when the socket is read from.
+ * This will cause sctp_service_queues() to get called on the top entry in
+ * the list.
+ */
+
+void
+sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ uint32_t calc, calc_save;
+
+ /*
+ * This is really set wrong with respect to a 1-2-m socket. Since
+ * the sb_cc is the count that everyone has put up. When we re-write
+ * sctp_soreceive then we will fix this so that ONLY this
+ * association's data is taken into account.
+ */
+ if (stcb->sctp_socket == NULL)
+ return;
+
+ if (stcb->asoc.sb_cc == 0 &&
+ asoc->size_on_reasm_queue == 0 &&
+ asoc->size_on_all_streams == 0) {
+ /* Full rwnd granted */
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ SCTP_MINIMAL_RWND);
+ return;
+ }
+ /* get actual space */
+ calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
+
+ /*
+ * take out what has NOT been put on the socket queue and what we still hold
+ * for putting up.
+ */
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_reasm_queue);
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_all_streams);
+
+ if (calc == 0) {
+ /* out of space */
+ asoc->my_rwnd = 0;
+ return;
+ }
+ /* what is the overhead of all these rwnd's */
+
+ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
+ calc_save = calc;
+
+ asoc->my_rwnd = calc;
+ if ((asoc->my_rwnd == 0) &&
+ (calc < stcb->asoc.my_rwnd_control_len)) {
+ /*-
+ * If our rwnd == 0 && the overhead is greater than the
+ * data onqueue, we clamp the rwnd to 1. This lets us
+ * still accept inbound segments, but hopefully will shut
+ * the sender down when he finally gets the message. This
+ * hopefully will gracefully avoid discarding packets.
+ */
+ asoc->my_rwnd = 1;
+ }
+ if (asoc->my_rwnd &&
+ (asoc->my_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_receiver)) {
+ /* SWS engaged, tell peer none left */
+ asoc->my_rwnd = 1;
+ }
+}
+
+/* Calculate what the rwnd would be */
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ uint32_t calc = 0, calc_save = 0, result = 0;
+
+ /*
+ * This is really set wrong with respect to a 1-2-m socket. Since
+ * the sb_cc is the count that everyone has put up. When we re-write
+ * sctp_soreceive then we will fix this so that ONLY this
+ * association's data is taken into account.
+ */
+ if (stcb->sctp_socket == NULL)
+ return (calc);
+
+ if (stcb->asoc.sb_cc == 0 &&
+ asoc->size_on_reasm_queue == 0 &&
+ asoc->size_on_all_streams == 0) {
+ /* Full rwnd granted */
+ calc = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ SCTP_MINIMAL_RWND);
+ return (calc);
+ }
+ /* get actual space */
+ calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
+
+ /*
+ * take out what has NOT been put on the socket queue and what we still hold
+ * for putting up.
+ */
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_reasm_queue);
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_all_streams);
+
+ if (calc == 0) {
+ /* out of space */
+ return (calc);
+ }
+ /* what is the overhead of all these rwnd's */
+ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
+ calc_save = calc;
+
+ result = calc;
+ if ((result == 0) &&
+ (calc < stcb->asoc.my_rwnd_control_len)) {
+ /*-
+ * If our rwnd == 0 && the overhead is greater than the
+ * data onqueue, we clamp the rwnd to 1. This lets us
+ * still accept inbound segments, but hopefully will shut
+ * the sender down when he finally gets the message. This
+ * hopefully will gracefully avoid discarding packets.
+ */
+ result = 1;
+ }
+ if (result &&
+ (result < stcb->sctp_ep->sctp_ep.sctp_sws_receiver)) {
+ /* SWS engaged, tell peer none left */
+ result = 1;
+ }
+ return (result);
+}
+
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = stream_no;
+ read_queue_e->sinfo_ssn = stream_seq;
+ read_queue_e->sinfo_flags = (flags << 8);
+ read_queue_e->sinfo_ppid = ppid;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = tsn;
+ read_queue_e->sinfo_cumtsn = tsn;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = net;
+ read_queue_e->length = 0;
+ atomic_add_int(&net->ref_count, 1);
+ read_queue_e->data = dm;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
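+
+/*
+ * Note that the builder above takes a reference on the net the data came
+ * from (atomic_add_int on ref_count); the matching release is done with
+ * sctp_free_remote_addr() before the entry is freed (see the
+ * sctp_free_a_readq() call sites below). The sinfo_context is filled from
+ * the association context rather than from the context argument.
+ */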
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+static struct sctp_queued_to_read *
+sctp_build_readq_entry_chk(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = chk->rec.data.stream_number;
+ read_queue_e->sinfo_ssn = chk->rec.data.stream_seq;
+ read_queue_e->sinfo_flags = (chk->rec.data.rcv_flags << 8);
+ read_queue_e->sinfo_ppid = chk->rec.data.payloadtype;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_cumtsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = chk->whoTo;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->length = 0;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ read_queue_e->data = chk->data;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ struct mbuf *ret;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+
+
+ ret = sctp_get_mbuf_for_msg(len,
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (ret == NULL) {
+ /* No space */
+ return (ret);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = mtod(ret, struct cmsghdr *);
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ SCTP_BUF_LEN(ret) = cmh->cmsg_len;
+ return (ret);
+}
+
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ char *buf;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+ SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG);
+ if (buf == NULL) {
+ /* No space */
+ return (buf);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = (struct cmsghdr *)buf;
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ *control_len = len;
+ return (buf);
+}
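+
+/*
+ * The two builders above produce the same control message: a cmsghdr with
+ * cmsg_level IPPROTO_SCTP and cmsg_type SCTP_SNDRCV (or SCTP_EXTRCV when
+ * the application enabled extended receive info) followed by the
+ * sndrcvinfo data. They differ only in the backing storage, an mbuf for
+ * sctp_build_ctl_nchunk() versus an SCTP_M_CMSG allocation for
+ * sctp_build_ctl_cchunk().
+ */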
+
+
+/*
+ * We are currently delivering from the reassembly queue. We must continue to
+ * deliver until we either: 1) run out of space, 2) run out of sequential
+ * TSNs, or 3) hit the SCTP_DATA_LAST_FRAG flag.
+ */
+static void
+sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint16_t stream_no;
+ int end = 0;
+ int cntDel;
+ struct sctp_queued_to_read *control, *ctl, *ctlat;
+
+ if (stcb == NULL)
+ return;
+
+ cntDel = stream_no = 0;
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ /* socket above is long gone or going.. */
+abandon:
+ asoc->fragmented_delivery_inprogress = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /*
+ * Lose the data pointer, since it's in the socket
+ * buffer
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /* Now free the address and data */
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ return;
+ }
+ if (chk->rec.data.TSN_seq != (asoc->tsn_last_delivered + 1)) {
+ /* Can't deliver more :< */
+ return;
+ }
+ stream_no = chk->rec.data.stream_number;
+ nxt_todel = asoc->strmin[stream_no].last_sequence_delivered + 1;
+ if (nxt_todel != chk->rec.data.stream_seq &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ /*
+ * Not the next sequence to deliver in its stream OR
+ * unordered
+ */
+ return;
+ }
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+
+ control = sctp_build_readq_entry_chk(stcb, chk);
+ if (control == NULL) {
+ /* out of memory? */
+ return;
+ }
+ /* save it off for our future deliveries */
+ stcb->asoc.control_pdapi = control;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ sctp_add_to_readq(stcb->sctp_ep,
+ stcb, control, &stcb->sctp_socket->so_rcv, end, SCTP_SO_NOT_LOCKED);
+ cntDel++;
+ } else {
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb,
+ stcb->asoc.control_pdapi,
+ chk->data, end, chk->rec.data.TSN_seq,
+ &stcb->sctp_socket->so_rcv)) {
+ /*
+ * something is very wrong, either
+ * control_pdapi is NULL, or the tail_mbuf
+ * is corrupt, or there is an EOM already on
+ * the mbuf chain.
+ */
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto abandon;
+ } else {
+ if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
+ panic("This should not happen control_pdapi NULL?");
+ }
+ /* if we did not panic, it was an EOM */
+ panic("Bad chunking ??");
+ return;
+ }
+ }
+ cntDel++;
+ }
+ /* pull it we did it */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[stream_no].last_sequence_delivered++;
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ }
+ } else if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /*
+ * turn the flag back on since we just delivered
+ * yet another one.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ }
+ asoc->tsn_of_pdapi_last_delivered = chk->rec.data.TSN_seq;
+ asoc->last_flags_delivered = chk->rec.data.rcv_flags;
+ asoc->last_strm_seq_delivered = chk->rec.data.stream_seq;
+ asoc->last_strm_no_delivered = chk->rec.data.stream_number;
+
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /* free up the chk */
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk);
+
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * Now lets see if we can deliver the next one on
+ * the stream
+ */
+ struct sctp_stream_in *strm;
+
+ strm = &asoc->strmin[stream_no];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strm->inqueue);
+ if (ctl && (nxt_todel == ctl->sinfo_ssn)) {
+ while (ctl != NULL) {
+ /* Deliver more if we can. */
+ if (nxt_todel == ctl->sinfo_ssn) {
+ ctlat = TAILQ_NEXT(ctl, next);
+ TAILQ_REMOVE(&strm->inqueue, ctl, next);
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ ctl = ctlat;
+ } else {
+ break;
+ }
+ nxt_todel = strm->last_sequence_delivered + 1;
+ }
+ }
+ break;
+ }
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ } while (chk);
+}
+
+/*
+ * Queue the chunk either right into the socket buffer if it is the next one
+ * to go OR put it in the correct place in the delivery queue. If we do
+ * append to the so_buf, keep doing so until we are out of order. One big
+ * question still remains, what to do when the socket buffer is FULL??
+ */
+static void
+sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_queued_to_read *control, int *abort_flag)
+{
+ /*
+ * FIX-ME maybe? What happens when the ssn wraps? If we are getting
+ * all the data in one stream this could happen quite rapidly. One
+ * could use the TSN to keep track of things, but this scheme breaks
+ * down in the other type of stream usage that could occur. Send a
+ * single msg to stream 0, send 4 billion messages to stream 1, now
+ * send a message to stream 0. You have a situation where the TSN
+ * has wrapped but not in the stream. Is this worth worrying about
+ * or should we just change our queue sort at the bottom to be by
+ * TSN.
+ *
+ * Could it also be legal for a peer to send ssn 1 with TSN 2 and ssn 2
+ * with TSN 1? If the peer is doing some sort of funky TSN/SSN
+ * assignment this could happen... and I don't see how this would be
+ * a violation. So for now I am undecided and will leave the sort by
+ * SSN alone. Maybe a hybrid approach is the answer.
+ *
+ */
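+ /*
+ * Example of the wrap concern above: with last_sequence_delivered at
+ * 65535 the next in-order SSN is 0, and it is compare_with_wrap()
+ * with MAX_SEQ that keeps 0 from looking "behind" 65535 in the
+ * checks below.
+ */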
+ struct sctp_stream_in *strm;
+ struct sctp_queued_to_read *at;
+ int queue_needed;
+ uint16_t nxt_todel;
+ struct mbuf *oper;
+
+ queue_needed = 1;
+ asoc->size_on_all_streams += control->length;
+ sctp_ucount_incr(asoc->cnt_on_all_streams);
+ strm = &asoc->strmin[control->sinfo_stream];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
+ }
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
+ (uint32_t) control->sinfo_stream,
+ (uint32_t) strm->last_sequence_delivered,
+ (uint32_t) nxt_todel);
+ if (compare_with_wrap(strm->last_sequence_delivered,
+ control->sinfo_ssn, MAX_SEQ) ||
+ (strm->last_sequence_delivered == control->sinfo_ssn)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
+ control->sinfo_ssn, strm->last_sequence_delivered);
+ /*
+ * throw it in the stream so it gets cleaned up in
+ * association destruction
+ */
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_1);
+ ippp++;
+ *ippp = control->sinfo_tsn;
+ ippp++;
+ *ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+
+ }
+ if (nxt_todel == control->sinfo_ssn) {
+ /* can be delivered right away? */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+ queue_needed = 0;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ control = TAILQ_FIRST(&strm->inqueue);
+ while (control != NULL) {
+ /* all delivered */
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (nxt_todel == control->sinfo_ssn) {
+ at = TAILQ_NEXT(control, next);
+ TAILQ_REMOVE(&strm->inqueue, control, next);
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ /*
+ * We ignore the return of deliver_data here
+ * since we always can hold the chunk on the
+ * d-queue. And we have a finite number that
+ * can be delivered from the strq.
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL,
+ SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ control = at;
+ continue;
+ }
+ break;
+ }
+ }
+ if (queue_needed) {
+ /*
+ * Ok, we did not deliver this guy, find the correct place
+ * to put it on the queue.
+ */
+ if (TAILQ_EMPTY(&strm->inqueue)) {
+ /* Empty queue */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INSERT_HD);
+ }
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ } else {
+ TAILQ_FOREACH(at, &strm->inqueue, next) {
+ if (compare_with_wrap(at->sinfo_ssn,
+ control->sinfo_ssn, MAX_SEQ)) {
+ /*
+ * one in queue is bigger than the
+ * new one, insert before this one
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_MD);
+ }
+ TAILQ_INSERT_BEFORE(at, control, next);
+ break;
+ } else if (at->sinfo_ssn == control->sinfo_ssn) {
+ /*
+ * Gak, He sent me a duplicate str
+ * seq number
+ */
+ /*
+ * foo bar, I guess I will just free
+ * this new guy, should we abort
+ * too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped.
+ * Maybe I should compare to TSN
+ * somehow... sigh for now just blow
+ * away the chunk!
+ */
+
+ if (control->data)
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ if (control->whoFrom)
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ sctp_free_a_readq(stcb, control);
+ return;
+ } else {
+ if (TAILQ_NEXT(at, next) == NULL) {
+ /*
+ * We are at the end, insert
+ * it after this one
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_TL);
+ }
+ TAILQ_INSERT_AFTER(&strm->inqueue,
+ at, control, next);
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Returns two things: the total size of the deliverable parts of the
+ * first fragmented message on the reassembly queue, and a 1 if all of
+ * the message is ready or a 0 if the message is still incomplete.
+ */
+static int
+sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn;
+
+ *t_size = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* nothing on the queue */
+ return (0);
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ /* Not a first on the queue */
+ return (0);
+ }
+ tsn = chk->rec.data.TSN_seq;
+ while (chk) {
+ if (tsn != chk->rec.data.TSN_seq) {
+ return (0);
+ }
+ *t_size += chk->send_size;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ return (1);
+ }
+ tsn++;
+ chk = TAILQ_NEXT(chk, sctp_next);
+ }
+ return (0);
+}
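+
+/*
+ * For example, if the reassembly queue holds TSNs 1000..1003 with the
+ * FIRST flag on 1000 and the LAST flag on 1003, the scan above returns 1
+ * and *t_size is the sum of the four send_size values; a hole at 1002
+ * would make it return 0 instead.
+ */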
+
+static void
+sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint32_t tsize;
+
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* Huh? */
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ nxt_todel =
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ (nxt_todel == chk->rec.data.stream_seq ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep, the first one is here and it's ok to deliver
+ * but should we?
+ */
+ if ((sctp_is_all_msg_on_reasm(asoc, &tsize) ||
+ (tsize >= stcb->sctp_ep->partial_delivery_point))) {
+
+ /*
+ * Yes, we set up to start reception by
+ * backing down the TSN just in case we
+ * can't deliver.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ }
+ }
+ } else {
+ /*
+ * Service re-assembly will deliver stream data queued at
+ * the end of fragmented delivery, but it won't know to go
+ * back and call itself again... we do that here with the
+ * doit_again goto.
+ */
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * finished our Fragmented delivery, could be more
+ * waiting?
+ */
+ goto doit_again;
+ }
+ }
+}
+
+/*
+ * Dump onto the re-assembly queue, in its proper place. After dumping on the
+ * queue, see if anything can be delivered. If so pull it off (or as much as
+ * we can). If we run out of space then we must dump what we can and set the
+ * appropriate flag to say we queued what we could.
+ */
+static void
+sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *chk, int *abort_flag)
+{
+ struct mbuf *oper;
+ uint32_t cum_ackp1, last_tsn, prev_tsn, post_tsn;
+ u_char last_flags;
+ struct sctp_tmit_chunk *at, *prev, *next;
+
+ prev = next = NULL;
+ cum_ackp1 = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* This is the first one on the queue */
+ TAILQ_INSERT_HEAD(&asoc->reasmqueue, chk, sctp_next);
+ /*
+ * we do not check for delivery of anything when only one
+ * fragment is here
+ */
+ asoc->size_on_reasm_queue = chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ if (chk->rec.data.TSN_seq == cum_ackp1) {
+ if (asoc->fragmented_delivery_inprogress == 0 &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) !=
+ SCTP_DATA_FIRST_FRAG) {
+ /*
+ * An empty queue, no delivery in progress,
+ * we hit the next one and it does NOT have
+ * a FIRST fragment mark.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_2);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /*
+ * We are doing a partial delivery and the
+ * NEXT chunk MUST be either the LAST or
+ * MIDDLE fragment NOT a FIRST
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_3);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Here we are ok with a MIDDLE or LAST
+ * piece
+ */
+ if (chk->rec.data.stream_number !=
+ asoc->str_of_pdapi) {
+ /* Got to be the right STR No */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
+ chk->rec.data.stream_number,
+ asoc->str_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ chk->rec.data.stream_seq !=
+ asoc->ssn_of_pdapi) {
+ /* Got to be the right STR Seq */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
+ chk->rec.data.stream_seq,
+ asoc->ssn_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ }
+ }
+ }
+ return;
+ }
+ /* Find its place */
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(at->rec.data.TSN_seq,
+ chk->rec.data.TSN_seq, MAX_TSN)) {
+ /*
+ * one in queue is bigger than the new one, insert
+ * before this one
+ */
+ /* A check */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ next = at;
+ TAILQ_INSERT_BEFORE(at, chk, sctp_next);
+ break;
+ } else if (at->rec.data.TSN_seq == chk->rec.data.TSN_seq) {
+ /* Gak, He sent me a duplicate str seq number */
+ /*
+ * foo bar, I guess I will just free this new guy,
+ * should we abort too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped. Maybe I should
+ * compare to TSN somehow... sigh for now just blow
+ * away the chunk!
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ } else {
+ last_flags = at->rec.data.rcv_flags;
+ last_tsn = at->rec.data.TSN_seq;
+ prev = at;
+ if (TAILQ_NEXT(at, sctp_next) == NULL) {
+ /*
+ * We are at the end, insert it after this
+ * one
+ */
+ /* check it first */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ TAILQ_INSERT_AFTER(&asoc->reasmqueue, at, chk, sctp_next);
+ break;
+ }
+ }
+ }
+ /* Now the audits */
+ if (prev) {
+ prev_tsn = chk->rec.data.TSN_seq - 1;
+ if (prev_tsn == prev->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am dropping onto the end is the
+ * NEXT. A bit of validation here.
+ */
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG ||
+ (prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG) {
+ /*
+ * Insert chk MUST be a MIDDLE or LAST
+ * fragment
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a middle or last but not a first\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ prev->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTP_PRINTF("Prev check - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ prev->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ prev->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ prev->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /* Insert chk MUST be a FIRST */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ if (next) {
+ post_tsn = chk->rec.data.TSN_seq + 1;
+ if (post_tsn == next->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am inserting ahead of is my NEXT
+ * one. A bit of validation here.
+ */
+ if (next->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /* Insert chk MUST be a last fragment */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK)
+ != SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG ||
+ (next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Insert chk CAN be MIDDLE or FIRST NOT
+ * LAST
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ next->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ next->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ next->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ next->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ /* Do we need to do some delivery? check */
+ sctp_deliver_reasm_check(stcb, asoc);
+}
+
+/*
+ * This is an unfortunate routine. It checks to make sure an evil guy is not
+ * stuffing us full of bad packet fragments. A broken peer could also do this
+ * but this is doubtful. It is too bad I must worry about evil crackers, sigh
+ * :< more cycles.
+ */
+static int
+sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc,
+ uint32_t TSN_seq)
+{
+ struct sctp_tmit_chunk *at;
+ uint32_t tsn_est;
+
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(TSN_seq,
+ at->rec.data.TSN_seq, MAX_TSN)) {
+ /* is it one bigger? */
+ tsn_est = at->rec.data.TSN_seq + 1;
+ if (tsn_est == TSN_seq) {
+ /* yep. It better be a last then */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Ok this guy belongs next to a guy
+ * that is NOT last, it should be a
+ * middle/last, not a complete
+ * chunk.
+ */
+ return (1);
+ } else {
+ /*
+ * This guy is ok since it's a LAST
+ * and the new chunk is a fully
+ * self-contained one.
+ */
+ return (0);
+ }
+ }
+ } else if (TSN_seq == at->rec.data.TSN_seq) {
+ /* Software error since I have a dup? */
+ return (1);
+ } else {
+ /*
+ * Ok, 'at' is larger than the new chunk but does it
+ * need to be right before it?
+ */
+ tsn_est = TSN_seq + 1;
+ if (tsn_est == at->rec.data.TSN_seq) {
+ /* Yep, It better be a first */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ return (1);
+ } else {
+ return (0);
+ }
+ }
+ }
+ }
+ return (0);
+}
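+
+/*
+ * In short: a TSN that lands immediately after a queued fragment that is
+ * not a LAST, or immediately before a queued fragment that is not a FIRST,
+ * is claimed by that partial message, so a self-contained chunk carrying
+ * such a TSN is treated as bogus by the caller.
+ */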
+
+
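+/*
+ * Process a single DATA chunk: returns 1 when the chunk was accepted and
+ * marked present in the mapping array, 0 when it was a duplicate, had to
+ * be dropped, or caused the association to be aborted (in which case
+ * *abort_flag is set). *m is cleared when the mbuf chain has been
+ * consumed, and *break_flag tells the caller to stop processing the rest
+ * of the packet when we are out of resources.
+ */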
+static int
+sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
+ struct sctp_nets *net, uint32_t * high_tsn, int *abort_flag,
+ int *break_flag, int last_chunk)
+{
+ /* Process a data chunk */
+ /* struct sctp_tmit_chunk *chk; */
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn, gap;
+ struct mbuf *dmbuf;
+ int indx, the_len;
+ int need_reasm_check = 0;
+ uint16_t strmno, strmseq;
+ struct mbuf *oper;
+ struct sctp_queued_to_read *control;
+ int ordered;
+ uint32_t protocol_id;
+ uint8_t chunk_flags;
+ struct sctp_stream_reset_list *liste;
+
+ chk = NULL;
+ tsn = ntohl(ch->dp.tsn);
+ chunk_flags = ch->ch.chunk_flags;
+ protocol_id = ch->dp.protocol_id;
+ ordered = ((ch->ch.chunk_flags & SCTP_DATA_UNORDERED) == 0);
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS);
+ }
+ if (stcb == NULL) {
+ return (0);
+ }
+ SCTP_LTRACE_CHK(stcb->sctp_ep, stcb, ch->ch.chunk_type, tsn);
+ if (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == tsn) {
+ /* It is a duplicate */
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /* Calculate the number of TSN's between the base and this TSN */
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) + tsn + 1;
+ }
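+ /*
+ * gap is simply (tsn - mapping_array_base_tsn) modulo 2^32, assuming
+ * MAX_TSN is 0xffffffff; e.g. with base 0xfffffff0 and tsn 0x00000005
+ * the else branch yields (0xffffffff - 0xfffffff0) + 5 + 1 = 21.
+ */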
+ if (gap >= (SCTP_MAPPING_ARRAY << 3)) {
+ /* Can't hold the bit in the mapping at max array, toss it */
+ return (0);
+ }
+ if (gap >= (uint32_t) (asoc->mapping_array_size << 3)) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (sctp_expand_mapping_array(asoc, gap)) {
+ /* Can't expand, drop it */
+ return (0);
+ }
+ }
+ if (compare_with_wrap(tsn, *high_tsn, MAX_TSN)) {
+ *high_tsn = tsn;
+ }
+ /* See if we have received this one already */
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /*
+ * Check to see about the GONE flag, duplicates would cause a sack
+ * to be sent up above
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))
+ ) {
+ /*
+ * wait a minute, this guy is gone, there is no longer a
+ * receiver. Send peer an ABORT!
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /*
+ * Now before going further we see if there is room. If NOT then we
+ * MAY let one through only IF this TSN is the one we are waiting
+ * for on a partial delivery API.
+ */
+
+ /* now do the tests */
+ if (((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) > sctp_max_chunks_on_queue) ||
+ (((int)asoc->my_rwnd) <= 0)) {
+ /*
+ * When we have NO room in the rwnd we check to make sure
+ * the reader is doing its job...
+ */
+ if (stcb->sctp_socket->so_rcv.sb_cc) {
+ /* some to read, wake-up */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* now is it in the mapping array of what we have accepted? */
+ if (compare_with_wrap(tsn,
+ asoc->highest_tsn_inside_map, MAX_TSN)) {
+
+ /* Nope not in the valid range dump it */
+ sctp_set_rwnd(stcb, asoc);
+ if ((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) > sctp_max_chunks_on_queue) {
+ SCTP_STAT_INCR(sctps_datadropchklmt);
+ } else {
+ SCTP_STAT_INCR(sctps_datadroprwnd);
+ }
+ indx = *break_flag;
+ *break_flag = 1;
+ return (0);
+ }
+ }
+ strmno = ntohs(ch->dp.stream_id);
+ if (strmno >= asoc->streamincnt) {
+ struct sctp_paramhdr *phdr;
+ struct mbuf *mb;
+
+ mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mb != NULL) {
+ /* add some space up front so prepend will work well */
+ SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
+ phdr = mtod(mb, struct sctp_paramhdr *);
+ /*
+ * Error causes are just params and this one has
+ * two back-to-back phdrs, one with the error type
+ * and size, the other with the streamid and a reserved field
+ */
+ SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
+ phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr) * 2);
+ phdr++;
+ /* We insert the stream in the type field */
+ phdr->param_type = ch->dp.stream_id;
+ /* And set the length to 0 for the rsvd field */
+ phdr->param_length = 0;
+ sctp_queue_op_err(stcb, mb);
+ }
+ SCTP_STAT_INCR(sctps_badsid);
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ /* we have a new high score */
+ asoc->highest_tsn_inside_map = tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 2, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ return (0);
+ }
+ /*
+ * Before we continue let's validate that we are not being fooled by
+ * an evil attacker. We can only have 4k chunks based on our TSN
+ * spread allowed by the mapping array 512 * 8 bits, so there is no
+ * way our stream sequence numbers could have wrapped. We of course
+ * only validate the FIRST fragment so the bit must be set.
+ */
+ strmseq = ntohs(ch->dp.stream_sequence);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_in_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_in_at = 0;
+ asoc->tsn_in_wrapped = 1;
+ }
+ asoc->in_tsnlog[asoc->tsn_in_at].tsn = tsn;
+ asoc->in_tsnlog[asoc->tsn_in_at].strm = strmno;
+ asoc->in_tsnlog[asoc->tsn_in_at].seq = strmseq;
+ asoc->in_tsnlog[asoc->tsn_in_at].sz = chk_length;
+ asoc->in_tsnlog[asoc->tsn_in_at].flgs = chunk_flags;
+ asoc->in_tsnlog[asoc->tsn_in_at].stcb = (void *)stcb;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_pos = asoc->tsn_in_at;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_out = 1;
+ asoc->tsn_in_at++;
+#endif
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) &&
+ (TAILQ_EMPTY(&asoc->resetHead)) &&
+ (chunk_flags & SCTP_DATA_UNORDERED) == 0 &&
+ (compare_with_wrap(asoc->strmin[strmno].last_sequence_delivered,
+ strmseq, MAX_SEQ) ||
+ asoc->strmin[strmno].last_sequence_delivered == strmseq)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
+ strmseq, asoc->strmin[strmno].last_sequence_delivered);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /************************************
+ * From here down we may find ch-> invalid,
+ * so it's a good idea NOT to use it.
+ *************************************/
+
+ the_len = (chk_length - sizeof(struct sctp_data_chunk));
+ if (last_chunk == 0) {
+ dmbuf = SCTP_M_COPYM(*m,
+ (offset + sizeof(struct sctp_data_chunk)),
+ the_len, M_DONTWAIT);
+#ifdef SCTP_MBUF_LOGGING
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = dmbuf;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ } else {
+ /* We can steal the last chunk */
+ int l_len;
+
+ dmbuf = *m;
+ /* lop off the top part */
+ m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk)));
+ if (SCTP_BUF_NEXT(dmbuf) == NULL) {
+ l_len = SCTP_BUF_LEN(dmbuf);
+ } else {
+ /*
+ * need to count up the size; hopefully we do not hit
+ * this too often :-0
+ */
+ struct mbuf *lat;
+
+ l_len = 0;
+ lat = dmbuf;
+ while (lat) {
+ l_len += SCTP_BUF_LEN(lat);
+ lat = SCTP_BUF_NEXT(lat);
+ }
+ }
+ if (l_len > the_len) {
+ /* Trim the end round bytes off too */
+ m_adj(dmbuf, -(l_len - the_len));
+ }
+ }
+ if (dmbuf == NULL) {
+ SCTP_STAT_INCR(sctps_nomem);
+ return (0);
+ }
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG &&
+ asoc->fragmented_delivery_inprogress == 0 &&
+ TAILQ_EMPTY(&asoc->resetHead) &&
+ ((ordered == 0) ||
+ ((asoc->strmin[strmno].last_sequence_delivered + 1) == strmseq &&
+ TAILQ_EMPTY(&asoc->strmin[strmno].inqueue)))) {
+ /* Candidate for express delivery */
+ /*
+ * It's not fragmented, no PD-API is up, nothing is in the
+ * delivery queue, it's un-ordered OR ordered and the next to
+ * deliver AND nothing else is stuck on the stream queue,
+ * and there is room for it in the socket buffer. Let's just
+ * stuff it up the buffer....
+ */
+
+ /* It would be nice to avoid this copy if we could :< */
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ goto failed_express_del;
+ }
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ /* for ordered, bump what we delivered */
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ SCTP_STAT_INCR(sctps_recvexpress);
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno,
+ SCTP_STR_LOG_FROM_EXPRS_DEL);
+ }
+ control = NULL;
+ goto finish_express_del;
+ }
+failed_express_del:
+ /* If we reach here this is a new chunk */
+ chk = NULL;
+ control = NULL;
+ /* Express for fragmented delivery? */
+ if ((asoc->fragmented_delivery_inprogress) &&
+ (stcb->asoc.control_pdapi) &&
+ (asoc->str_of_pdapi == strmno) &&
+ (asoc->ssn_of_pdapi == strmseq)
+ ) {
+ control = stcb->asoc.control_pdapi;
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /* Can't be another first? */
+ goto failed_pdapi_express_del;
+ }
+ if (tsn == (control->sinfo_tsn + 1)) {
+ /* Yep, we can add it on */
+ int end = 0;
+ uint32_t cumack;
+
+ if (chunk_flags & SCTP_DATA_LAST_FRAG) {
+ end = 1;
+ }
+ cumack = asoc->cumulative_tsn;
+ if ((cumack + 1) == tsn)
+ cumack = tsn;
+
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb, control, dmbuf, end,
+ tsn,
+ &stcb->sctp_socket->so_rcv)) {
+ SCTP_PRINTF("Append fails end:%d\n", end);
+ goto failed_pdapi_express_del;
+ }
+ SCTP_STAT_INCR(sctps_recvexpressm);
+ control->sinfo_tsn = tsn;
+ asoc->tsn_last_delivered = tsn;
+ asoc->fragment_flags = chunk_flags;
+ asoc->tsn_of_pdapi_last_delivered = tsn;
+ asoc->last_flags_delivered = chunk_flags;
+ asoc->last_strm_seq_delivered = strmseq;
+ asoc->last_strm_no_delivered = strmno;
+ if (end) {
+ /* clean up the flags and such */
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ stcb->asoc.control_pdapi = NULL;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) == 0) {
+ /*
+ * There could be another message
+ * ready
+ */
+ need_reasm_check = 1;
+ }
+ }
+ control = NULL;
+ goto finish_express_del;
+ }
+ }
+failed_pdapi_express_del:
+ control = NULL;
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ chk->rec.data.TSN_seq = tsn;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = strmseq;
+ chk->rec.data.stream_number = strmno;
+ chk->rec.data.payloadtype = protocol_id;
+ chk->rec.data.context = stcb->asoc.context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.rcv_flags = chunk_flags;
+ chk->asoc = asoc;
+ chk->send_size = the_len;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+ chk->data = dmbuf;
+ } else {
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ control->length = the_len;
+ }
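+ /*
+ * At this point exactly one of the two is set: control (a complete,
+ * self-contained message) or chk (a fragment that must go through
+ * the reassembly queue below).
+ */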
+
+ /* Mark it as received */
+ /* Now queue it where it belongs */
+ if (control != NULL) {
+ /* First a sanity check */
+ if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Ok, we have a fragmented delivery in progress if
+ * this chunk is next to deliver OR belongs in our
+ * view to the reassembly, the peer is evil or
+ * broken.
+ */
+ uint32_t estimate_tsn;
+
+ estimate_tsn = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) &&
+ (estimate_tsn == control->sinfo_tsn)) {
+ /* Evil/Broke peer */
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_15);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ } else {
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_16);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ } else {
+ /* No PDAPI running */
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /*
+ * Reassembly queue is NOT empty; validate
+ * that this tsn does not need to be in the
+ * reassembly queue. If it does then our peer
+ * is broken or evil.
+ */
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ }
+ /* ok, if we reach here we have passed the sanity checks */
+ if (chunk_flags & SCTP_DATA_UNORDERED) {
+ /* queue directly into socket buffer */
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ } else {
+ /*
+ * Special check for when streams are resetting. We
+ * could be smarter about this and check the
+ * actual stream to see if it is not being reset...
+ * that way we would not create a HOLB when amongst
+ * streams being reset and those not being reset.
+ *
+ * We take complete messages that have a stream reset
+ * intervening (aka the TSN is after where our
+ * cum-ack needs to be) off and put them on a
+ * pending_reply_queue. The reassembly ones we do
+ * not have to worry about since they are all sorted
+ * and processed by TSN order. It is only the
+ * singletons I must worry about.
+ */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(tsn, liste->tsn, MAX_TSN)))
+ ) {
+ /*
+ * yep its past where we need to reset... go
+ * ahead and queue it.
+ */
+ if (TAILQ_EMPTY(&asoc->pending_reply_queue)) {
+ /* first one on */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ } else {
+ struct sctp_queued_to_read *ctlOn;
+ unsigned char inserted = 0;
+
+ ctlOn = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (ctlOn) {
+ if (compare_with_wrap(control->sinfo_tsn,
+ ctlOn->sinfo_tsn, MAX_TSN)) {
+ ctlOn = TAILQ_NEXT(ctlOn, next);
+ } else {
+ /* found it */
+ TAILQ_INSERT_BEFORE(ctlOn, control, next);
+ inserted = 1;
+ break;
+ }
+ }
+ if (inserted == 0) {
+ /*
+ * must be put at the end;
+ * nothing already queued has
+ * a larger TSN.
+ */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ }
+ }
+ } else {
+ sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ }
+ }
+ } else {
+ /* Into the re-assembly queue */
+ sctp_queue_data_for_reasm(stcb, asoc, chk, abort_flag);
+ if (*abort_flag) {
+ /*
+ * the assoc is now gone and chk was put onto the
+ * reasm queue, which has all been freed.
+ */
+ *m = NULL;
+ return (0);
+ }
+ }
+finish_express_del:
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ /* we have a new high score */
+ asoc->highest_tsn_inside_map = tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 2, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ if (last_chunk) {
+ *m = NULL;
+ }
+ if (ordered) {
+ SCTP_STAT_INCR_COUNTER64(sctps_inorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_inunorderchunks);
+ }
+ SCTP_STAT_INCR(sctps_recvdata);
+ /* Set it present please */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno, SCTP_STR_LOG_FROM_MARK_TSN);
+ }
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map, SCTP_MAP_PREPARE_SLIDE);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ /* check the special flag for stream resets */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(asoc->cumulative_tsn, liste->tsn, MAX_TSN)) ||
+ (asoc->cumulative_tsn == liste->tsn))
+ ) {
+ /*
+ * we have finished working through the backlogged TSNs; now
+ * it is time to reset streams. 1: call reset function. 2: free
+ * pending_reply space. 3: distribute any chunks in
+ * pending_reply_queue.
+ */
+ struct sctp_queued_to_read *ctl;
+
+ sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams);
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ /* sa_ignore FREED_MEMORY */
+ liste = TAILQ_FIRST(&asoc->resetHead);
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ if (ctl && (liste == NULL)) {
+ /* All can be removed */
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ } else if (ctl) {
+ /* more than one in queue */
+ while (!compare_with_wrap(ctl->sinfo_tsn, liste->tsn, MAX_TSN)) {
+ /*
+ * if ctl->sinfo_tsn is <= liste->tsn we can
+ * process it which is the NOT of
+ * ctl->sinfo_tsn > liste->tsn
+ */
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ }
+ /*
+ * Now service re-assembly to pick up anything that has been
+ * held on the reassembly queue.
+ */
+ sctp_deliver_reasm_check(stcb, asoc);
+ need_reasm_check = 0;
+ }
+ if (need_reasm_check) {
+ /* Another one waits ? */
+ sctp_deliver_reasm_check(stcb, asoc);
+ }
+ return (1);
+}
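+
+/*
+ * Note on TSN comparisons: TSNs are 32-bit serial numbers, so throughout
+ * this file compare_with_wrap(a, b, MAX_TSN) is used to ask "is a newer
+ * than b" modulo 2^32; a plain "a > b" would misbehave once the TSN space
+ * wraps. A minimal sketch of that style of comparison (illustrative helper
+ * only, not an existing function in this file):
+ */
+#if 0 /* example only */
+static int
+example_tsn_newer(uint32_t a, uint32_t b)
+{
+ /* true when a logically follows b, allowing for 32-bit wrap-around */
+ return ((a != b) && ((uint32_t)(a - b) < (1U << 31)));
+}
+#endif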
+
+int8_t sctp_map_lookup_tab[256] = {
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 5,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 6,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 5,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 7,
+};
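+
+/*
+ * How the table above is used (assumption drawn from sctp_sack_check()
+ * below): sctp_map_lookup_tab[b] is the zero-based index of the last bit
+ * in the run of consecutive 1-bits starting at bit 0 of byte b, or -1 when
+ * bit 0 is clear. E.g. 0x07 -> 2, 0x01 -> 0, 0x06 -> -1, 0xff -> 7.
+ * Walking the mapping array a byte at a time with this table yields the
+ * new cumulative TSN without testing individual bits. A minimal sketch
+ * (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static uint32_t
+example_new_cumack(uint8_t *map, int len, uint32_t base_tsn)
+{
+ int i, at = 0;
+
+ for (i = 0; i < len; i++) {
+ if (map[i] == 0xff) {
+ at += 8; /* a whole byte worth of TSNs is present */
+ } else {
+ /* the run of present TSNs ends inside this byte */
+ return (base_tsn + at + sctp_map_lookup_tab[map[i]]);
+ }
+ }
+ return (base_tsn + at - 1); /* every byte was all ones */
+}
+#endif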
+
+
+void
+sctp_sack_check(struct sctp_tcb *stcb, int ok_to_sack, int was_a_gap, int *abort_flag)
+{
+ /*
+ * Now we also need to check the mapping array in a couple of ways.
+ * 1) Did we move the cum-ack point?
+ */
+ struct sctp_association *asoc;
+ int i, at;
+ int last_all_ones = 0;
+ int slide_from, slide_end, lgap, distance;
+ uint32_t old_cumack, old_base, old_highest;
+ unsigned char aux_array[64];
+
+
+ asoc = &stcb->asoc;
+ at = 0;
+
+ old_cumack = asoc->cumulative_tsn;
+ old_base = asoc->mapping_array_base_tsn;
+ old_highest = asoc->highest_tsn_inside_map;
+ if (asoc->mapping_array_size < 64)
+ memcpy(aux_array, asoc->mapping_array,
+ asoc->mapping_array_size);
+ else
+ memcpy(aux_array, asoc->mapping_array, 64);
+
+ /*
+ * We could probably improve this a small bit by calculating the
+ * offset of the current cum-ack as the starting point.
+ */
+ at = 0;
+ for (i = 0; i < stcb->asoc.mapping_array_size; i++) {
+
+ if (asoc->mapping_array[i] == 0xff) {
+ at += 8;
+ last_all_ones = 1;
+ } else {
+ /* there is a 0 bit */
+ at += sctp_map_lookup_tab[asoc->mapping_array[i]];
+ last_all_ones = 0;
+ break;
+ }
+ }
+ asoc->cumulative_tsn = asoc->mapping_array_base_tsn + (at - last_all_ones);
+ /* at is one off, since in the table an embedded -1 is present */
+ at++;
+
+ if (compare_with_wrap(asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+#ifdef INVARIANTS
+ panic("huh, cumack 0x%x greater than high-tsn 0x%x in map",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+#else
+ SCTP_PRINTF("huh, cumack 0x%x greater than high-tsn 0x%x in map - should panic?\n",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+ asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+#endif
+ }
+ if ((asoc->cumulative_tsn == asoc->highest_tsn_inside_map) && (at >= 8)) {
+ /* The complete array was completed by a single FR */
+ /* highest becomes the cum-ack */
+ int clr;
+
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
+ /* clear the array */
+ clr = (at >> 3) + 1;
+ if (clr > asoc->mapping_array_size) {
+ clr = asoc->mapping_array_size;
+ }
+ memset(asoc->mapping_array, 0, clr);
+ /* base becomes one ahead of the cum-ack */
+ asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(old_base, old_cumack, old_highest,
+ SCTP_MAP_PREPARE_SLIDE);
+ sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_CLEARED);
+ }
+ } else if (at >= 8) {
+ /* we can slide the mapping array down */
+ /* Calculate the new byte position we can move down */
+ slide_from = at >> 3;
+ /*
+ * now calculate the ceiling of the move using our highest
+ * TSN value
+ */
+ if (asoc->highest_tsn_inside_map >= asoc->mapping_array_base_tsn) {
+ lgap = asoc->highest_tsn_inside_map -
+ asoc->mapping_array_base_tsn;
+ } else {
+ lgap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ asoc->highest_tsn_inside_map + 1;
+ }
+ slide_end = lgap >> 3;
+ if (slide_end < slide_from) {
+#ifdef INVARIANTS
+ panic("impossible slide");
+#else
+ printf("impossible slide?\n");
+ return;
+#endif
+ }
+ distance = (slide_end - slide_from) + 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(old_base, old_cumack, old_highest,
+ SCTP_MAP_PREPARE_SLIDE);
+ sctp_log_map((uint32_t) slide_from, (uint32_t) slide_end,
+ (uint32_t) lgap, SCTP_MAP_SLIDE_FROM);
+ }
+ if (distance + slide_from > asoc->mapping_array_size ||
+ distance < 0) {
+ /*
+ * Here we do NOT slide forward the array so that
+ * hopefully when more data comes in to fill it up
+ * we will be able to slide it forward. Really I
+ * don't think this should happen :-0
+ */
+
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map((uint32_t) distance, (uint32_t) slide_from,
+ (uint32_t) asoc->mapping_array_size,
+ SCTP_MAP_SLIDE_NONE);
+ }
+ } else {
+ int ii;
+
+ for (ii = 0; ii < distance; ii++) {
+ asoc->mapping_array[ii] =
+ asoc->mapping_array[slide_from + ii];
+ }
+ for (ii = distance; ii <= slide_end; ii++) {
+ asoc->mapping_array[ii] = 0;
+ }
+ asoc->mapping_array_base_tsn += (slide_from << 3);
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn,
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map,
+ SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ }
+ /*
+ * Now we need to see if we need to queue a sack or just start the
+ * timer (if allowed).
+ */
+ if (ok_to_sack) {
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * Ok, special case for the SHUTDOWN-SENT state. Here we
+ * make sure the SACK timer is off and instead send a
+ * SHUTDOWN and a SACK.
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+ }
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ sctp_send_sack(stcb);
+ } else {
+ int is_a_gap;
+
+ /* is there a gap now ? */
+ is_a_gap = compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN);
+
+ /*
+ * CMT DAC algorithm: increase number of packets
+ * received since last ack
+ */
+ stcb->asoc.cmt_dac_pkts_rcvd++;
+
+ if ((stcb->asoc.send_sack == 1) || /* We need to send a
+ * SACK */
+ ((was_a_gap) && (is_a_gap == 0)) || /* was a gap, but no
+ * longer is one */
+ (stcb->asoc.numduptsns) || /* we have dup's */
+ (is_a_gap) || /* is still a gap */
+ (stcb->asoc.delayed_ack == 0) || /* Delayed sack disabled */
+ (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */
+ ) {
+
+ if ((sctp_cmt_on_off) && (sctp_cmt_use_dac) &&
+ (stcb->asoc.send_sack == 0) &&
+ (stcb->asoc.numduptsns == 0) &&
+ (stcb->asoc.delayed_ack) &&
+ (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer))) {
+
+ /*
+ * CMT DAC algorithm: With CMT,
+ * delay acks even in the face of
+ * reordering. Therefore, acks
+ * that do not have to be sent
+ * because of the above reasons
+ * will be delayed. That is, acks
+ * that would have been sent due
+ * to gap reports will be delayed
+ * with DAC. Start the delayed
+ * ack timer.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ /*
+ * Ok we must build a SACK since the
+ * timer is pending, we got our
+ * first packet OR there are gaps or
+ * duplicates.
+ */
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ sctp_send_sack(stcb);
+ }
+ } else {
+ if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ }
+ }
+ }
+}
+
+void
+sctp_service_queues(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsize;
+ uint16_t nxt_todel;
+
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ /* Can we proceed further, i.e. the PD-API is complete */
+ if (asoc->fragmented_delivery_inprogress) {
+ /* no */
+ return;
+ }
+ /*
+ * Now is there some other chunk I can deliver from the reassembly
+ * queue.
+ */
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ nxt_todel = asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ ((nxt_todel == chk->rec.data.stream_seq) ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep the first one is here. We setup to start reception,
+ * by backing down the TSN just in case we can't deliver.
+ */
+
+ /*
+ * Before we start, though, either all of the message should
+ * be here, or 1/4 the socket buffer max, or there should be
+ * nothing on the delivery queue and something deliverable.
+ */
+ if ((sctp_is_all_msg_on_reasm(asoc, &tsize) ||
+ (tsize >= stcb->sctp_ep->partial_delivery_point))) {
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi = chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ goto doit_again;
+ }
+ }
+ }
+}
+
+int
+sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t * high_tsn)
+{
+ struct sctp_data_chunk *ch, chunk_buf;
+ struct sctp_association *asoc;
+ int num_chunks = 0; /* number of data chunks processed */
+ int stop_proc = 0;
+ int chk_length, break_flag, last_chunk;
+ int abort_flag = 0, was_a_gap = 0;
+ struct mbuf *m;
+
+ /* set the rwnd */
+ sctp_set_rwnd(stcb, &stcb->asoc);
+
+ m = *mm;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+ if (compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN)) {
+ /* there was a gap before this data was processed */
+ was_a_gap = 1;
+ }
+ /*
+ * setup where we got the last DATA packet from for any SACK that
+ * may need to go out. Don't bump the net. This is done ONLY when a
+ * chunk is assigned.
+ */
+ asoc->last_data_chunk_from = net;
+
+ /*-
+ * Now before we proceed we must figure out if this is a wasted
+ * cluster... i.e. it is a small packet sent in and yet the driver
+ * underneath allocated a full cluster for it. If so we must copy it
+ * to a smaller mbuf and free up the cluster mbuf. This will help
+ * with cluster starvation. Note for __Panda__ we don't do this
+ * since it has clusters all the way down to 64 bytes.
+ */
+ if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
+ /* we only handle mbufs that are singletons.. not chains */
+ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ /* ok lets see if we can copy the data up */
+ caddr_t *from, *to;
+
+ /* get the pointers and copy */
+ to = mtod(m, caddr_t *);
+ from = mtod((*mm), caddr_t *);
+ memcpy(to, from, SCTP_BUF_LEN((*mm)));
+ /* copy the length and free up the old */
+ SCTP_BUF_LEN(m) = SCTP_BUF_LEN((*mm));
+ sctp_m_freem(*mm);
+ /* success, back copy */
+ *mm = m;
+ } else {
+ /* We are in trouble in the mbuf world .. yikes */
+ m = *mm;
+ }
+ }
+ /* get pointer to the first chunk header */
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ return (1);
+ }
+ /*
+ * process all DATA chunks...
+ */
+ *high_tsn = asoc->cumulative_tsn;
+ break_flag = 0;
+ asoc->data_pkts_seen++;
+ while (stop_proc == 0) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->ch.chunk_length);
+ if (length - *offset < chk_length) {
+ /* all done, mutilated chunk */
+ stop_proc = 1;
+ break;
+ }
+ if (ch->ch.chunk_type == SCTP_DATA) {
+ if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) {
+ /*
+ * Need to send an abort since we had an
+ * invalid data chunk.
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 2 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (op_err) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) +
+ (2 * sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_19);
+ ippp++;
+ *ippp = asoc->cumulative_tsn;
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+ sctp_abort_association(inp, stcb, m, iphlen, sh,
+ op_err, 0);
+ return (2);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB1, 0);
+#endif
+ if (SCTP_SIZE32(chk_length) == (length - *offset)) {
+ last_chunk = 1;
+ } else {
+ last_chunk = 0;
+ }
+ if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset, ch,
+ chk_length, net, high_tsn, &abort_flag, &break_flag,
+ last_chunk)) {
+ num_chunks++;
+ }
+ if (abort_flag)
+ return (2);
+
+ if (break_flag) {
+ /*
+ * Set because of out of rwnd space and no
+ * drop rep space left.
+ */
+ stop_proc = 1;
+ break;
+ }
+ } else {
+ /* not a data chunk in the data region */
+ switch (ch->ch.chunk_type) {
+ case SCTP_INITIATION:
+ case SCTP_INITIATION_ACK:
+ case SCTP_SELECTIVE_ACK:
+ case SCTP_HEARTBEAT_REQUEST:
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_SHUTDOWN:
+ case SCTP_SHUTDOWN_ACK:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_COOKIE_ECHO:
+ case SCTP_COOKIE_ACK:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_AUTHENTICATION:
+ case SCTP_ASCONF_ACK:
+ case SCTP_PACKET_DROPPED:
+ case SCTP_STREAM_RESET:
+ case SCTP_FORWARD_CUM_TSN:
+ case SCTP_ASCONF:
+ /*
+ * Now, what do we do with KNOWN chunks that
+ * are NOT in the right place?
+ *
+ * For now, I do nothing but ignore them. We
+ * may later want to add sysctl stuff to
+ * switch out and do either an ABORT() or
+ * possibly process them.
+ */
+ if (sctp_strict_data_order) {
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, 0);
+ return (2);
+ }
+ break;
+ default:
+ /* unknown chunk type, use bit rules */
+ if (ch->ch.chunk_type & 0x40) {
+ /* Add an error report to the queue */
+ struct mbuf *merr;
+ struct sctp_paramhdr *phd;
+
+ merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
+ if (merr) {
+ phd = mtod(merr, struct sctp_paramhdr *);
+ /*
+ * We cheat and use param
+ * type since we did not
+ * bother to define an error
+ * cause struct. They are
+ * the same basic format
+ * with different names.
+ */
+ phd->param_type =
+ htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length =
+ htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(merr) = sizeof(*phd);
+ SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset,
+ SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(merr)) {
+ sctp_queue_op_err(stcb, merr);
+ } else {
+ sctp_m_freem(merr);
+ }
+ }
+ }
+ if ((ch->ch.chunk_type & 0x80) == 0) {
+ /* discard the rest of this packet */
+ stop_proc = 1;
+ } /* else skip this bad chunk and
+ * continue... */
+ break;
+ } /* switch of chunk type */
+ }
+ *offset += SCTP_SIZE32(chk_length);
+ if ((*offset >= length) || stop_proc) {
+ /* no more data left in the mbuf chain */
+ stop_proc = 1;
+ continue;
+ }
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ stop_proc = 1;
+ break;
+
+ }
+ } /* while */
+ if (break_flag) {
+ /*
+ * we need to report rwnd overrun drops.
+ */
+ sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+ }
+ if (num_chunks) {
+ /*
+ * Did we get data? If so, update the time for auto-close and
+ * give the peer credit for being alive.
+ */
+ SCTP_STAT_INCR(sctps_recvpktwithdata);
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
+ }
+ /* now service all of the reassm queue if needed */
+ if (!(TAILQ_EMPTY(&asoc->reasmqueue)))
+ sctp_service_queues(stcb, asoc);
+
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /* Assure that we ack right away */
+ stcb->asoc.send_sack = 1;
+ }
+ /* Start a sack timer or QUEUE a SACK for sending */
+ if ((stcb->asoc.cumulative_tsn == stcb->asoc.highest_tsn_inside_map) &&
+ (stcb->asoc.mapping_array[0] != 0xff)) {
+ if ((stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) ||
+ (stcb->asoc.delayed_ack == 0) ||
+ (stcb->asoc.numduptsns) ||
+ (stcb->asoc.send_sack == 1)) {
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ }
+ sctp_send_sack(stcb);
+ } else {
+ if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ }
+ } else {
+ sctp_sack_check(stcb, 1, was_a_gap, &abort_flag);
+ }
+ if (abort_flag)
+ return (2);
+
+ return (0);
+}
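+
+/*
+ * Note on the "bit rules" default case above: per the SCTP base spec
+ * (RFC 4960, section 3.2) the two high-order bits of an unrecognized chunk
+ * type tell the receiver whether to keep processing the packet and whether
+ * to report the chunk in an ERROR. A minimal sketch of that decision
+ * (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static void
+example_unknown_chunk_action(uint8_t chunk_type, int *report, int *stop)
+{
+ /* lower of the two high bits set: report the chunk in an ERROR */
+ *report = (chunk_type & 0x40) ? 1 : 0;
+ /* highest bit clear: stop processing the rest of this packet */
+ *stop = ((chunk_type & 0x80) == 0);
+}
+#endif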
+
+static void
+sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_sack_chunk *ch, uint32_t last_tsn, uint32_t * biggest_tsn_acked,
+ uint32_t * biggest_newly_acked_tsn, uint32_t * this_sack_lowest_newack,
+ int num_seg, int *ecn_seg_sums)
+{
+ /************************************************/
+ /* process fragments and update sendqueue */
+ /************************************************/
+ struct sctp_sack *sack;
+ struct sctp_gap_ack_block *frag, block;
+ struct sctp_tmit_chunk *tp1;
+ int i;
+ unsigned int j;
+ int num_frs = 0;
+
+ uint16_t frag_strt, frag_end, primary_flag_set;
+ u_long last_frag_high;
+
+ /*
+ * @@@ JRI : TODO: This flag is not used anywhere .. remove?
+ */
+ if (asoc->primary_destination->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
+ primary_flag_set = 1;
+ } else {
+ primary_flag_set = 0;
+ }
+ sack = &ch->sack;
+
+ frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_gap_ack_block), (uint8_t *) & block);
+ *offset += sizeof(block);
+ if (frag == NULL) {
+ return;
+ }
+ tp1 = NULL;
+ last_frag_high = 0;
+ for (i = 0; i < num_seg; i++) {
+ frag_strt = ntohs(frag->start);
+ frag_end = ntohs(frag->end);
+ /* some sanity checks on the fragment offsets */
+ if (frag_strt > frag_end) {
+ /* this one is malformed, skip */
+ frag++;
+ continue;
+ }
+ if (compare_with_wrap((frag_end + last_tsn), *biggest_tsn_acked,
+ MAX_TSN))
+ *biggest_tsn_acked = frag_end + last_tsn;
+
+ /* mark acked dgs and find out the highest TSN being acked */
+ if (tp1 == NULL) {
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+
+ /* save the locations of the last frags */
+ last_frag_high = frag_end + last_tsn;
+ } else {
+ /*
+ * now let's see if we need to reset the queue due to
+ * an out-of-order SACK fragment
+ */
+ if (compare_with_wrap(frag_strt + last_tsn,
+ last_frag_high, MAX_TSN)) {
+ /*
+ * if the new frag starts after the last TSN
+ * frag covered, we are ok and this one is
+ * beyond the last one
+ */
+ ;
+ } else {
+ /*
+ * ok, they have reset us, so we need to
+ * reset the queue; this will cause extra
+ * hunting, but hey, they chose the
+ * performance hit when they failed to order
+ * their gaps..
+ */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ last_frag_high = frag_end + last_tsn;
+ }
+ for (j = frag_strt + last_tsn; (compare_with_wrap((frag_end + last_tsn), j, MAX_TSN)); j++) {
+ while (tp1) {
+ if (tp1->rec.data.doing_fast_retransmit)
+ num_frs++;
+
+ /*
+ * CMT: CUCv2 algorithm. For each TSN being
+ * processed from the sent queue, track the
+ * next expected pseudo-cumack, or
+ * rtx_pseudo_cumack, if required. Separate
+ * cumack trackers for first transmissions,
+ * and retransmissions.
+ */
+ if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count == 1)) {
+ tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_pseudo_cumack = 0;
+ }
+ if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count > 1)) {
+ tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_rtx_pseudo_cumack = 0;
+ }
+ if (tp1->rec.data.TSN_seq == j) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * must be held until
+ * cum-ack passes
+ */
+ /*
+ * ECN Nonce: Add the nonce
+ * value to the sender's
+ * nonce sum
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ /*-
+ * If it is less than RESEND, it is
+ * now no-longer in flight.
+ * Higher values may already be set
+ * via previous Gap Ack Blocks...
+ * i.e. ACKED or RESEND.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ *biggest_newly_acked_tsn, MAX_TSN)) {
+ *biggest_newly_acked_tsn = tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT: SFR algo (and HTNA) - set
+ * saw_newack to 1 for the dest
+ * being newly acked. Update
+ * this_sack_highest_newack if
+ * appropriate.
+ */
+ if (tp1->rec.data.chunk_was_revoked == 0)
+ tp1->whoTo->saw_newack = 1;
+
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack,
+ MAX_TSN)) {
+ tp1->whoTo->this_sack_highest_newack =
+ tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT DAC algo: also update
+ * this_sack_lowest_newack
+ */
+ if (*this_sack_lowest_newack == 0) {
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*this_sack_lowest_newack,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ *this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT: CUCv2 algorithm. If the
+ * (rtx-)pseudo-cumack for the
+ * corresponding dest is being
+ * acked, then we have a new
+ * (rtx-)pseudo-cumack. Set
+ * new_(rtx_)pseudo_cumack to
+ * TRUE so that the cwnd for
+ * this dest can be updated.
+ * Also trigger a search for
+ * the next expected
+ * (rtx-)pseudo-cumack. Separate
+ * pseudo_cumack trackers are
+ * kept for first transmissions
+ * and retransmissions.
+ */
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_pseudo_cumack = 1;
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->rtx_pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*biggest_newly_acked_tsn,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ frag_strt,
+ frag_end,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_GAP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 += tp1->send_size;
+
+ /*
+ * update RTO
+ * too ? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ }
+ if (tp1->sent <= SCTP_DATAGRAM_RESEND) {
+ (*ecn_seg_sums) += tp1->rec.data.ect_nonce;
+ (*ecn_seg_sums) &= SCTP_SACK_NONCE_SUM;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->this_sack_highest_gap,
+ MAX_TSN)) {
+ asoc->this_sack_highest_gap =
+ tp1->rec.data.TSN_seq;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB2,
+ (asoc->sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ }
+ /*
+ * All chunks NOT UNSENT
+ * fall through here and are
+ * marked
+ */
+ tp1->sent = SCTP_DATAGRAM_MARKED;
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ }
+ break;
+ } /* if (tp1->TSN_seq == j) */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, j,
+ MAX_TSN))
+ break;
+
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ } /* end while (tp1) */
+ } /* end for (j = fragStart */
+ frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_gap_ack_block), (uint8_t *) & block);
+ *offset += sizeof(block);
+ if (frag == NULL) {
+ break;
+ }
+ }
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ if (num_frs)
+ sctp_log_fr(*biggest_tsn_acked,
+ *biggest_newly_acked_tsn,
+ last_tsn, SCTP_FR_LOG_BIGGEST_TSNS);
+ }
+}
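+
+/*
+ * Note on gap ack blocks: the start/end values carried in a SACK are
+ * 16-bit offsets relative to that SACK's cumulative TSN, which is why the
+ * routine above reconstructs the absolute range as (start + last_tsn) ..
+ * (end + last_tsn). For example, a cum-ack of 1000 with a block
+ * {start = 2, end = 4} acknowledges TSNs 1002 through 1004. A minimal
+ * sketch (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static void
+example_gap_block_range(uint32_t cum_tsn, uint16_t start, uint16_t end,
+ uint32_t *lo, uint32_t *hi)
+{
+ *lo = cum_tsn + start; /* lowest TSN covered by this gap ack block */
+ *hi = cum_tsn + end; /* highest TSN covered by this gap ack block */
+}
+#endif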
+
+static void
+sctp_check_for_revoked(struct sctp_tcb *stcb,
+ struct sctp_association *asoc, uint32_t cumack,
+ u_long biggest_tsn_acked)
+{
+ struct sctp_tmit_chunk *tp1;
+ int tot_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cumack,
+ MAX_TSN)) {
+ /*
+ * ok this guy is either ACKED or MARKED. If it is
+ * ACKED it has been previously acked but not this
+ * time i.e. revoked. If it is MARKED it was ACK'ed
+ * again.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN))
+ break;
+
+
+ if (tp1->sent == SCTP_DATAGRAM_ACKED) {
+ /* it has been revoked */
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * We must add this stuff back in to assure
+ * timers and such get started.
+ */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ /*
+ * We inflate the cwnd to compensate for our
+ * artificial inflation of the flight_size.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ tot_revoked++;
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_REVOKED);
+ }
+ } else if (tp1->sent == SCTP_DATAGRAM_MARKED) {
+ /* it has been re-acked in this SACK */
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT)
+ break;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tot_revoked > 0) {
+ /*
+ * Setup the ecn nonce re-sync point. We do this since once
+ * data is revoked we begin to retransmit things, which do
+ * NOT have the ECN bits set. This means we are now out of
+ * sync and must wait until we get back in sync with the
+ * peer to check ECN bits.
+ */
+ tp1 = TAILQ_FIRST(&asoc->send_queue);
+ if (tp1 == NULL) {
+ asoc->nonce_resync_tsn = asoc->sending_seq;
+ } else {
+ asoc->nonce_resync_tsn = tp1->rec.data.TSN_seq;
+ }
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
+static void
+sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ u_long biggest_tsn_acked, u_long biggest_tsn_newly_acked, u_long this_sack_lowest_newack, int accum_moved)
+{
+ struct sctp_tmit_chunk *tp1;
+ int strike_flag = 0;
+ struct timeval now;
+ int tot_retrans = 0;
+ uint32_t sending_seq;
+ struct sctp_nets *net;
+ int num_dests_sacked = 0;
+
+ /*
+ * select the sending_seq, this is either the next thing ready to be
+ * sent but not transmitted, OR, the next seq we assign.
+ */
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (tp1 == NULL) {
+ sending_seq = asoc->sending_seq;
+ } else {
+ sending_seq = tp1->rec.data.TSN_seq;
+ }
+
+ /* CMT DAC algo: finding out if SACK is a mixed SACK */
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->saw_newack)
+ num_dests_sacked++;
+ }
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ strike_flag = 0;
+ if (tp1->no_fr_allowed) {
+ /* this one had a timeout or something */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND)
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_CHECK_STRIKE);
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN) ||
+ tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* done */
+ break;
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /* Is it expired? */
+ if (
+ (timevalcmp(&now, &tp1->rec.data.timetodrop, >))
+ ) {
+ /* Yes so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ if ((PR_SCTP_RTX_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /* Has it been retransmitted tv_sec times? */
+ if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) {
+ /* Yes, so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->this_sack_highest_gap, MAX_TSN)) {
+ /* we are beyond the tsn in the sack */
+ break;
+ }
+ if (tp1->sent >= SCTP_DATAGRAM_RESEND) {
+ /* either a RESEND, ACKED, or MARKED */
+ /* skip */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * CMT : SFR algo (covers part of DAC and HTNA as well)
+ */
+ if (tp1->whoTo && tp1->whoTo->saw_newack == 0) {
+ /*
+ * No new acks were received for data sent to this
+ * dest. Therefore, according to the SFR algo for
+ * CMT, no data sent to this dest can be marked for
+ * FR using this SACK.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ } else if (tp1->whoTo && compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack, MAX_TSN)) {
+ /*
+ * CMT: New acks were received for data sent to
+ * this dest. But no new acks were seen for data
+ * sent after tp1. Therefore, according to the SFR
+ * algo for CMT, tp1 cannot be marked for FR using
+ * this SACK. This step covers part of the DAC algo
+ * and the HTNA algo as well.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * Here we check to see if we have already done a FR
+ * and if so we see if the biggest TSN we saw in the sack is
+ * smaller than the recovery point. If so we don't strike
+ * the tsn... otherwise we CAN strike the TSN.
+ */
+ /*
+ * @@@ JRI: Check for CMT if (accum_moved &&
+ * asoc->fast_retran_loss_recovery && (sctp_cmt_on_off ==
+ * 0)) {
+ */
+ if (accum_moved && asoc->fast_retran_loss_recovery) {
+ /*
+ * Strike the TSN if in fast-recovery and cum-ack
+ * moved.
+ */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If SACK flag is set to
+ * 0, then lowest_newack test will not pass
+ * because it would have been set to the
+ * cumack earlier. If not already to be
+ * rtx'd, If not a mixed sack and if tp1 is
+ * not between two sacked TSNs, then mark by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(16 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ } else if (tp1->rec.data.doing_fast_retransmit) {
+ /*
+ * For those that have done a FR we must take
+ * special consideration if we strike. I.e the
+ * biggest_newly_acked must be higher than the
+ * sending_seq at the time we did the FR.
+ */
+ if (
+#ifdef SCTP_FR_TO_ALTERNATE
+ /*
+ * If FR's go to new networks, then we must only do
+ * this for singly homed asoc's. However if the FR's
+ * go to the same network (Armando's work) then it's
+ * ok to FR multiple times.
+ */
+ (asoc->numnets < 2)
+#else
+ (1)
+#endif
+ ) {
+
+ if ((compare_with_wrap(biggest_tsn_newly_acked,
+ tp1->rec.data.fast_retran_tsn, MAX_TSN)) ||
+ (biggest_tsn_newly_acked ==
+ tp1->rec.data.fast_retran_tsn)) {
+ /*
+ * Strike the TSN, since this ack is
+ * beyond where things were when we
+ * did a FR.
+ */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ strike_flag = 1;
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If
+ * SACK flag is set to 0,
+ * then lowest_newack test
+ * will not pass because it
+ * would have been set to
+ * the cumack earlier. If
+ * not already to be rtx'd,
+ * If not a mixed sack and
+ * if tp1 is not between two
+ * sacked TSNs, then mark by
+ * one more. NOTE that we
+ * are marking by one
+ * additional time since the
+ * SACK DAC flag indicates
+ * that two packets have
+ * been received after this
+ * missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack,
+ tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(32 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ }
+ }
+ }
+ }
+ /*
+ * JRI: TODO: remove code for HTNA algo. CMT's SFR
+ * algo covers HTNA.
+ */
+ } else if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ biggest_tsn_newly_acked, MAX_TSN)) {
+ /*
+ * We don't strike these: This is the HTNA
+ * algorithm i.e. we don't strike If our TSN is
+ * larger than the Highest TSN Newly Acked.
+ */
+ ;
+ } else {
+ /* Strike the TSN */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If SACK flag is set to
+ * 0, then lowest_newack test will not pass
+ * because it would have been set to the
+ * cumack earlier. If not already to be
+ * rtx'd, If not a mixed sack and if tp1 is
+ * not between two sacked TSNs, then mark by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(48 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ /* Increment the count to resend */
+ struct sctp_nets *alt;
+
+ /* printf("OK, we are now ready to FR this guy\n"); */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(tp1->rec.data.TSN_seq, tp1->snd_count,
+ 0, SCTP_FR_MARKED);
+ }
+ if (strike_flag) {
+ /* This is a subsequent FR */
+ SCTP_STAT_INCR(sctps_sendmultfastretrans);
+ }
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ if (sctp_cmt_on_off) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT.
+ * If CMT is being used, then pick dest with
+ * largest ssthresh for any retransmission.
+ */
+ tp1->no_fr_allowed = 1;
+ alt = tp1->whoTo;
+ /* sa_ignore NO_NULL_CHK */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ /*
+ * JRS 5/18/07 - If CMT PF is on,
+ * use the PF version of
+ * find_alt_net()
+ */
+ alt = sctp_find_alternate_net(stcb, alt, 2);
+ } else {
+ /*
+ * JRS 5/18/07 - If only CMT is on,
+ * use the CMT version of
+ * find_alt_net()
+ */
+ /* sa_ignore NO_NULL_CHK */
+ alt = sctp_find_alternate_net(stcb, alt, 1);
+ }
+ if (alt == NULL) {
+ alt = tp1->whoTo;
+ }
+ /*
+ * CUCv2: If a different dest is picked for
+ * the retransmission, then new
+ * (rtx-)pseudo_cumack needs to be tracked
+ * for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ if (tp1->whoTo) {
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ } else {/* CMT is OFF */
+
+#ifdef SCTP_FR_TO_ALTERNATE
+ /* Can we find an alternate? */
+ alt = sctp_find_alternate_net(stcb, tp1->whoTo, 0);
+#else
+ /*
+ * default behavior is to NOT retransmit
+ * FR's to an alternate. Armando Caro's
+ * paper details why.
+ */
+ alt = tp1->whoTo;
+#endif
+ }
+
+ tp1->rec.data.doing_fast_retransmit = 1;
+ tot_retrans++;
+ /* mark the sending seq for possible subsequent FR's */
+ /*
+ * printf("Marking TSN for FR new value %x\n",
+ * (uint32_t)tpi->rec.data.TSN_seq);
+ */
+ if (TAILQ_EMPTY(&asoc->send_queue)) {
+ /*
+ * If the send queue is empty then it's
+ * the next sequence number that will be
+ * assigned so we subtract one from this to
+ * get the one we last sent.
+ */
+ tp1->rec.data.fast_retran_tsn = sending_seq;
+ } else {
+ /*
+ * If there are chunks on the send queue
+ * (unsent data that has made it from the
+ * stream queues but not out the door), we
+ * take the first one (which will have the
+ * lowest TSN) and subtract one to get the
+ * one we last sent.
+ */
+ struct sctp_tmit_chunk *ttt;
+
+ ttt = TAILQ_FIRST(&asoc->send_queue);
+ tp1->rec.data.fast_retran_tsn =
+ ttt->rec.data.TSN_seq;
+ }
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had a RTO calculation pending on
+ * it, cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ /* fix counts and things */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND,
+ (tp1->whoTo ? (tp1->whoTo->flight_size) : 0),
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ if (tp1->whoTo) {
+ tp1->whoTo->net_ack++;
+ sctp_flight_size_decrease(tp1);
+ }
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_INCREASE_PEER_RWND,
+ asoc->peers_rwnd, tp1->send_size, sctp_peer_chunk_oh);
+ }
+ /* add back to the rwnd */
+ asoc->peers_rwnd += (tp1->send_size + sctp_peer_chunk_oh);
+
+ /* remove from the total flight */
+ sctp_total_flight_decrease(stcb, tp1);
+ if (alt != tp1->whoTo) {
+ /* yes, there is an alternate. */
+ sctp_free_remote_addr(tp1->whoTo);
+ /* sa_ignore FREED_MEMORY */
+ tp1->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ } /* while (tp1) */
+
+ if (tot_retrans > 0) {
+ /*
+ * Setup the ecn nonce re-sync point. We do this since once
+ * we fast-retransmit something, we introduce a Karn's rule scenario
+ * and won't know the totals for the ECN bits.
+ */
+ asoc->nonce_resync_tsn = sending_seq;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
+struct sctp_tmit_chunk *
+sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *tp1, *tp2, *a_adv = NULL;
+ struct timeval now;
+ int now_filled = 0;
+
+ if (asoc->peer_supports_prsctp == 0) {
+ return (NULL);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (tp1->sent != SCTP_FORWARD_TSN_SKIP &&
+ tp1->sent != SCTP_DATAGRAM_RESEND) {
+ /* no chance to advance, out of here */
+ break;
+ }
+ if (!PR_SCTP_ENABLED(tp1->flags)) {
+ /*
+ * We can't fwd-tsn past any that are reliable aka
+ * retransmitted until the asoc fails.
+ */
+ break;
+ }
+ if (!now_filled) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ now_filled = 1;
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ /*
+ * now we got a chunk which is marked for another
+ * retransmission to a PR-stream but has run out its chances
+ * already maybe OR has been marked to skip now. Can we skip
+ * it if it's a resend?
+ */
+ if (tp1->sent == SCTP_DATAGRAM_RESEND &&
+ (PR_SCTP_TTL_ENABLED(tp1->flags))) {
+ /*
+ * Now is this one marked for resend and its time is
+ * now up?
+ */
+ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
+ /* Yes so drop it */
+ if (tp1->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /*
+ * No, we are done when we hit one for resend
+ * whose time has not expired.
+ */
+ break;
+ }
+ }
+ /*
+ * Ok now if this chunk is marked to drop it we can clean up
+ * the chunk, advance our peer ack point and we can check
+ * the next chunk.
+ */
+ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ /* advance PeerAckPoint goes forward */
+ asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq;
+ a_adv = tp1;
+ /*
+ * we don't want to de-queue it here. Just wait for
+ * the next peer SACK to come with a new cumTSN and
+ * then the chunk will be dropped in the normal
+ * fashion.
+ */
+ if (tp1->data) {
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ /*
+ * Maybe there should be another
+ * notification type
+ */
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ tp1, SCTP_SO_NOT_LOCKED);
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+ if (stcb->sctp_socket) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /*
+ * assoc was freed while we
+ * were unlocked
+ */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, tp1->rec.data.TSN_seq, 1, SCTP_WAKESND_FROM_FWDTSN);
+ }
+ }
+ }
+ } else {
+ /*
+ * If it is still in RESEND we can advance no
+ * further
+ */
+ break;
+ }
+ /*
+ * If we hit here we just dumped tp1, move to next tsn on
+ * sent queue.
+ */
+ tp1 = tp2;
+ }
+ return (a_adv);
+}
+
+static void
+sctp_fs_audit(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
+
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ inflight++;
+ } else if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend++;
+ } else if (chk->sent < SCTP_DATAGRAM_ACKED) {
+ inbetween++;
+ } else if (chk->sent > SCTP_DATAGRAM_ACKED) {
+ above++;
+ } else {
+ acked++;
+ }
+ }
+
+ if ((inflight > 0) || (inbetween > 0)) {
+#ifdef INVARIANTS
+ panic("Flight size-express incorrect? \n");
+#else
+ SCTP_PRINTF("Flight size-express incorrect inflight:%d inbetween:%d\n",
+ inflight, inbetween);
+#endif
+ }
+}
+
+
+static void
+sctp_window_probe_recovery(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct sctp_tmit_chunk *tp1)
+{
+ struct sctp_tmit_chunk *chk;
+
+ /* First setup this one and get it moved back */
+ tp1->sent = SCTP_DATAGRAM_UNSENT;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_WP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ TAILQ_INSERT_HEAD(&asoc->send_queue, tp1, sctp_next);
+ asoc->sent_queue_cnt--;
+ asoc->send_queue_cnt++;
+ /*
+ * Now all guys marked for RESEND on the sent_queue must be moved
+ * back too.
+ */
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ /* Another chunk to move */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ /* It should not be in flight */
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ TAILQ_INSERT_AFTER(&asoc->send_queue, tp1, chk, sctp_next);
+ asoc->sent_queue_cnt--;
+ asoc->send_queue_cnt++;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ }
+ }
+}
+
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now)
+{
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ int j, done_once = 0;
+
+ if (sctp_logging_level & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cumack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ asoc = &stcb->asoc;
+ old_rwnd = asoc->peers_rwnd;
+ if (compare_with_wrap(asoc->last_acked_seq, cumack, MAX_TSN)) {
+ /* old ack */
+ return;
+ } else if (asoc->last_acked_seq == cumack) {
+ /* Window update sack */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ goto again;
+ }
+ return;
+ }
+ /* First setup for CC stuff */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ if (sctp_strict_sacks) {
+ uint32_t send_s;
+
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if ((cumack == send_s) ||
+ compare_with_wrap(cumack, send_s, MAX_TSN)) {
+#ifndef INVARIANTS
+ struct mbuf *oper;
+
+#endif
+#ifdef INVARIANTS
+ panic("Impossible sack 1");
+#else
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+#endif
+ }
+ }
+ asoc->this_sack_highest_gap = cumack;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (compare_with_wrap(cumack, asoc->last_acked_seq, MAX_TSN)) {
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ if (compare_with_wrap(cumack, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ cumack == tp1->rec.data.TSN_seq) {
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ printf("Warning, an unsent is now acked?\n");
+ }
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no-longer in flight. Higher
+ * values may occur during marking
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ /*
+ * sa_ignore
+ * NO_NULL_CHK
+ */
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ tp1 = tp2;
+ } else {
+ break;
+ }
+ }
+
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->sctp_socket) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ /* JRS - Use the congestion control given in the CC module */
+ if (asoc->last_acked_seq != cumack)
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0);
+
+ asoc->last_acked_seq = cumack;
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /* Fix up the a-p-a-p for future PR-SCTP sends */
+ if (compare_with_wrap(cumack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cumack;
+ }
+ /* ECN Nonce updates */
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK (none), and we
+ * also know the SACK's nonce sum, it is in
+ * nonce_sum_flag. So we can build a truth
+ * table to back-calculate the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value   Seg-Sums   Base
+ *        0             0         0
+ *        1             0         1
+ *        0             1         1
+ *        1             1         0
+ */
+ asoc->nonce_sum_expect_base = (0 ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* RWND update */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /* Now assure a timer where data is queued at */
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (win_probe_recovery && (net->window_probe)) {
+ net->window_probe = 0;
+ win_probe_recovered = 1;
+ /*
+ * Find first chunk that was used with window probe
+ * and clear the sent
+ */
+ /* sa_ignore FREED_MEMORY */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ /* move back to data send queue */
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->flight_size) {
+ int to_ticks;
+
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ j++;
+ (void)SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks,
+ sctp_timeout_handler, &net->rxt_timer);
+ } else {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /* huh, this should not happen */
+ sctp_fs_audit(asoc);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ asoc->sent_queue_retran_cnt++;
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got all across.. but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ * The sp will be cleaned during free of the asoc.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ /* Let cleanup code purge it */
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->locked_on_sending = NULL;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ }
+ if (sctp_logging_level & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
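
The tail of the express handler above recomputes the peer's window the same way the slow path does: subtract the outstanding flight plus a per-chunk overhead from the advertised rwnd, and clamp small results to zero (sender-side silly-window avoidance). A minimal userspace sketch of that arithmetic follows; the overhead constant and SWS floor are picked purely for illustration and are not the sysctl defaults.

#include <stdint.h>
#include <stdio.h>

#define PEER_CHUNK_OVERHEAD 256   /* illustrative stand-in for sctp_peer_chunk_oh */
#define SWS_SENDER_FLOOR    1420  /* illustrative stand-in for sctp_sws_sender */

static uint32_t
update_peers_rwnd(uint32_t a_rwnd, uint32_t total_flight, uint32_t sent_queue_cnt)
{
	uint32_t consumed = total_flight + sent_queue_cnt * PEER_CHUNK_OVERHEAD;
	uint32_t rwnd = (a_rwnd > consumed) ? a_rwnd - consumed : 0;

	/* Sender-side SWS avoidance: treat a tiny window as closed. */
	if (rwnd < SWS_SENDER_FLOOR)
		rwnd = 0;
	return (rwnd);
}

int
main(void)
{
	printf("%u\n", update_peers_rwnd(65535, 3000, 4));  /* plenty of room left */
	printf("%u\n", update_peers_rwnd(4000, 3000, 4));   /* clamped to zero */
	return (0);
}
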
+
+void
+sctp_handle_sack(struct mbuf *m, int offset,
+ struct sctp_sack_chunk *ch, struct sctp_tcb *stcb,
+ struct sctp_nets *net_from, int *abort_now, int sack_len, uint32_t rwnd)
+{
+ struct sctp_association *asoc;
+ struct sctp_sack *sack;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t cum_ack, last_tsn, biggest_tsn_acked, biggest_tsn_newly_acked,
+ this_sack_lowest_newack;
+ uint32_t sav_cum_ack;
+ uint16_t num_seg, num_dup;
+ uint16_t wake_him = 0;
+ unsigned int sack_length;
+ uint32_t send_s = 0;
+ long j;
+ int accum_moved = 0;
+ int will_exit_fast_recovery = 0;
+ uint32_t a_rwnd, old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ struct sctp_nets *net = NULL;
+ int nonce_sum_flag, ecn_seg_sums = 0;
+ int done_once;
+ uint8_t reneged_all = 0;
+ uint8_t cmt_dac_flag;
+
+ /*
+ * we take any chance we can to service our queues since we cannot
+ * get awoken when the socket is read from :<
+ */
+ /*
+ * Now perform the actual SACK handling:
+ * 1) Verify that it is not an old sack; if so, discard.
+ * 2) If there is nothing left in the send queue (cum-ack is equal
+ *    to last acked) then you have a duplicate too; update any rwnd
+ *    change, verify no timers are running, then return.
+ * 3) Process any new consecutive data, i.e. cum-ack moved; process
+ *    these first and note that it moved.
+ * 4) Process any sack blocks.
+ * 5) Drop any acked chunks from the queue.
+ * 6) Check for any revoked blocks and mark them.
+ * 7) Update the cwnd.
+ * 8) Nothing left: sync up flightsizes and things, stop all timers
+ *    and also check for shutdown_pending state. If so then go ahead
+ *    and send off the shutdown. If in shutdown recv, send off the
+ *    shutdown-ack and start that timer, then return.
+ * 9) Strike any non-acked things and do FR procedure if needed,
+ *    being sure to set the FR flag.
+ * 10) Do pr-sctp procedures.
+ * 11) Apply any FR penalties.
+ * 12) Assure we will SACK if in shutdown_recv state.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ sack = &ch->sack;
+ /* CMT DAC algo */
+ this_sack_lowest_newack = 0;
+ j = 0;
+ sack_length = (unsigned int)sack_len;
+ /* ECN Nonce */
+ SCTP_STAT_INCR(sctps_slowpath_sack);
+ nonce_sum_flag = ch->ch.chunk_flags & SCTP_SACK_NONCE_SUM;
+ cum_ack = last_tsn = ntohl(sack->cum_tsn_ack);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cum_ack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ num_seg = ntohs(sack->num_gap_ack_blks);
+ a_rwnd = rwnd;
+
+ if (sctp_logging_level & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ /* CMT DAC algo */
+ cmt_dac_flag = ch->ch.chunk_flags & SCTP_SACK_CMT_DAC;
+ num_dup = ntohs(sack->num_dup_tsns);
+
+ old_rwnd = stcb->asoc.peers_rwnd;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ asoc = &stcb->asoc;
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ 0,
+ num_seg,
+ num_dup,
+ SCTP_LOG_NEW_SACK);
+ }
+ if ((num_dup) && (sctp_logging_level & (SCTP_FR_LOGGING_ENABLE | SCTP_EARLYFR_LOGGING_ENABLE))) {
+ int off_to_dup, iii;
+ uint32_t *dupdata, dblock;
+
+ off_to_dup = (num_seg * sizeof(struct sctp_gap_ack_block)) + sizeof(struct sctp_sack_chunk);
+ if ((off_to_dup + (num_dup * sizeof(uint32_t))) <= sack_length) {
+ dupdata = (uint32_t *) sctp_m_getptr(m, off_to_dup,
+ sizeof(uint32_t), (uint8_t *) & dblock);
+ off_to_dup += sizeof(uint32_t);
+ if (dupdata) {
+ for (iii = 0; iii < num_dup; iii++) {
+ sctp_log_fr(*dupdata, 0, 0, SCTP_FR_DUPED);
+ dupdata = (uint32_t *) sctp_m_getptr(m, off_to_dup,
+ sizeof(uint32_t), (uint8_t *) & dblock);
+ if (dupdata == NULL)
+ break;
+ off_to_dup += sizeof(uint32_t);
+ }
+ }
+ } else {
+ SCTP_PRINTF("Size invalid offset to dups:%d number dups:%d sack_len:%d num gaps:%d\n",
+ off_to_dup, num_dup, sack_length, num_seg);
+ }
+ }
+ if (sctp_strict_sacks) {
+ /* reality check */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if (cum_ack == send_s ||
+ compare_with_wrap(cum_ack, send_s, MAX_TSN)) {
+#ifndef INVARIANTS
+ struct mbuf *oper;
+
+#endif
+#ifdef INVARIANTS
+ hopeless_peer:
+ panic("Impossible sack 1");
+#else
+
+
+ /*
+ * no way, we have not even sent this TSN out yet.
+ * Peer is hopelessly messed up with us.
+ */
+ hopeless_peer:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+#endif
+ }
+ }
+ /**********************/
+ /* 1) check the range */
+ /**********************/
+ if (compare_with_wrap(asoc->last_acked_seq, last_tsn, MAX_TSN)) {
+ /* acking something behind */
+ return;
+ }
+ sav_cum_ack = asoc->last_acked_seq;
+
+ /* update the Rwnd of the peer */
+ if (TAILQ_EMPTY(&asoc->sent_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ (asoc->stream_queue_cnt == 0)
+ ) {
+ /* nothing left on send/sent and strmq */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->sent_queue_retran_cnt) {
+ asoc->sent_queue_retran_cnt = 0;
+ }
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* stop any timers */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck1);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ }
+ }
+ net->partial_bytes_acked = 0;
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ return;
+ }
+ /*
+ * We init net_ack and net_ack2 to 0. These are used to track 2
+ * things. The total byte count acked is tracked in net_ack AND
+ * net_ack2 is used to track the total bytes acked that are
+ * unambiguous and were never retransmitted. We track these on a
+ * per destination address basis.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(last_tsn, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ last_tsn == tp1->rec.data.TSN_seq) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ accum_moved = 1;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no longer in flight. Higher
+ * values may occur during marking
+ */
+ if ((tp1->whoTo->dest_state &
+ SCTP_ADDR_UNCONFIRMED) &&
+ (tp1->snd_count < 2)) {
+ /*
+ * If there was no retran
+ * and the address is
+ * un-confirmed and we sent
+ * there and are now
+ * sacked.. its confirmed,
+ * mark it so.
+ */
+ tp1->whoTo->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+
+ /* CMT SFR and DAC algos */
+ this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->saw_newack = 1;
+
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB3,
+ (asoc->sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ } else {
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ biggest_tsn_newly_acked = biggest_tsn_acked = last_tsn;
+ /* always set this up to cum-ack */
+ asoc->this_sack_highest_gap = last_tsn;
+
+ /* Move offset up to point to gaps/dups */
+ offset += sizeof(struct sctp_sack_chunk);
+ if (((num_seg * (sizeof(struct sctp_gap_ack_block))) + sizeof(struct sctp_sack_chunk)) > sack_length) {
+
+ /* skip corrupt segments */
+ goto skip_segments;
+ }
+ if (num_seg > 0) {
+
+ /*
+ * CMT: SFR algo (and HTNA) - this_sack_highest_newack has
+ * to be greater than the cumack. Also reset saw_newack to 0
+ * for all dests.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->saw_newack = 0;
+ net->this_sack_highest_newack = last_tsn;
+ }
+
+ /*
+ * thisSackHighestGap will increase while handling NEW
+ * segments this_sack_highest_newack will increase while
+ * handling NEWLY ACKED chunks. this_sack_lowest_newack is
+ * used for CMT DAC algo. saw_newack will also change.
+ */
+ sctp_handle_segments(m, &offset, stcb, asoc, ch, last_tsn,
+ &biggest_tsn_acked, &biggest_tsn_newly_acked, &this_sack_lowest_newack,
+ num_seg, &ecn_seg_sums);
+
+ if (sctp_strict_sacks) {
+ /*
+ * validate the biggest_tsn_acked in the gap acks if
+ * strict adherence is wanted.
+ */
+ if ((biggest_tsn_acked == send_s) ||
+ (compare_with_wrap(biggest_tsn_acked, send_s, MAX_TSN))) {
+ /*
+ * peer is either confused or we are under
+ * attack. We must abort.
+ */
+ goto hopeless_peer;
+ }
+ }
+ }
+skip_segments:
+ /*******************************************/
+ /* cancel ALL T3-send timer if accum moved */
+ /*******************************************/
+ if (sctp_cmt_on_off) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->new_pseudo_cumack)
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_27);
+
+ }
+ } else {
+ if (accum_moved) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
+ }
+ }
+ }
+ /********************************************/
+ /* drop the acked chunks from the sendqueue */
+ /********************************************/
+ asoc->last_acked_seq = cum_ack;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 == NULL)
+ goto done_with_it;
+ do {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cum_ack,
+ MAX_TSN)) {
+ break;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* no more sent on list */
+ printf("Warning, tp1->sent == %d and its now acked?\n",
+ tp1->sent);
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->pr_sctp_on) {
+ if (asoc->pr_sctp_cnt != 0)
+ asoc->pr_sctp_cnt--;
+ }
+ if ((TAILQ_FIRST(&asoc->sent_queue) == NULL) &&
+ (asoc->total_flight > 0)) {
+#ifdef INVARIANTS
+ panic("Warning flight size is postive and should be 0");
+#else
+ SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n",
+ asoc->total_flight);
+#endif
+ asoc->total_flight = 0;
+ }
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ asoc->sent_queue_cnt_removeable--;
+ }
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ wake_him++;
+ tp1 = tp2;
+ } while (tp1 != NULL);
+
+done_with_it:
+ /* sa_ignore NO_NULL_CHK */
+ if ((wake_him) && (stcb->sctp_socket)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ if (asoc->fast_retran_loss_recovery && accum_moved) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ asoc->fast_recovery_tsn, MAX_TSN) ||
+ asoc->last_acked_seq == asoc->fast_recovery_tsn) {
+ /* Setup so we will exit RFC2582 fast recovery */
+ will_exit_fast_recovery = 1;
+ }
+ }
+ /*
+ * Check for revoked fragments:
+ *
+ * if Previous sack - Had no frags then we can't have any revoked if
+ * Previous sack - Had frag's then - If we now have frags aka
+ * num_seg > 0 call sctp_check_for_revoked() to tell if peer revoked
+ * some of them. else - The peer revoked all ACKED fragments, since
+ * we had some before and now we have NONE.
+ */
+
+ if (num_seg)
+ sctp_check_for_revoked(stcb, asoc, cum_ack, biggest_tsn_acked);
+ else if (asoc->saw_sack_with_frags) {
+ int cnt_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 != NULL) {
+ /* Peer revoked all dg's marked or acked */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if ((tp1->sent > SCTP_DATAGRAM_RESEND) &&
+ (tp1->sent < SCTP_FORWARD_TSN_SKIP)) {
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * To ensure that this increase in
+ * flightsize, which is artificial,
+ * does not throttle the sender, we
+ * also increase the cwnd
+ * artificially.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ cnt_revoked++;
+ }
+ }
+ if (cnt_revoked) {
+ reneged_all = 1;
+ }
+ }
+ asoc->saw_sack_with_frags = 0;
+ }
+ if (num_seg)
+ asoc->saw_sack_with_frags = 1;
+ else
+ asoc->saw_sack_with_frags = 0;
+
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery);
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ /* stop all timers */
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
+ }
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got all across.. but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ asoc->locked_on_sending = NULL;
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ return;
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ return;
+ }
+ }
+ /*
+ * Now here we are going to recycle net_ack for a different use...
+ * HEADS UP.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->net_ack = 0;
+ }
+
+ /*
+ * CMT DAC algorithm: If SACK DAC flag was 0, then no extra marking
+ * to be done. Setting this_sack_lowest_newack to the cum_ack will
+ * automatically ensure that.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_use_dac && (cmt_dac_flag == 0)) {
+ this_sack_lowest_newack = cum_ack;
+ }
+ if (num_seg > 0) {
+ sctp_strike_gap_ack_chunks(stcb, asoc, biggest_tsn_acked,
+ biggest_tsn_newly_acked, this_sack_lowest_newack, accum_moved);
+ }
+ /*********************************************/
+ /* Here we perform PR-SCTP procedures */
+ /* (section 4.2) */
+ /*********************************************/
+ /* C1. update advancedPeerAckPoint */
+ if (compare_with_wrap(cum_ack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cum_ack;
+ }
+ /* C2. try to further move advancedPeerAckPoint ahead */
+ if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = sctp_try_advance_peer_ack_point(stcb, asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, cum_ack,
+ MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ send_forward_tsn(stcb, asoc);
+
+ /*
+ * ECN Nonce: Disable Nonce Sum check when FWD TSN
+ * is sent and store resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point;
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc);
+
+ /******************************************************************
+ * Here we do the stuff with ECN Nonce checking.
+ * We basically check to see if the nonce sum flag was incorrect
+ * or if resynchronization needs to be done. Also if we catch a
+ * misbehaving receiver we give him the kick.
+ ******************************************************************/
+
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base + ecn_seg_sums) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * Now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK, stored in
+ * ecn_seg_sums. We also know the SACK's
+ * nonce sum, which is in nonce_sum_flag. So
+ * we can build a truth table to
+ * back-calculate the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value   Seg-Sums   Base
+ *        0             0         0
+ *        1             0         1
+ *        0             1         1
+ *        1             1         0
+ */
+ asoc->nonce_sum_expect_base = (ecn_seg_sums ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* Now are we exiting loss recovery ? */
+ if (will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ asoc->fast_retran_loss_recovery = 0;
+ }
+ if ((asoc->sat_t3_loss_recovery) &&
+ ((compare_with_wrap(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn,
+ MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->sat_t3_recovery_tsn)))) {
+ /* end satellite t3 loss recovery */
+ asoc->sat_t3_loss_recovery = 0;
+ }
+ /*
+ * CMT Fast recovery
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ net->fast_retran_loss_recovery = 0;
+ }
+ }
+
+ /* Adjust and set the new rwnd value */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, asoc->total_flight, (asoc->sent_queue_cnt * sctp_peer_chunk_oh), a_rwnd);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(a_rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /*
+ * Now we must setup so we have a timer up for anyone with
+ * outstanding data.
+ */
+ done_once = 0;
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (win_probe_recovery && (net->window_probe)) {
+ net->window_probe = 0;
+ win_probe_recovered = 1;
+ /*-
+ * Find first chunk that was used with
+ * window probe and clear the event. Put
+ * it back into the send queue as if has
+ * not been sent.
+ */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->flight_size) {
+ j++;
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ } else {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /* huh, this should not happen */
+ sctp_fs_audit(asoc);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ asoc->sent_queue_retran_cnt++;
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ if (sctp_logging_level & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ a_rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
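
The ECN-nonce resynchronization near the end of sctp_handle_sack() back-calculates nonce_sum_expect_base from the truth table in the comment: the new base is just the XOR of the gap-acked segment sums and the nonce-sum bit carried in the SACK chunk flags, masked down to one bit. A small self-checking sketch of that relation (the macro name below is a stand-in, not the kernel's):

#include <assert.h>
#include <stdio.h>

#define SACK_NONCE_SUM 0x01	/* low-order nonce-sum bit, as carried in the SACK flags */

/* Back-calculate the expected nonce-sum base from the SACK flag bit and
 * the sum of nonces over the gap-acked segments (see the truth table). */
static int
nonce_base(int sack_flag, int seg_sums)
{
	return ((seg_sums ^ sack_flag) & SACK_NONCE_SUM);
}

int
main(void)
{
	/* Exhaustively check the four rows of the truth table. */
	assert(nonce_base(0, 0) == 0);
	assert(nonce_base(1, 0) == 1);
	assert(nonce_base(0, 1) == 1);
	assert(nonce_base(1, 1) == 0);
	printf("nonce truth table holds\n");
	return (0);
}
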
+
+void
+sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp,
+ struct sctp_nets *netp, int *abort_flag)
+{
+ /* Copy cum-ack */
+ uint32_t cum_ack, a_rwnd;
+
+ cum_ack = ntohl(cp->cumulative_tsn_ack);
+ /* Arrange so a_rwnd does NOT change */
+ a_rwnd = stcb->asoc.peers_rwnd + stcb->asoc.total_flight;
+
+ /* Now call the express sack handling */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, 0, abort_flag);
+}
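
sctp_update_acked() deliberately passes peers_rwnd plus total_flight as a_rwnd, so that when the express handler subtracts the flight back out the peer's window is left unchanged. A one-line sanity check of that arithmetic, ignoring the per-chunk overhead term for simplicity:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t peers_rwnd = 20000, total_flight = 5000;
	uint32_t a_rwnd = peers_rwnd + total_flight;	/* what sctp_update_acked passes */

	/* The SACK handler recomputes rwnd as a_rwnd - total_flight (+ overhead). */
	assert(a_rwnd - total_flight == peers_rwnd);
	return (0);
}
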
+
+static void
+sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb,
+ struct sctp_stream_in *strmin)
+{
+ struct sctp_queued_to_read *ctl, *nctl;
+ struct sctp_association *asoc;
+ int tt;
+
+ asoc = &stcb->asoc;
+ tt = strmin->last_sequence_delivered;
+ /*
+ * First deliver anything prior to and including the stream no that
+ * came in
+ */
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(tt, ctl->sinfo_ssn, MAX_SEQ) ||
+ (tt == ctl->sinfo_ssn)) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ if (stcb->sctp_socket) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /* no more delivery now. */
+ break;
+ }
+ ctl = nctl;
+ }
+ /*
+ * now we must deliver things in queue the normal way if any are
+ * now ready.
+ */
+ tt = strmin->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (tt == ctl->sinfo_ssn) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn;
+ if (stcb->sctp_socket) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ }
+ tt = strmin->last_sequence_delivered + 1;
+ } else {
+ break;
+ }
+ ctl = nctl;
+ }
+}
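
sctp_kick_prsctp_reorder_queue() delivers in two passes: first everything at or before the stream sequence number the FWD-TSN skipped to, then whatever has become strictly in-order behind it. The sketch below mimics that two-phase sweep over a sorted array standing in for the stream's inqueue; it ignores 16-bit sequence wrap for brevity and all names are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Deliver queued SSNs: first all <= skip_to, then only strictly consecutive ones. */
static void
kick_reorder_queue(const uint16_t *queued, int n, uint16_t skip_to)
{
	uint16_t last_delivered = skip_to;
	int i = 0;

	/* Phase 1: anything at or before the forwarded-to sequence number. */
	while (i < n && queued[i] <= skip_to) {
		printf("deliver ssn %u (skipped region)\n", queued[i]);
		i++;
	}
	/* Phase 2: keep going only while the queue stays strictly in order. */
	while (i < n && queued[i] == (uint16_t)(last_delivered + 1)) {
		last_delivered = queued[i];
		printf("deliver ssn %u (in order)\n", queued[i]);
		i++;
	}
}

int
main(void)
{
	uint16_t q[] = { 3, 4, 6, 7, 9 };

	/* Skips to 5: delivers 3 and 4, then 6 and 7; 9 waits for 8. */
	kick_reorder_queue(q, 5, 5);
	return (0);
}
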
+
+void
+sctp_handle_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_forward_tsn_chunk *fwd, int *abort_flag, struct mbuf *m, int offset)
+{
+ /*
+ * ISSUES that MUST be fixed for ECN! When we are the sender of the
+ * forward TSN, when the SACK comes back that acknowledges the
+ * FWD-TSN we must reset the NONCE sum to match correctly. This will
+ * get quite tricky since we may have sent more data intervening
+ * and must carefully account for what the SACK says on the nonce
+ * and any gaps that are reported. This work will NOT be done here,
+ * but I note it here since it is really related to PR-SCTP and
+ * FWD-TSN's
+ */
+
+ /* The pr-sctp fwd tsn */
+ /*
+ * Here we will perform all the data receiver side steps for
+ * processing FwdTSN, as required by the pr-sctp draft:
+ *
+ * Assume we get FwdTSN(x):
+ *
+ * 1) update local cumTSN to x
+ * 2) try to further advance cumTSN to x plus others we have
+ * 3) examine and update re-ordering queue on pr-in-streams
+ * 4) clean up re-assembly queue
+ * 5) Send a sack to report where we are.
+ */
+ struct sctp_association *asoc;
+ uint32_t new_cum_tsn, gap;
+ unsigned int i, cnt_gone, fwd_sz, cumack_set_flag, m_size;
+ struct sctp_stream_in *strm;
+ struct sctp_tmit_chunk *chk, *at;
+
+ cumack_set_flag = 0;
+ asoc = &stcb->asoc;
+ cnt_gone = 0;
+ if ((fwd_sz = ntohs(fwd->ch.chunk_length)) < sizeof(struct sctp_forward_tsn_chunk)) {
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "Bad size too small/big fwd-tsn\n");
+ return;
+ }
+ m_size = (stcb->asoc.mapping_array_size << 3);
+ /*************************************************************/
+ /* 1. Here we update local cumTSN and shift the bitmap array */
+ /*************************************************************/
+ new_cum_tsn = ntohl(fwd->new_cumulative_tsn);
+
+ if (compare_with_wrap(asoc->cumulative_tsn, new_cum_tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == new_cum_tsn) {
+ /* Already got there ... */
+ return;
+ }
+ if (compare_with_wrap(new_cum_tsn, asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+ asoc->highest_tsn_inside_map = new_cum_tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 0, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ /*
+ * now we know the new TSN is more advanced, let's find the actual
+ * gap
+ */
+ if ((compare_with_wrap(new_cum_tsn, asoc->mapping_array_base_tsn,
+ MAX_TSN)) ||
+ (new_cum_tsn == asoc->mapping_array_base_tsn)) {
+ gap = new_cum_tsn - asoc->mapping_array_base_tsn;
+ } else {
+ /* try to prevent underflow here */
+ gap = new_cum_tsn + (MAX_TSN - asoc->mapping_array_base_tsn) + 1;
+ }
+
+ if (gap >= m_size) {
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 0, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) {
+ struct mbuf *oper;
+
+ /*
+ * out of range (of single byte chunks in the rwnd I
+ * give out). This must be an attacker.
+ */
+ *abort_flag = 1;
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_33);
+ ippp++;
+ *ippp = asoc->highest_tsn_inside_map;
+ ippp++;
+ *ippp = new_cum_tsn;
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ }
+ SCTP_STAT_INCR(sctps_fwdtsn_map_over);
+slide_out:
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ cumack_set_flag = 1;
+ asoc->mapping_array_base_tsn = new_cum_tsn + 1;
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map = new_cum_tsn;
+
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 3, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ asoc->last_echo_tsn = asoc->highest_tsn_inside_map;
+ } else {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if ((compare_with_wrap(((uint32_t) asoc->cumulative_tsn + gap), asoc->highest_tsn_inside_map, MAX_TSN)) ||
+ (((uint32_t) asoc->cumulative_tsn + gap) == asoc->highest_tsn_inside_map)) {
+ goto slide_out;
+ } else {
+ for (i = 0; i <= gap; i++) {
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, i);
+ }
+ }
+ /*
+ * Now after marking all, slide thing forward but no sack
+ * please.
+ */
+ sctp_sack_check(stcb, 0, 0, abort_flag);
+ if (*abort_flag)
+ return;
+ }
+
+ /*************************************************************/
+ /* 2. Clear up re-assembly queue */
+ /*************************************************************/
+ /*
+ * First service it if pd-api is up, just in case we can progress it
+ * forward
+ */
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* For each one on here see if we need to toss it */
+ /*
+ * For now large messages held on the reasmqueue that are
+ * complete will be tossed too. We could in theory do more
+ * work to spin through and stop after dumping one msg aka
+ * seeing the start of a new msg at the head, and call the
+ * delivery function... to see if it can be delivered... But
+ * for now we just dump everything on the queue.
+ */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ at = TAILQ_NEXT(chk, sctp_next);
+ if (compare_with_wrap(asoc->cumulative_tsn,
+ chk->rec.data.TSN_seq, MAX_TSN) ||
+ asoc->cumulative_tsn == chk->rec.data.TSN_seq) {
+ /* It needs to be tossed */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ asoc->tsn_last_delivered, MAX_TSN)) {
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi =
+ chk->rec.data.stream_seq;
+ asoc->fragment_flags =
+ chk->rec.data.rcv_flags;
+ }
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ cnt_gone++;
+
+ /* Clear up any stream problem */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ (compare_with_wrap(chk->rec.data.stream_seq,
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered,
+ MAX_SEQ))) {
+ /*
+ * We must dump forward this stream's
+ * sequence number if the chunk is
+ * not unordered that is being
+ * skipped. There is a chance that
+ * if the peer does not include the
+ * last fragment in its FWD-TSN we
+ * WILL have a problem here since
+ * you would have a partial chunk in
+ * queue that may not be
+ * deliverable. Also if a Partial
+ * delivery API has started, the user
+ * may get a partial chunk. The next
+ * read returning a new chunk...
+ * really ugly but I see no way
+ * around it! Maybe a notify??
+ */
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered =
+ chk->rec.data.stream_seq;
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ } else {
+ /*
+ * Ok we have gone beyond the end of the
+ * fwd-tsn's mark. Some checks...
+ */
+ if ((asoc->fragmented_delivery_inprogress) &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
+ uint32_t str_seq;
+
+ /*
+ * Special case PD-API is up and
+ * what we fwd-tsn' over includes
+ * one that had the LAST_FRAG. We no
+ * longer need to do the PD-API.
+ */
+ asoc->fragmented_delivery_inprogress = 0;
+
+ str_seq = (asoc->str_of_pdapi << 16) | asoc->ssn_of_pdapi;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&str_seq, SCTP_SO_NOT_LOCKED);
+
+ }
+ break;
+ }
+ chk = at;
+ }
+ }
+ if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Ok we removed cnt_gone chunks in the PD-API queue that
+ * were being delivered. So now we must turn off the flag.
+ */
+ uint32_t str_seq;
+
+ str_seq = (asoc->str_of_pdapi << 16) | asoc->ssn_of_pdapi;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&str_seq, SCTP_SO_NOT_LOCKED);
+ asoc->fragmented_delivery_inprogress = 0;
+ }
+ /*************************************************************/
+ /* 3. Update the PR-stream re-ordering queues */
+ /*************************************************************/
+ fwd_sz -= sizeof(*fwd);
+ if (m && fwd_sz) {
+ /* New method. */
+ unsigned int num_str;
+ struct sctp_strseq *stseq, strseqbuf;
+
+ offset += sizeof(*fwd);
+
+ num_str = fwd_sz / sizeof(struct sctp_strseq);
+ for (i = 0; i < num_str; i++) {
+ uint16_t st;
+
+ stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_strseq),
+ (uint8_t *) & strseqbuf);
+ offset += sizeof(struct sctp_strseq);
+ if (stseq == NULL) {
+ break;
+ }
+ /* Convert */
+ st = ntohs(stseq->stream);
+ stseq->stream = st;
+ st = ntohs(stseq->sequence);
+ stseq->sequence = st;
+ /* now process */
+ if (stseq->stream >= asoc->streamincnt) {
+ /* screwed up streams, stop! */
+ break;
+ }
+ strm = &asoc->strmin[stseq->stream];
+ if (compare_with_wrap(stseq->sequence,
+ strm->last_sequence_delivered, MAX_SEQ)) {
+ /* Update the sequence number */
+ strm->last_sequence_delivered =
+ stseq->sequence;
+ }
+ /* now kick the stream the new way */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_kick_prsctp_reorder_queue(stcb, strm);
+ }
+ }
+ if (TAILQ_FIRST(&asoc->reasmqueue)) {
+ /* now lets kick out and check for more fragmented delivery */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_deliver_reasm_check(stcb, &stcb->asoc);
+ }
+}
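
Step 1 of sctp_handle_forward_tsn() converts the new cumulative TSN into an offset ("gap") from the mapping-array base, adding MAX_TSN+1 when the TSN space has wrapped. A standalone sketch of that distance calculation, simplified to decide wrap by a plain numeric comparison rather than compare_with_wrap():

#include <stdint.h>
#include <stdio.h>

#define MAX_TSN 0xffffffffU

/* Distance from the mapping-array base TSN to a new cumulative TSN,
 * guarding against wraparound the way the FWD-TSN handler does. */
static uint32_t
tsn_gap(uint32_t new_cum_tsn, uint32_t base_tsn)
{
	if (new_cum_tsn >= base_tsn)
		return (new_cum_tsn - base_tsn);
	/* The base is numerically "ahead", so the TSN space wrapped. */
	return (new_cum_tsn + (MAX_TSN - base_tsn) + 1);
}

int
main(void)
{
	printf("%u\n", tsn_gap(1005, 1000));		/* 5 */
	printf("%u\n", tsn_gap(3, 0xfffffffeU));	/* 5, across the wrap */
	return (0);
}
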
Index: in_pcb.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_pcb.h -L sys/netinet/in_pcb.h -u -r1.1.1.1 -r1.2
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.80.2.2 2005/10/02 15:45:47 andre Exp $
+ * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.100.2.1 2007/12/07 05:46:08 kmacy Exp $
*/
#ifndef _NETINET_IN_PCB_H_
@@ -44,11 +44,12 @@
struct inpcbpolicy;
/*
- * Common structure pcb for internet protocol implementation.
- * Here are stored pointers to local and foreign host table
- * entries, local and foreign socket numbers, and pointers
- * up (to a socket structure) and down (to a protocol-specific)
- * control block.
+ * Struct inpcb is the common structure pcb for the Internet Protocol
+ * implementation.
+ *
+ * Pointers to local and foreign host table entries, local and foreign socket
+ * numbers, and pointers up (to a socket structure) and down (to a
+ * protocol-specific control block) are stored here.
*/
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
@@ -56,8 +57,8 @@
/*
* PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
- * So, AF_INET6 null laddr is also used as AF_INET null laddr,
- * by utilize following structure. (At last, same as INRIA)
+ * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
+ * the following structure.
*/
struct in_addr_4in6 {
u_int32_t ia46_pad32[3];
@@ -65,8 +66,8 @@
};
/*
- * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.
- * in_conninfo has some extra padding to accomplish this.
+ * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
+ * some extra padding to accomplish this.
*/
struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
@@ -89,8 +90,8 @@
};
/*
- * XXX
- * the defines for inc_* are hacks and should be changed to direct references
+ * XXX The defines for inc_* are hacks and should be changed to direct
+ * references.
*/
struct in_conninfo {
u_int8_t inc_flags;
@@ -110,39 +111,44 @@
struct icmp6_filter;
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* hash list */
- LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
- u_int32_t inp_flow;
-
- /* local and foreign ports, local and foreign addr */
- struct in_conninfo inp_inc;
-
- caddr_t inp_ppcb; /* pointer to per-protocol pcb */
+ LIST_ENTRY(inpcb) inp_hash; /* hash list */
+ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
+ void *inp_ppcb; /* pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* PCB list info */
struct socket *inp_socket; /* back pointer to socket */
- /* list for this PCB's local port */
- struct label *inp_label; /* MAC label */
+
+ u_int32_t inp_flow;
int inp_flags; /* generic IP/datagram flags */
- struct inpcbpolicy *inp_sp; /* for IPSEC */
u_char inp_vflag; /* IP version flag (v4/v6) */
#define INP_IPV4 0x1
#define INP_IPV6 0x2
-#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
-#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
+#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
+#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
#define INP_ONESBCAST 0x10 /* send all-ones broadcast */
+#define INP_DROPPED 0x20 /* protocol drop flag */
+#define INP_SOCKREF 0x40 /* strong socket reference */
u_char inp_ip_ttl; /* time to live proto */
u_char inp_ip_p; /* protocol proto */
u_char inp_ip_minttl; /* minimum TTL or drop */
+ uint32_t inp_ispare1; /* connection id / queue id */
+ void *inp_pspare[2]; /* rtentry / general use */
- /* protocol dependent part; options */
+ /* Local and foreign ports, local and foreign addr. */
+ struct in_conninfo inp_inc;
+
+ /* list for this PCB's local port */
+ struct label *inp_label; /* MAC label */
+ struct inpcbpolicy *inp_sp; /* for IPSEC */
+
+ /* Protocol-dependent part; options. */
struct {
u_char inp4_ip_tos; /* type of service proto */
struct mbuf *inp4_options; /* IP options */
struct ip_moptions *inp4_moptions; /* IP multicast options */
} inp_depend4;
-#define inp_fport inp_inc.inc_fport
-#define inp_lport inp_inc.inc_lport
+#define inp_fport inp_inc.inc_fport
+#define inp_lport inp_inc.inc_lport
#define inp_faddr inp_inc.inc_faddr
#define inp_laddr inp_inc.inc_laddr
#define inp_ip_tos inp_depend4.inp4_ip_tos
@@ -159,17 +165,16 @@
struct icmp6_filter *inp6_icmp6filt;
/* IPV6_CHECKSUM setsockopt */
int inp6_cksum;
- u_short inp6_ifindex;
short inp6_hops;
} inp_depend6;
LIST_ENTRY(inpcb) inp_portlist;
struct inpcbport *inp_phd; /* head of this list */
+#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* generation count of this instance */
struct mtx inp_mtx;
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
-#define in6p_ip6_hlim inp_depend6.inp6_hlim
#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
#define in6p_ip6_nxt inp_ip_p
#define in6p_flowinfo inp_flow
@@ -179,7 +184,6 @@
#define in6p_moptions inp_depend6.inp6_moptions
#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
#define in6p_cksum inp_depend6.inp6_cksum
-#define inp6_ifindex inp_depend6.inp6_ifindex
#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */
#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */
#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */
@@ -187,15 +191,15 @@
#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */
};
/*
- * The range of the generation count, as used in this implementation,
- * is 9e19. We would have to create 300 billion connections per
- * second for this number to roll over in a year. This seems sufficiently
- * unlikely that we simply don't concern ourselves with that possibility.
+ * The range of the generation count, as used in this implementation, is 9e19.
+ * We would have to create 300 billion connections per second for this number
+ * to roll over in a year. This seems sufficiently unlikely that we simply
+ * don't concern ourselves with that possibility.
*/
/*
- * Interface exported to userland by various protocols which use
- * inpcbs. Hack alert -- only define if struct xsocket is in scope.
+ * Interface exported to userland by various protocols which use inpcbs. Hack
+ * alert -- only define if struct xsocket is in scope.
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
@@ -219,35 +223,62 @@
u_short phd_port;
};
-struct inpcbinfo { /* XXX documentation, prefixes */
- struct inpcbhead *hashbase;
- u_long hashmask;
- struct inpcbporthead *porthashbase;
- u_long porthashmask;
- struct inpcbhead *listhead;
- u_short lastport;
- u_short lastlow;
- u_short lasthi;
- struct uma_zone *ipi_zone; /* zone to allocate pcbs from */
- u_int ipi_count; /* number of pcbs in this list */
- u_quad_t ipi_gencnt; /* current generation count */
- struct mtx ipi_mtx;
-};
-
/*
- * NB: We cannot enable assertions when IPv6 is configured as
- * this code is shared by both IPv4 and IPv6 and IPv6 is
- * not properly locked.
+ * Global data structure for each high-level protocol (UDP, TCP, ...) in both
+ * IPv4 and IPv6. Holds inpcb lists and information for managing them.
*/
+struct inpcbinfo {
+ /*
+ * Global list of inpcbs on the protocol.
+ */
+ struct inpcbhead *ipi_listhead;
+ u_int ipi_count;
+
+ /*
+ * Global hash of inpcbs, hashed by local and foreign addresses and
+ * port numbers.
+ */
+ struct inpcbhead *ipi_hashbase;
+ u_long ipi_hashmask;
+
+ /*
+ * Global hash of inpcbs, hashed by only local port number.
+ */
+ struct inpcbporthead *ipi_porthashbase;
+ u_long ipi_porthashmask;
+
+ /*
+ * Fields associated with port lookup and allocation.
+ */
+ u_short ipi_lastport;
+ u_short ipi_lastlow;
+ u_short ipi_lasthi;
+
+ /*
+ * UMA zone from which inpcbs are allocated for this protocol.
+ */
+ struct uma_zone *ipi_zone;
+
+ /*
+ * Generation count--incremented each time a connection is allocated
+ * or freed.
+ */
+ u_quad_t ipi_gencnt;
+ struct mtx ipi_mtx;
+
+ /*
+ * vimage 1
+ * general use 1
+ */
+ void *ipi_pspare[2];
+};
+
#define INP_LOCK_INIT(inp, d, t) \
mtx_init(&(inp)->inp_mtx, (d), (t), MTX_DEF | MTX_RECURSE | MTX_DUPOK)
#define INP_LOCK_DESTROY(inp) mtx_destroy(&(inp)->inp_mtx)
#define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx)
#define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx)
-#define INP_LOCK_ASSERT(inp) do { \
- mtx_assert(&(inp)->inp_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED)
#define INP_UNLOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_NOTOWNED)
#define INP_INFO_LOCK_INIT(ipi, d) \
@@ -257,14 +288,9 @@
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx)
#define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx)
-#define INP_INFO_RLOCK_ASSERT(ipi) do { \
- mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
-#define INP_INFO_WLOCK_ASSERT(ipi) do { \
- mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_NOTOWNED)
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
@@ -329,6 +355,8 @@
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
#ifdef _KERNEL
+extern int ipport_reservedhigh;
+extern int ipport_reservedlow;
extern int ipport_lowfirstauto;
extern int ipport_lowlastauto;
extern int ipport_firstauto;
@@ -338,7 +366,7 @@
extern struct callout ipport_tick_callout;
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
-int in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
+int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
@@ -348,6 +376,8 @@
struct ucred *);
void in_pcbdetach(struct inpcb *);
void in_pcbdisconnect(struct inpcb *);
+void in_pcbdrop(struct inpcb *);
+void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
@@ -359,13 +389,19 @@
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbrehash(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
-int in_setpeeraddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);
-int in_setsockaddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);
+int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
+int in_getsockaddr(struct socket *so, struct sockaddr **nam);
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
void in_pcbremlists(struct inpcb *inp);
void ipport_tick(void *xtp);
+
+/*
+ * Debugging routines compiled in when DDB is present.
+ */
+void db_print_inpcb(struct inpcb *inp, const char *name, int indent);
+
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
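
The INP_PCBHASH() macro kept in this header folds the foreign address with the XOR of the two ports and masks the result into the bucket count. The following userspace snippet reproduces that folding so the distribution can be poked at outside the kernel; the sample address, ports, and table size are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Same folding as INP_PCBHASH: mix the foreign address with both ports,
 * then mask down to the bucket count (mask = nbuckets - 1). */
static unsigned long
pcb_hash(uint32_t faddr, uint16_t lport, uint16_t fport, unsigned long mask)
{
	return ((faddr ^ (faddr >> 16) ^ ntohs(lport ^ fport)) & mask);
}

int
main(void)
{
	uint32_t faddr = inet_addr("192.0.2.33");
	uint16_t lport = htons(80), fport = htons(54321);

	printf("bucket %lu of 512\n", pcb_hash(faddr, lport, fport, 511));
	return (0);
}
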
--- /dev/null
+++ sys/netinet/sctp_bsd_addr.c
@@ -0,0 +1,529 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_bsd_addr.c,v 1.14.4.2 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_indata.h>
+#include <sys/unistd.h>
+
+
+/* Declare all of our malloc named types */
+
+/* Note to Michael/Peter for mac-os:
+ * I think mac has this too, since I
+ * do see the M_PCB type, so I
+ * will also put the MALLOC_DECLARE
+ * in the mac file. If this does not
+ * work for mac, uncomment the defines for
+ * the strings that we use in Panda; I put
+ * them in comments in the mac-os file.
+ */
+MALLOC_DEFINE(SCTP_M_MAP, "sctp_map", "sctp asoc map descriptor");
+MALLOC_DEFINE(SCTP_M_STRMI, "sctp_stri", "sctp stream in array");
+MALLOC_DEFINE(SCTP_M_STRMO, "sctp_stro", "sctp stream out array");
+MALLOC_DEFINE(SCTP_M_ASC_ADDR, "sctp_aadr", "sctp asconf address");
+MALLOC_DEFINE(SCTP_M_ASC_IT, "sctp_a_it", "sctp asconf iterator");
+MALLOC_DEFINE(SCTP_M_AUTH_CL, "sctp_atcl", "sctp auth chunklist");
+MALLOC_DEFINE(SCTP_M_AUTH_KY, "sctp_atky", "sctp auth key");
+MALLOC_DEFINE(SCTP_M_AUTH_HL, "sctp_athm", "sctp auth hmac list");
+MALLOC_DEFINE(SCTP_M_AUTH_IF, "sctp_athi", "sctp auth info");
+MALLOC_DEFINE(SCTP_M_STRESET, "sctp_stre", "sctp stream reset");
+MALLOC_DEFINE(SCTP_M_CMSG, "sctp_cmsg", "sctp CMSG buffer");
+MALLOC_DEFINE(SCTP_M_COPYAL, "sctp_cpal", "sctp copy all");
+MALLOC_DEFINE(SCTP_M_VRF, "sctp_vrf", "sctp vrf struct");
+MALLOC_DEFINE(SCTP_M_IFA, "sctp_ifa", "sctp ifa struct");
+MALLOC_DEFINE(SCTP_M_IFN, "sctp_ifn", "sctp ifn struct");
+MALLOC_DEFINE(SCTP_M_TIMW, "sctp_timw", "sctp time block");
+MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", "sctp mvrf pcb list");
+MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control");
+MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option");
+
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+void
+sctp_wakeup_iterator(void)
+{
+ wakeup(&sctppcbinfo.iterator_running);
+}
+
+static void
+sctp_iterator_thread(void *v)
+{
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ sctppcbinfo.iterator_running = 0;
+ while (1) {
+ msleep(&sctppcbinfo.iterator_running,
+ &sctppcbinfo.ipi_iterator_wq_mtx,
+ 0, "waiting_for_work", 0);
+ sctp_iterator_worker();
+ }
+}
+
+void
+sctp_startup_iterator(void)
+{
+ int ret;
+
+ ret = kthread_create(sctp_iterator_thread,
+ (void *)NULL,
+ &sctppcbinfo.thread_proc,
+ RFPROC,
+ SCTP_KTHREAD_PAGES,
+ SCTP_KTRHEAD_NAME);
+}
+
+#endif
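
The thread-based iterator above parks in msleep() on the iterator_running channel and is kicked by wakeup() from sctp_wakeup_iterator(). A minimal userland analog of that handshake, using POSIX condition variables and invented names (work_pending, kick_worker), is sketched below; it illustrates the sleep/wakeup pattern only and is not the kernel code.

#include <pthread.h>
#include <stdio.h>

/* Userland sketch of the msleep()/wakeup() handshake used by the iterator. */
static pthread_mutex_t wq_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wq_cv  = PTHREAD_COND_INITIALIZER;
static int work_pending;		/* invented predicate for the example */

static void *
worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wq_mtx);
	while (work_pending == 0)	/* analogous to msleep() on the channel */
		pthread_cond_wait(&wq_cv, &wq_mtx);
	work_pending = 0;
	pthread_mutex_unlock(&wq_mtx);
	printf("worker: running queued work\n");	/* sctp_iterator_worker() would run here */
	return (NULL);
}

static void
kick_worker(void)			/* analogous to wakeup(chan) */
{
	pthread_mutex_lock(&wq_mtx);
	work_pending = 1;
	pthread_cond_signal(&wq_cv);
	pthread_mutex_unlock(&wq_mtx);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	kick_worker();
	pthread_join(tid, NULL);
	return (0);
}

The kernel thread differs in that it loops forever and sleeps with the iterator work-queue mutex held; msleep() drops and reacquires it atomically, which is what pthread_cond_wait() mimics here.
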
+
+
+void
+sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa)
+{
+ struct in6_ifaddr *ifa6;
+
+ ifa6 = (struct in6_ifaddr *)ifa->ifa;
+ ifa->flags = ifa6->ia6_flags;
+ if (!ip6_use_deprecated) {
+ if (ifa->flags &
+ IN6_IFF_DEPRECATED) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ if (ifa->flags &
+ (IN6_IFF_DETACHED |
+ IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY)) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+}
+
+
+
+static uint32_t
+sctp_is_desired_interface_type(struct ifaddr *ifa)
+{
+ int result;
+
+ /* check the interface type to see if it's one we care about */
+ switch (ifa->ifa_ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_ISO88023:
+ case IFT_ISO88024:
+ case IFT_ISO88025:
+ case IFT_ISO88026:
+ case IFT_STARLAN:
+ case IFT_P10:
+ case IFT_P80:
+ case IFT_HY:
+ case IFT_FDDI:
+ case IFT_XETHER:
+ case IFT_ISDNBASIC:
+ case IFT_ISDNPRIMARY:
+ case IFT_PTPSERIAL:
+ case IFT_OTHER:
+ case IFT_PPP:
+ case IFT_LOOP:
+ case IFT_SLIP:
+ case IFT_IP:
+ case IFT_IPOVERCDLC:
+ case IFT_IPOVERCLAW:
+ case IFT_VIRTUALIPADDRESS:
+ result = 1;
+ break;
+ default:
+ result = 0;
+ }
+
+ return (result);
+}
+
+static void
+sctp_init_ifns_for_vrf(int vrfid)
+{
+ /*
+ * Here we must apply ANY locks needed by the IFN we access and also
+ * make sure we lock any IFA that exists as we float through the
+ * list of IFA's
+ */
+ struct ifnet *ifn;
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ifa6;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t ifa_flags;
+
+ TAILQ_FOREACH(ifn, &ifnet, if_list) {
+ TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr == NULL) {
+ continue;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) &&
+ (ifa->ifa_addr->sa_family != AF_INET6)
+ ) {
+ /* non inet/inet6 skip */
+ continue;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+					/* skip unspecified addresses */
+ continue;
+ }
+ } else if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ continue;
+ }
+ }
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ continue;
+ }
+ if ((ifa->ifa_addr->sa_family == AF_INET6) ||
+ (ifa->ifa_addr->sa_family == AF_INET)) {
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ } else {
+ ifa_flags = 0;
+ }
+ sctp_ifa = sctp_add_addr_to_vrf(vrfid,
+ (void *)ifn,
+ ifn->if_index,
+ ifn->if_type,
+ ifn->if_xname,
+ (void *)ifa,
+ ifa->ifa_addr,
+ ifa_flags, 0
+ );
+ if (sctp_ifa) {
+ sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ }
+ }
+ }
+}
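
In the kernel, the walk above runs over the global ifnet list under the appropriate locks. From userland the same filtering can be illustrated with getifaddrs(3); the sketch below mirrors only the address filtering (skip non-INET/INET6 and unspecified addresses) and simply prints what a real implementation would register, so it is illustrative rather than part of SCTP.

#include <sys/types.h>
#include <sys/socket.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>

int
main(void)
{
	struct ifaddrs *ifap, *ifa;
	char buf[INET6_ADDRSTRLEN];

	if (getifaddrs(&ifap) != 0)
		return (1);
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		if (ifa->ifa_addr == NULL)
			continue;
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin = (struct sockaddr_in *)ifa->ifa_addr;

			if (sin->sin_addr.s_addr == INADDR_ANY)
				continue;	/* skip 0.0.0.0 */
			inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf));
		} else if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;

			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				continue;	/* skip :: */
			inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof(buf));
		} else
			continue;		/* not inet/inet6 */
		printf("%s: %s\n", ifa->ifa_name, buf);
	}
	freeifaddrs(ifap);
	return (0);
}
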
+
+
+void
+sctp_init_vrf_list(int vrfid)
+{
+ if (vrfid > SCTP_MAX_VRF_ID)
+ /* can't do that */
+ return;
+
+ /* Don't care about return here */
+ (void)sctp_allocate_vrf(vrfid);
+
+ /*
+	 * Now we need to build all the ifn's for this vrf and their
+	 * addresses.
+ */
+ sctp_init_ifns_for_vrf(vrfid);
+}
+
+static uint8_t first_time = 0;
+
+
+void
+sctp_addr_change(struct ifaddr *ifa, int cmd)
+{
+ struct sctp_ifa *ifap = NULL;
+ uint32_t ifa_flags = 0;
+ struct in6_ifaddr *ifa6;
+
+ /*
+	 * BSD only has one VRF; if this changes we will need to hook in the
+	 * right things here to get the id to pass to the address management
+ * routine.
+ */
+ if (first_time == 0) {
+		/* Special test to see if my ::1 will show up with this */
+ first_time = 1;
+ sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID);
+ }
+ if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) {
+ /* don't know what to do with this */
+ return;
+ }
+ if (ifa->ifa_addr == NULL) {
+ return;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) &&
+ (ifa->ifa_addr->sa_family != AF_INET6)
+ ) {
+ /* non inet/inet6 skip */
+ return;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+			/* skip unspecified addresses */
+ return;
+ }
+ } else if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ return;
+ }
+ }
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ return;
+ }
+ if (cmd == RTM_ADD) {
+ ifap = sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp,
+ ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type,
+ ifa->ifa_ifp->if_xname,
+ (void *)ifa, ifa->ifa_addr, ifa_flags, 1);
+ } else if (cmd == RTM_DELETE) {
+
+ sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr,
+ ifa->ifa_ifp->if_index,
+ ifa->ifa_ifp->if_xname
+ );
+ /*
+		 * We don't bump the refcount here, so the final delete will
+		 * happen when it completes.
+ */
+ }
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header,
+ int how, int allonebuf, int type)
+{
+ struct mbuf *m = NULL;
+
+ m = m_getm2(NULL, space_needed, how, type, want_header ? M_PKTHDR : 0);
+ if (m == NULL) {
+ /* bad, no memory */
+ return (m);
+ }
+ if (allonebuf) {
+ int siz;
+
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ siz = SCTP_BUF_EXTEND_SIZE(m);
+ } else {
+ if (want_header)
+ siz = MHLEN;
+ else
+ siz = MLEN;
+ }
+ if (siz < space_needed) {
+ m_freem(m);
+ return (NULL);
+ }
+ }
+ if (SCTP_BUF_NEXT(m)) {
+ sctp_m_freem(SCTP_BUF_NEXT(m));
+ SCTP_BUF_NEXT(m) = NULL;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IALLOC);
+ }
+ }
+#endif
+ return (m);
+}
+
+
+#ifdef SCTP_PACKET_LOGGING
+
+int packet_log_writers = 0;
+int packet_log_end = 0;
+uint8_t packet_log_buffer[SCTP_PACKET_LOG_SIZE];
+
+
+void
+sctp_packet_log(struct mbuf *m, int length)
+{
+ int *lenat, thisone;
+ void *copyto;
+ uint32_t *tick_tock;
+ int total_len;
+ int grabbed_lock = 0;
+ int value, newval, thisend, thisbegin;
+
+	/*
+	 * Buffer layout:
+	 *  - sizeof this entry   (total_len)
+	 *  - previous end        (value)
+	 *  - ticks of log        (ticks)
+	 *  o  ip packet
+	 *  o  as logged
+	 *  - where this started  (thisbegin)
+	 *  x <-- end points here
+	 */
+ total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
+ /* Log a packet to the buffer. */
+ if (total_len > SCTP_PACKET_LOG_SIZE) {
+		/* Can't log this packet, the buffer is not big enough. */
+ return;
+ }
+ if (length < (SCTP_MIN_V4_OVERHEAD + sizeof(struct sctp_cookie_ack_chunk))) {
+ return;
+ }
+ atomic_add_int(&packet_log_writers, 1);
+try_again:
+ if (packet_log_writers > SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ SCTP_IP_PKTLOG_LOCK();
+ grabbed_lock = 1;
+again_locked:
+ value = packet_log_end;
+ newval = packet_log_end + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = packet_log_end;
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&packet_log_end, value, thisend))) {
+ goto again_locked;
+ }
+ } else {
+ value = packet_log_end;
+ newval = packet_log_end + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = packet_log_end;
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&packet_log_end, value, thisend))) {
+ goto try_again;
+ }
+ }
+ /* Sanity check */
+ if (thisend >= SCTP_PACKET_LOG_SIZE) {
+ printf("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n",
+ thisbegin,
+ thisend,
+ packet_log_writers,
+ grabbed_lock,
+ packet_log_end);
+ packet_log_end = 0;
+ goto no_log;
+
+ }
+ lenat = (int *)&packet_log_buffer[thisbegin];
+ *lenat = total_len;
+ lenat++;
+ *lenat = value;
+ lenat++;
+ tick_tock = (uint32_t *) lenat;
+ lenat++;
+ *tick_tock = sctp_get_tick_count();
+ copyto = (void *)lenat;
+ thisone = thisend - sizeof(int);
+ lenat = (int *)&packet_log_buffer[thisone];
+ *lenat = thisbegin;
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ grabbed_lock = 0;
+ }
+ m_copydata(m, 0, length, (caddr_t)copyto);
+no_log:
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ }
+ atomic_subtract_int(&packet_log_writers, 1);
+}
+
+
+int
+sctp_copy_out_packet_log(uint8_t * target, int length)
+{
+ /*
+ * We wind through the packet log starting at start copying up to
+ * length bytes out. We return the number of bytes copied.
+ */
+ int tocopy, this_copy;
+ int *lenat;
+ int did_delay = 0;
+
+ tocopy = length;
+ if (length < (2 * sizeof(int))) {
+ /* not enough room */
+ return (0);
+ }
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_add_int(&packet_log_writers, SCTP_PKTLOG_WRITERS_NEED_LOCK);
+again:
+ if ((did_delay == 0) && (packet_log_writers != SCTP_PKTLOG_WRITERS_NEED_LOCK)) {
+ /*
+			 * We delay here for just a moment, hoping the
+			 * writer(s) that were present when we entered will
+			 * have left and only locking writers will remain to
+			 * contend with us for the lock. This does not
+			 * assure 100% access, but it's good enough for a
+			 * logging facility like this.
+ */
+ did_delay = 1;
+ DELAY(10);
+ goto again;
+ }
+ }
+ SCTP_IP_PKTLOG_LOCK();
+ lenat = (int *)target;
+ *lenat = packet_log_end;
+ lenat++;
+ this_copy = min((length - sizeof(int)), SCTP_PACKET_LOG_SIZE);
+ memcpy((void *)lenat, (void *)packet_log_buffer, this_copy);
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_subtract_int(&packet_log_writers,
+ SCTP_PKTLOG_WRITERS_NEED_LOCK);
+ }
+ SCTP_IP_PKTLOG_UNLOCK();
+ return (this_copy + sizeof(int));
+}
+
+#endif
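
sctp_packet_log() above reserves space in the fixed packet_log_buffer by compare-and-swapping the shared end offset, wrapping to the start when a record would run past SCTP_PACKET_LOG_SIZE, and escalates to SCTP_IP_PKTLOG_LOCK() only when enough concurrent writers are present. The userland sketch below shows just the lock-free reservation step with C11 atomics; the buffer size, record contents and function names are invented for the illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define LOG_SIZE 4096			/* illustrative, not SCTP_PACKET_LOG_SIZE */

static unsigned char log_buffer[LOG_SIZE];
static _Atomic int log_end;		/* next free offset, shared by writers */

/* Reserve rec_len bytes; returns the start offset, or -1 if it can never fit. */
static int
reserve(int rec_len)
{
	int oldend, newbegin, newend;

	if (rec_len > LOG_SIZE)
		return (-1);
	do {
		oldend = atomic_load(&log_end);
		if (oldend + rec_len >= LOG_SIZE) {
			newbegin = 0;		/* wrap to the start */
			newend = rec_len;
		} else {
			newbegin = oldend;
			newend = oldend + rec_len;
		}
	} while (!atomic_compare_exchange_weak(&log_end, &oldend, newend));
	return (newbegin);
}

int
main(void)
{
	const char pkt[] = "example packet bytes";
	int off;

	if ((off = reserve((int)sizeof(pkt))) >= 0) {
		memcpy(&log_buffer[off], pkt, sizeof(pkt));
		printf("logged %zu bytes at offset %d\n", sizeof(pkt), off);
	}
	return (0);
}

The kernel version additionally stores the record length, the previous end and a tick count in front of each logged packet and the record's begin offset at its tail, so sctp_copy_out_packet_log() can hand the raw region to a reader for decoding.
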
--- /dev/null
+++ sys/netinet/sctp_asconf.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.h,v 1.8 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_asconf.h,v 1.10 2007/09/15 19:07:42 rrs Exp $");
+
+#ifndef _NETINET_SCTP_ASCONF_H_
+#define _NETINET_SCTP_ASCONF_H_
+
+#if defined(_KERNEL)
+
+/*
+ * function prototypes
+ */
+extern void sctp_asconf_cleanup(struct sctp_tcb *, struct sctp_nets *);
+
+extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
+
+extern void
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
+ struct sctp_tcb *, int i);
+
+extern void
+sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,
+ struct sctp_tcb *, struct sctp_nets *, int *);
+
+extern uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *,
+ uint32_t, uint32_t, struct sctp_ifa *);
+
+
+extern int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr,
+ uint32_t val);
+extern void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ void *ptr, uint32_t type);
+extern void sctp_asconf_iterator_end(void *ptr, uint32_t val);
+
+
+extern int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *,
+ struct sockaddr *);
+
+extern void
+ sctp_set_primary_ip_address(struct sctp_ifa *ifa);
+
+extern void
+sctp_check_address_list(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sockaddr *, uint16_t, uint16_t, uint16_t, uint16_t);
+
+extern void
+ sctp_move_chunks_from_deleted_prim(struct sctp_tcb *, struct sctp_nets *);
+extern void
+ sctp_assoc_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+extern void
+ sctp_net_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_SCTP_ASCONF_H_ */
Index: tcp_timer.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_timer.c -L sys/netinet/tcp_timer.c -u -r1.2 -r1.3
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -27,12 +27,13 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.74.2.2 2006/03/01 21:08:53 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_timer.c,v 1.99 2007/10/07 20:44:24 silby Exp $");
+
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -76,8 +77,8 @@
&tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
int tcp_delacktime;
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
- CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
"Time before a delayed ACK is sent");
int tcp_msl;
@@ -86,16 +87,28 @@
int tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
+ &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
+ "Minimum Retransmission Timeout");
int tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
+ &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
+ "Retransmission Timer Slop");
static int always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+int tcp_fast_finwait2_recycle = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
+ &tcp_fast_finwait2_recycle, 0,
+ "Recycle closed FIN_WAIT_2 connections faster");
+
+int tcp_finwait2_timeout;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
+
+
static int tcp_keepcnt = TCPTV_KEEPCNT;
/* max idle probes */
int tcp_maxpersistidle;
@@ -108,12 +121,12 @@
* causes finite state machine actions if timers expire.
*/
void
-tcp_slowtimo()
+tcp_slowtimo(void)
{
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
INP_INFO_WLOCK(&tcbinfo);
- (void) tcp_timer_2msl_tw(0);
+ (void) tcp_tw_2msl_scan(0);
INP_INFO_WUNLOCK(&tcbinfo);
}
@@ -125,30 +138,42 @@
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
+static int tcp_timer_race;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
+ 0, "Count of t_inpcb races on tcp_discardcb");
+
/*
* TCP timer processing.
*/
void
-tcp_timer_delack(xtp)
- void *xtp;
+tcp_timer_delack(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
INP_INFO_RLOCK(&tcbinfo);
inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
+ */
if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_RUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
INP_INFO_RUNLOCK(&tcbinfo);
- if (callout_pending(tp->tt_delack) || !callout_active(tp->tt_delack)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
+ || !callout_active(&tp->t_timers->tt_delack)) {
INP_UNLOCK(inp);
return;
}
- callout_deactivate(tp->tt_delack);
+ callout_deactivate(&tp->t_timers->tt_delack);
tp->t_flags |= TF_ACKNOW;
tcpstat.tcps_delack++;
@@ -157,8 +182,7 @@
}
void
-tcp_timer_2msl(xtp)
- void *xtp;
+tcp_timer_2msl(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
@@ -167,121 +191,68 @@
ostate = tp->t_state;
#endif
+ /*
+ * XXXRW: Does this actually happen?
+ */
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
+ */
if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
tcp_free_sackholes(tp);
- if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
+ !callout_active(&tp->t_timers->tt_2msl)) {
INP_UNLOCK(tp->t_inpcb);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_2msl);
+ callout_deactivate(&tp->t_timers->tt_2msl);
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
* too long, or if 2MSL time is up from TIME_WAIT, delete connection
* control block. Otherwise, check again in a bit.
+ *
+	 * If fast recycling of FIN_WAIT_2 is enabled, we are in FIN_WAIT_2 and
+	 * the receiver has closed, there's no point in hanging onto the
+	 * FIN_WAIT_2 socket; just close it, ignoring any recent incoming segments.
*/
- if (tp->t_state != TCPS_TIME_WAIT &&
- (ticks - tp->t_rcvtime) <= tcp_maxidle)
- callout_reset(tp->tt_2msl, tcp_keepintvl,
- tcp_timer_2msl, tp);
- else
- tp = tcp_close(tp);
+ if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
+ tp->t_inpcb && tp->t_inpcb->inp_socket &&
+ (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
+ tcpstat.tcps_finwait2_drops++;
+ tp = tcp_close(tp);
+ } else {
+ if (tp->t_state != TCPS_TIME_WAIT &&
+ (ticks - tp->t_rcvtime) <= tcp_maxidle)
+ callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
+ tcp_timer_2msl, tp);
+ else
+ tp = tcp_close(tp);
+ }
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
}
-/*
- * The timed wait lists contain references to each of the TCP sessions
- * currently TIME_WAIT state. The list pointers, including the list pointers
- * in each tcptw structure, are protected using the global tcbinfo lock,
- * which must be held over list iteration and modification.
- */
-struct twlist {
- LIST_HEAD(, tcptw) tw_list;
- struct tcptw tw_tail;
-};
-#define TWLIST_NLISTS 2
-static struct twlist twl_2msl[TWLIST_NLISTS];
-static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL };
-
-void
-tcp_timer_init(void)
-{
- int i;
- struct twlist *twl;
-
- for (i = 0; i < TWLIST_NLISTS; i++) {
- twl = &twl_2msl[i];
- LIST_INIT(&twl->tw_list);
- LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl);
- }
-}
-
-void
-tcp_timer_2msl_reset(struct tcptw *tw, int timeo)
-{
- int i;
- struct tcptw *tw_tail;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- INP_LOCK_ASSERT(tw->tw_inpcb);
- if (tw->tw_time != 0)
- LIST_REMOVE(tw, tw_2msl);
- tw->tw_time = timeo + ticks;
- i = timeo > tcp_msl ? 1 : 0;
- tw_tail = &twl_2msl[i].tw_tail;
- LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
-}
-
-void
-tcp_timer_2msl_stop(struct tcptw *tw)
-{
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- if (tw->tw_time != 0)
- LIST_REMOVE(tw, tw_2msl);
-}
-
-struct tcptw *
-tcp_timer_2msl_tw(int reuse)
-{
- struct tcptw *tw, *tw_tail;
- struct twlist *twl;
- int i;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- for (i = 0; i < 2; i++) {
- twl = tw_2msl_list[i];
- tw_tail = &twl->tw_tail;
- for (;;) {
- tw = LIST_FIRST(&twl->tw_list);
- if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
- break;
- INP_LOCK(tw->tw_inpcb);
- if (tcp_twclose(tw, reuse) != NULL)
- return (tw);
- }
- }
- return (NULL);
-}
-
void
-tcp_timer_keep(xtp)
- void *xtp;
+tcp_timer_keep(void *xtp)
{
struct tcpcb *tp = xtp;
struct tcptemp *t_template;
@@ -293,17 +264,26 @@
#endif
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
+ || !callout_active(&tp->t_timers->tt_keep)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_keep);
+ callout_deactivate(&tp->t_timers->tt_keep);
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
@@ -335,9 +315,9 @@
tp->rcv_nxt, tp->snd_una - 1, 0);
(void) m_free(dtom(t_template));
}
- callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
} else
- callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
@@ -353,18 +333,17 @@
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(tp->t_inpcb);
INP_INFO_WUNLOCK(&tcbinfo);
}
void
-tcp_timer_persist(xtp)
- void *xtp;
+tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
@@ -375,17 +354,26 @@
#endif
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
+ || !callout_active(&tp->t_timers->tt_persist)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_persist);
+ callout_deactivate(&tp->t_timers->tt_persist);
/*
	 * Persistence timer into zero window.
* Force a byte to be output, if possible.
@@ -412,18 +400,16 @@
out:
#ifdef TCPDEBUG
- if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
+ if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+ tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
}
void
-tcp_timer_rexmt(xtp)
- void *xtp;
+tcp_timer_rexmt(void *xtp)
{
struct tcpcb *tp = xtp;
int rexmt;
@@ -437,17 +423,26 @@
INP_INFO_WLOCK(&tcbinfo);
headlocked = 1;
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
+ || !callout_active(&tp->t_timers->tt_rexmt)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_rexmt);
+ callout_deactivate(&tp->t_timers->tt_rexmt);
tcp_free_sackholes(tp);
/*
* Retransmission timer went off. Message has not
@@ -560,12 +555,76 @@
out:
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
if (headlocked)
INP_INFO_WUNLOCK(&tcbinfo);
}
+
+void
+tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+{
+ struct callout *t_callout;
+ void *f_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ f_callout = tcp_timer_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ f_callout = tcp_timer_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ f_callout = tcp_timer_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ f_callout = tcp_timer_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ f_callout = tcp_timer_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ if (delta == 0) {
+ callout_stop(t_callout);
+ } else {
+ callout_reset(t_callout, delta, f_callout, tp);
+ }
+}
+
+int
+tcp_timer_active(struct tcpcb *tp, int timer_type)
+{
+ struct callout *t_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ return callout_active(t_callout);
+}
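
The new tcp_timer_activate()/tcp_timer_active() pair centralizes all callout handling behind a timer-type switch, so call sites elsewhere in this patch change from, e.g., callout_stop(tp->tt_rexmt) to tcp_timer_activate(tp, TT_REXMT, 0) (see tcp_sack_partialack() in the tcp_sack.c diff below). A small userland sketch of the same dispatch idea follows; the toy callout structure and names are invented for the example and are not the kernel interface.

#include <stdio.h>

enum timer_type { T_DELACK, T_REXMT, T_PERSIST, T_KEEP, T_2MSL, T_NTYPES };

struct fake_callout {
	int	active;		/* is the timer armed? */
	int	ticks_left;	/* pretend expiry time */
	void	(*handler)(void);
};

static void delack_fires(void)	{ printf("delayed ACK timer fired\n"); }
static void rexmt_fires(void)	{ printf("retransmit timer fired\n"); }

static struct fake_callout timers[T_NTYPES] = {
	[T_DELACK] = { 0, 0, delack_fires },
	[T_REXMT]  = { 0, 0, rexmt_fires },
};

/* delta == 0 stops the timer, anything else (re)arms it. */
static void
timer_activate(enum timer_type type, int delta)
{
	struct fake_callout *c = &timers[type];

	if (delta == 0) {
		c->active = 0;
	} else {
		c->active = 1;
		c->ticks_left = delta;
	}
}

static int
timer_active(enum timer_type type)
{
	return (timers[type].active);
}

int
main(void)
{
	timer_activate(T_REXMT, 3);		/* arm the retransmit timer */
	timer_activate(T_DELACK, 0);		/* make sure delack is off */
	printf("rexmt active: %d, delack active: %d\n",
	    timer_active(T_REXMT), timer_active(T_DELACK));
	return (0);
}

The point of the indirection is the same as in the kernel function: callers name a timer by type, and only the helper knows which callout and handler that type maps to.
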
Index: tcp_sack.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_sack.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_sack.c -L sys/netinet/tcp_sack.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,37 +28,9 @@
* SUCH DAMAGE.
*
* @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.26.2.1 2005/10/09 03:17:41 delphij Exp $
*/
/*-
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
* @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
*
* NRL grants permission for redistribution and use in source and binary
@@ -96,11 +69,13 @@
* official policies, either expressed or implied, of the US Naval
* Research Laboratory (NRL).
*/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_sack.c,v 1.40 2007/05/11 11:21:43 rwatson Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_input.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -125,9 +100,7 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */
#include <netinet/in_var.h>
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
@@ -153,17 +126,17 @@
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
int tcp_do_sack = 1;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
- &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
+ &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack);
static int tcp_sack_maxholes = 128;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
- &tcp_sack_maxholes, 0,
+ &tcp_sack_maxholes, 0,
"Maximum number of TCP SACK holes allowed per connection");
static int tcp_sack_globalmaxholes = 65536;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
- &tcp_sack_globalmaxholes, 0,
+ &tcp_sack_globalmaxholes, 0,
"Global maximum number of TCP SACK holes");
static int tcp_sack_globalholes = 0;
@@ -172,8 +145,8 @@
"Global number of TCP SACK holes currently allocated");
/*
- * This function is called upon receipt of new valid data (while not in header
- * prediction mode), and it updates the ordered list of sacks.
+ * This function is called upon receipt of new valid data (while not in
+ * header prediction mode), and it updates the ordered list of sacks.
*/
void
tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
@@ -189,7 +162,7 @@
INP_LOCK_ASSERT(tp->t_inpcb);
- /* Check arguments */
+ /* Check arguments. */
KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
/* SACK block for the received segment. */
@@ -197,9 +170,9 @@
head_blk.end = rcv_end;
/*
- * Merge updated SACK blocks into head_blk, and
- * save unchanged SACK blocks into saved_blks[].
- * num_saved will have the number of the saved SACK blocks.
+ * Merge updated SACK blocks into head_blk, and save unchanged SACK
+ * blocks into saved_blks[]. num_saved will have the number of the
+ * saved SACK blocks.
*/
num_saved = 0;
for (i = 0; i < tp->rcv_numsacks; i++) {
@@ -212,8 +185,8 @@
} else if (SEQ_LEQ(head_blk.start, end) &&
SEQ_GEQ(head_blk.end, start)) {
/*
- * Merge this SACK block into head_blk.
- * This SACK block itself will be discarded.
+ * Merge this SACK block into head_blk. This SACK
+ * block itself will be discarded.
*/
if (SEQ_GT(head_blk.start, start))
head_blk.start = start;
@@ -235,8 +208,8 @@
num_head = 0;
if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
/*
- * The received data segment is an out-of-order segment.
- * Put head_blk at the top of SACK list.
+ * The received data segment is an out-of-order segment. Put
+ * head_blk at the top of SACK list.
*/
tp->sackblks[0] = head_blk;
num_head = 1;
@@ -263,8 +236,7 @@
* Delete all receiver-side SACK information.
*/
void
-tcp_clean_sackreport(tp)
- struct tcpcb *tp;
+tcp_clean_sackreport(struct tcpcb *tp)
{
int i;
@@ -308,6 +280,7 @@
static void
tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
{
+
uma_zfree(sack_hole_zone, hole);
tp->snd_numholes--;
@@ -322,7 +295,7 @@
*/
static struct sackhole *
tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
- struct sackhole *after)
+ struct sackhole *after)
{
struct sackhole *hole;
@@ -331,7 +304,7 @@
if (hole == NULL)
return NULL;
- /* Insert the new SACK hole into scoreboard */
+ /* Insert the new SACK hole into scoreboard. */
if (after != NULL)
TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
else
@@ -350,6 +323,7 @@
static void
tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
{
+
/* Update SACK hint. */
if (tp->sackhint.nexthole == hole)
tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink);
@@ -385,30 +359,35 @@
sack_blocks[num_sack_blks++].end = th_ack;
}
/*
- * Append received valid SACK blocks to sack_blocks[].
+ * Append received valid SACK blocks to sack_blocks[], but only if we
+ * received new blocks from the other side.
*/
- for (i = 0; i < to->to_nsacks; i++) {
- bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack));
- sack.start = ntohl(sack.start);
- sack.end = ntohl(sack.end);
- if (SEQ_GT(sack.end, sack.start) &&
- SEQ_GT(sack.start, tp->snd_una) &&
- SEQ_GT(sack.start, th_ack) &&
- SEQ_LEQ(sack.end, tp->snd_max))
- sack_blocks[num_sack_blks++] = sack;
+ if (to->to_flags & TOF_SACK) {
+ for (i = 0; i < to->to_nsacks; i++) {
+ bcopy((to->to_sacks + i * TCPOLEN_SACK),
+ &sack, sizeof(sack));
+ sack.start = ntohl(sack.start);
+ sack.end = ntohl(sack.end);
+ if (SEQ_GT(sack.end, sack.start) &&
+ SEQ_GT(sack.start, tp->snd_una) &&
+ SEQ_GT(sack.start, th_ack) &&
+ SEQ_LT(sack.start, tp->snd_max) &&
+ SEQ_GT(sack.end, tp->snd_una) &&
+ SEQ_LEQ(sack.end, tp->snd_max))
+ sack_blocks[num_sack_blks++] = sack;
+ }
}
-
/*
- * Return if SND.UNA is not advanced and no valid SACK block
- * is received.
+ * Return if SND.UNA is not advanced and no valid SACK block is
+ * received.
*/
if (num_sack_blks == 0)
return;
/*
- * Sort the SACK blocks so we can update the scoreboard
- * with just one pass. The overhead of sorting upto 4+1 elements
- * is less than making upto 4+1 passes over the scoreboard.
+ * Sort the SACK blocks so we can update the scoreboard with just one
+	 * pass. The overhead of sorting up to 4+1 elements is less than
+	 * making up to 4+1 passes over the scoreboard.
*/
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
@@ -423,15 +402,17 @@
/*
* Empty scoreboard. Need to initialize snd_fack (it may be
* uninitialized or have a bogus value). Scoreboard holes
- * (from the sack blocks received) are created later below (in
- * the logic that adds holes to the tail of the scoreboard).
+ * (from the sack blocks received) are created later below
+ * (in the logic that adds holes to the tail of the
+ * scoreboard).
*/
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
/*
- * In the while-loop below, incoming SACK blocks (sack_blocks[])
- * and SACK holes (snd_holes) are traversed from their tails with
- * just one pass in order to reduce the number of compares especially
- * when the bandwidth-delay product is large.
+ * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
+ * SACK holes (snd_holes) are traversed from their tails with just
+ * one pass in order to reduce the number of compares especially when
+ * the bandwidth-delay product is large.
+ *
* Note: Typically, in the first RTT of SACK recovery, the highest
* three or four SACK blocks with the same ack number are received.
* In the second RTT, if retransmitted data segments are not lost,
@@ -441,88 +422,104 @@
sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */
if (SEQ_LT(tp->snd_fack, sblkp->start)) {
/*
- * The highest SACK block is beyond fack.
- * Append new SACK hole at the tail.
- * If the second or later highest SACK blocks are also
- * beyond the current fack, they will be inserted by
- * way of hole splitting in the while-loop below.
+ * The highest SACK block is beyond fack. Append new SACK
+ * hole at the tail. If the second or later highest SACK
+ * blocks are also beyond the current fack, they will be
+ * inserted by way of hole splitting in the while-loop below.
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
- if (temp == NULL)
- return;
- tp->snd_fack = sblkp->end;
- /* Go to the previous sack block. */
- sblkp--;
+ if (temp != NULL) {
+ tp->snd_fack = sblkp->end;
+ /* Go to the previous sack block. */
+ sblkp--;
+ } else {
+ /*
+ * We failed to add a new hole based on the current
+ * sack block. Skip over all the sack blocks that
+ * fall completely to the right of snd_fack and
+ * proceed to trim the scoreboard based on the
+ * remaining sack blocks. This also trims the
+ * scoreboard for th_ack (which is sack_blocks[0]).
+ */
+ while (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->start))
+ sblkp--;
+ if (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->end))
+ tp->snd_fack = sblkp->end;
+ }
} else if (SEQ_LT(tp->snd_fack, sblkp->end))
/* fack is advanced. */
tp->snd_fack = sblkp->end;
- /* We must have at least one SACK hole in scoreboard */
- KASSERT(!TAILQ_EMPTY(&tp->snd_holes), ("SACK scoreboard must not be empty"));
- cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */
+ /* We must have at least one SACK hole in scoreboard. */
+ KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
+ ("SACK scoreboard must not be empty"));
+ cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
/*
* Since the incoming sack blocks are sorted, we can process them
* making one sweep of the scoreboard.
*/
- while (sblkp - sack_blocks >= 0 && cur != NULL) {
+ while (sblkp >= sack_blocks && cur != NULL) {
if (SEQ_GEQ(sblkp->start, cur->end)) {
/*
- * SACKs data beyond the current hole.
- * Go to the previous sack block.
+ * SACKs data beyond the current hole. Go to the
+ * previous sack block.
*/
sblkp--;
continue;
}
if (SEQ_LEQ(sblkp->end, cur->start)) {
/*
- * SACKs data before the current hole.
- * Go to the previous hole.
+ * SACKs data before the current hole. Go to the
+ * previous hole.
*/
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
- ("sackhint bytes rtx >= 0"));
+ ("sackhint bytes rtx >= 0"));
if (SEQ_LEQ(sblkp->start, cur->start)) {
- /* Data acks at least the beginning of hole */
+ /* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
- /* Acks entire hole, so delete hole */
+ /* Acks entire hole, so delete hole. */
temp = cur;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
tcp_sackhole_remove(tp, temp);
/*
- * The sack block may ack all or part of the next
- * hole too, so continue onto the next hole.
+ * The sack block may ack all or part of the
+ * next hole too, so continue onto the next
+ * hole.
*/
continue;
} else {
- /* Move start of hole forward */
+ /* Move start of hole forward. */
cur->start = sblkp->end;
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
} else {
- /* Data acks at least the end of hole */
+ /* Data acks at least the end of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
- /* Move end of hole backward */
+ /* Move end of hole backward. */
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
} else {
/*
- * ACKs some data in middle of a hole; need to
- * split current hole
+ * ACKs some data in middle of a hole; need
+ * to split current hole
*/
temp = tcp_sackhole_insert(tp, sblkp->end,
- cur->end, cur);
+ cur->end, cur);
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
tp->sackhint.sack_bytes_rexmit
- += (temp->rxmit
- - temp->start);
+ += (temp->rxmit
+ - temp->start);
}
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
- cur->end);
+ cur->end);
}
}
}
@@ -558,39 +555,35 @@
}
/*
- * Partial ack handling within a sack recovery episode.
- * Keeping this very simple for now. When a partial ack
- * is received, force snd_cwnd to a value that will allow
- * the sender to transmit no more than 2 segments.
- * If necessary, a better scheme can be adopted at a
- * later point, but for now, the goal is to prevent the
- * sender from bursting a large amount of data in the midst
- * of sack recovery.
+ * Partial ack handling within a sack recovery episode. Keeping this very
+ * simple for now. When a partial ack is received, force snd_cwnd to a value
+ * that will allow the sender to transmit no more than 2 segments. If
+ * necessary, a better scheme can be adopted at a later point, but for now,
+ * the goal is to prevent the sender from bursting a large amount of data in
+ * the midst of sack recovery.
*/
void
-tcp_sack_partialack(tp, th)
- struct tcpcb *tp;
- struct tcphdr *th;
+tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
{
int num_segs = 1;
INP_LOCK_ASSERT(tp->t_inpcb);
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- /* send one or 2 segments based on how much new data was acked */
+ /* Send one or 2 segments based on how much new data was acked. */
if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
num_segs = 2;
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
- (tp->snd_nxt - tp->sack_newdata) +
- num_segs * tp->t_maxseg);
+ (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg);
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
}
+#if 0
/*
- * Debug version of tcp_sack_output() that walks the scoreboard. Used for
+ * Debug version of tcp_sack_output() that walks the scoreboard. Used for
* now to sanity check the hint.
*/
static struct sackhole *
@@ -612,32 +605,31 @@
}
return (p);
}
+#endif
/*
* Returns the next hole to retransmit and the number of retransmitted bytes
- * from the scoreboard. We store both the next hole and the number of
+ * from the scoreboard. We store both the next hole and the number of
* retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
- * reception). This avoids scoreboard traversals completely.
+ * reception). This avoids scoreboard traversals completely.
*
- * The loop here will traverse *at most* one link. Here's the argument.
- * For the loop to traverse more than 1 link before finding the next hole to
- * retransmit, we would need to have at least 1 node following the current hint
- * with (rxmit == end). But, for all holes following the current hint,
- * (start == rxmit), since we have not yet retransmitted from them. Therefore,
- * in order to traverse more 1 link in the loop below, we need to have at least
- * one node following the current hint with (start == rxmit == end).
- * But that can't happen, (start == end) means that all the data in that hole
- * has been sacked, in which case, the hole would have been removed from the
- * scoreboard.
+ * The loop here will traverse *at most* one link. Here's the argument. For
+ * the loop to traverse more than 1 link before finding the next hole to
+ * retransmit, we would need to have at least 1 node following the current
+ * hint with (rxmit == end). But, for all holes following the current hint,
+ * (start == rxmit), since we have not yet retransmitted from them.
+ * Therefore, in order to traverse more than 1 link in the loop below, we need to
+ * have at least one node following the current hint with (start == rxmit ==
+ * end). But that can't happen, (start == end) means that all the data in
+ * that hole has been sacked, in which case, the hole would have been removed
+ * from the scoreboard.
*/
struct sackhole *
tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
{
- struct sackhole *hole = NULL, *dbg_hole = NULL;
- int dbg_bytes_rexmt;
+ struct sackhole *hole = NULL;
INP_LOCK_ASSERT(tp->t_inpcb);
- dbg_hole = tcp_sack_output_debug(tp, &dbg_bytes_rexmt);
*sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
hole = tp->sackhint.nexthole;
if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
@@ -649,16 +641,6 @@
}
}
out:
- if (dbg_hole != hole) {
- printf("%s: Computed sack hole not the same as cached value\n", __func__);
- hole = dbg_hole;
- }
- if (*sack_bytes_rexmt != dbg_bytes_rexmt) {
- printf("%s: Computed sack_bytes_retransmitted (%d) not "
- "the same as cached value (%d)\n",
- __func__, dbg_bytes_rexmt, *sack_bytes_rexmt);
- *sack_bytes_rexmt = dbg_bytes_rexmt;
- }
return (hole);
}
@@ -677,7 +659,7 @@
return; /* No holes */
if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
return; /* We're already beyond any SACKed blocks */
- /*
+ /*-
* Two cases for which we want to advance snd_nxt:
* i) snd_nxt lies between end of one hole and beginning of another
* ii) snd_nxt lies between end of last hole and snd_fack
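
The stricter test now applied before an incoming SACK block is appended to sack_blocks[] relies on the wraparound-safe sequence comparisons from tcp_seq.h (signed 32-bit differences). The userland sketch below reproduces that acceptance predicate with macros written to match the usual SEQ_* forms; the sample sequence numbers are invented and chosen to straddle the 2^32 wrap.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;

#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

/* Mirror of the validity checks applied to each received SACK block. */
static int
sack_block_ok(tcp_seq start, tcp_seq end, tcp_seq snd_una, tcp_seq snd_max,
    tcp_seq th_ack)
{
	return (SEQ_GT(end, start) &&
	    SEQ_GT(start, snd_una) &&
	    SEQ_GT(start, th_ack) &&
	    SEQ_LT(start, snd_max) &&
	    SEQ_GT(end, snd_una) &&
	    SEQ_LEQ(end, snd_max));
}

int
main(void)
{
	/* A block that straddles the 2^32 wrap is still compared correctly. */
	tcp_seq snd_una = 0xfffffff0u, snd_max = 0x00000200u, th_ack = snd_una;

	printf("in-window block: %d\n",
	    sack_block_ok(0xfffffff8u, 0x00000100u, snd_una, snd_max, th_ack));
	printf("stale block:     %d\n",
	    sack_block_ok(0xffffff00u, 0xffffff80u, snd_una, snd_max, th_ack));
	return (0);
}
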
Index: ip_mroute.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_mroute.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_mroute.h -L sys/netinet/ip_mroute.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_mroute.h
+++ sys/netinet/ip_mroute.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/ip_mroute.h,v 1.24.2.1 2006/02/03 15:51:17 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_mroute.h,v 1.31 2007/02/08 23:05:08 bms Exp $
*/
#ifndef _NETINET_IP_MROUTE_H_
@@ -102,8 +102,8 @@
struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
};
-#define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */
-#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */
+#define VIFF_TUNNEL 0x1 /* no-op; retained for old source */
+#define VIFF_SRCRT 0x2 /* no-op; retained for old source */
#define VIFF_REGISTER 0x4 /* used for PIM Register encap/decap */
/*
@@ -211,7 +211,7 @@
struct mrtstat {
u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
u_long mrts_mfc_misses; /* # forw. cache hash table misses */
- u_long mrts_upcalls; /* # calls to mrouted */
+ u_long mrts_upcalls; /* # calls to multicast routing daemon */
u_long mrts_no_route; /* no route for packet's origin */
u_long mrts_bad_tunnel; /* malformed tunnel options */
u_long mrts_cant_tunnel; /* no room for tunnel options */
@@ -253,8 +253,8 @@
struct vif {
u_char v_flags; /* VIFF_ flags defined above */
u_char v_threshold; /* min ttl required to forward on vif*/
- u_int v_rate_limit; /* max rate */
- struct tbf *v_tbf; /* token bucket structure at intf. */
+ u_int v_rate_limit; /* ignored; kept for compatibility */
+ struct tbf *v_tbf; /* ignored; kept for compatibility */
struct in_addr v_lcl_addr; /* local interface address */
struct in_addr v_rmt_addr; /* remote address (tunnels only) */
struct ifnet *v_ifp; /* pointer to interface */
@@ -262,7 +262,7 @@
u_long v_pkt_out; /* # pkts out on interface */
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
- struct route v_route; /* cached route if this is a tunnel */
+ struct route v_route; /* cached route */
u_int v_rsvp_on; /* RSVP listening on this vif */
struct socket *v_rsvpd; /* RSVP daemon socket */
};
@@ -327,25 +327,6 @@
#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
/*
- * Token Bucket filter code
- */
-#define MAX_BKT_SIZE 10000 /* 10K bytes size */
-#define MAXQSIZE 10 /* max # of pkts in queue */
-
-/*
- * the token bucket filter at each vif
- */
-struct tbf
-{
- struct timeval tbf_last_pkt_t; /* arr. time of last pkt */
- u_long tbf_n_tok; /* no of tokens in bucket */
- u_long tbf_q_len; /* length of queue at this vif */
- u_long tbf_max_q_len; /* max. queue length */
- struct mbuf *tbf_q; /* Packet queue */
- struct mbuf *tbf_t; /* tail-insertion pointer */
-};
-
-/*
* Structure for measuring the bandwidth and sending an upcall if the
* measured bandwidth is above or below a threshold.
*/
Index: tcp_hostcache.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_hostcache.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_hostcache.c -L sys/netinet/tcp_hostcache.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_hostcache.c
+++ sys/netinet/tcp_hostcache.c
@@ -25,39 +25,36 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/tcp_hostcache.c,v 1.10 2005/01/07 01:45:45 imp Exp $
*/
/*
- * The tcp_hostcache moves the tcp specific cached metrics from the routing
- * table into a dedicated structure indexed by the remote IP address. It
- * keeps information on the measured tcp parameters of past tcp sessions
- * to have better initial start values for following connections from the
- * same source. Depending on the network parameters (delay, bandwidth, max
- * MTU, congestion window) between local and remote site this can lead to
- * significant speedups for new tcp connections after the first one.
+ * The tcp_hostcache moves the tcp-specific cached metrics from the routing
+ * table to a dedicated structure indexed by the remote IP address. It keeps
+ * information on the measured TCP parameters of past TCP sessions to allow
+ * better initial start values to be used with later connections to/from the
+ * same source. Depending on the network parameters (delay, bandwidth, max
+ * MTU, congestion window) between local and remote sites, this can lead to
+ * significant speed-ups for new TCP connections after the first one.
*
- * Due to this new tcp_hostcache all tcp specific metrics information in
- * the routing table has been removed. The INPCB no longer keeps a pointer
- * to the routing entry and protocol initiated route cloning has been
- * removed as well. With these changes the routing table has gone back
- * to being more lightwight and only carries information related to packet
- * forwarding.
+ * Due to the tcp_hostcache, all TCP-specific metrics information in the
+ * routing table has been removed. The inpcb no longer keeps a pointer to
+ * the routing entry, and protocol-initiated route cloning has been removed
+ * as well. With these changes, the routing table has gone back to being
+ * more lightweight and only carries information related to packet forwarding.
*
- * Tcp_hostcache is designed for multiple concurrent access in SMP
- * environments and high contention. All bucket rows have their own
- * lock and thus multiple lookups and modifies can be done at the same
- * time as long as they are in different bucket rows. If a request for
- * insertion of a new record can't be satisfied it simply returns an
- * empty structure. Nobody and nothing shall ever point directly to
- * any entry in tcp_hostcache. All communication is done in an object
- * oriented way and only funtions of tcp_hostcache will manipulate hostcache
- * entries. Otherwise we are unable to achieve good behaviour in concurrent
- * access situations. Since tcp_hostcache is only caching information there
- * are no fatal consequences if we either can't satisfy any particular request
- * or have to drop/overwrite an existing entry because of bucket limit
- * memory constrains.
+ * tcp_hostcache is designed for multiple concurrent access in SMP
+ * environments and high contention. All bucket rows have their own lock and
+ * thus multiple lookups and modifies can be done at the same time as long as
+ * they are in different bucket rows. If a request for insertion of a new
+ * record can't be satisfied, it simply returns an empty structure. Nobody
+ * and nothing outside of tcp_hostcache.c will ever point directly to any
+ * entry in the tcp_hostcache. All communication is done in an
+ * object-oriented way and only functions of tcp_hostcache will manipulate
+ * hostcache entries. Otherwise, we are unable to achieve good behaviour in
+ * concurrent access situations. Since tcp_hostcache is only caching
+ * information, there are no fatal consequences if we either can't satisfy
+ * any particular request or have to drop/overwrite an existing entry because
+ * of bucket limit memory constraints.
*/
/*
@@ -65,6 +62,9 @@
* followed here.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_hostcache.c,v 1.17 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet6.h"
#include <sys/param.h>
@@ -112,7 +112,7 @@
struct hc_head *rmx_head; /* head of bucket tail queue */
struct in_addr ip4; /* IP address */
struct in6_addr ip6; /* IP6 address */
- /* endpoint specific values for tcp */
+ /* endpoint specific values for TCP */
u_long rmx_mtu; /* MTU for this path */
u_long rmx_ssthresh; /* outbound gateway buffer limit */
u_long rmx_rtt; /* estimated round trip time */
@@ -121,7 +121,7 @@
u_long rmx_cwnd; /* congestion window */
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
- /* tcp hostcache internal data */
+ /* TCP hostcache internal data */
int rmx_expire; /* lifetime for object */
u_long rmx_hits; /* number of hits */
u_long rmx_updates; /* number of updates */
@@ -142,6 +142,7 @@
u_int cache_count;
u_int cache_limit;
int expire;
+ int prune;
int purgeall;
};
static struct tcp_hostcache tcp_hostcache;
@@ -156,26 +157,29 @@
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
- &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
+ &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
- &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
+ &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
- &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
+ &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
- &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
+ &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
- &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+ &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_RW,
+ &tcp_hostcache.prune, 0, "Time between purge runs");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
- &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
+ &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
- sysctl_tcp_hc_list, "A", "List of all hostcache entries");
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+ sysctl_tcp_hc_list, "A", "List of all hostcache entries");
static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");
@@ -201,7 +205,7 @@
int i;
/*
- * Initialize hostcache structures
+ * Initialize hostcache structures.
*/
tcp_hostcache.cache_count = 0;
tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
@@ -209,6 +213,7 @@
tcp_hostcache.cache_limit =
tcp_hostcache.hashsize * tcp_hostcache.bucket_limit;
tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+ tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE;
TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
&tcp_hostcache.hashsize);
@@ -218,19 +223,19 @@
&tcp_hostcache.bucket_limit);
if (!powerof2(tcp_hostcache.hashsize)) {
printf("WARNING: hostcache hash size is not a power of 2.\n");
- tcp_hostcache.hashsize = 512; /* safe default */
+ tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */
}
tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1;
/*
- * Allocate the hash table
+ * Allocate the hash table.
*/
tcp_hostcache.hashbase = (struct hc_head *)
malloc(tcp_hostcache.hashsize * sizeof(struct hc_head),
M_HOSTCACHE, M_WAITOK | M_ZERO);
/*
- * Initialize the hash buckets
+ * Initialize the hash buckets.
*/
for (i = 0; i < tcp_hostcache.hashsize; i++) {
TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket);
@@ -250,11 +255,11 @@
* Set up periodic cache cleanup.
*/
callout_init(&tcp_hc_callout, CALLOUT_MPSAFE);
- callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+ callout_reset(&tcp_hc_callout, tcp_hostcache.prune * hz, tcp_hc_purge, 0);
}
/*
- * Internal function: lookup an entry in the hostcache or return NULL.
+ * Internal function: look up an entry in the hostcache or return NULL.
*
* If an entry has been returned, the caller becomes responsible for
* unlocking the bucket row after he is done reading/modifying the entry.
@@ -279,14 +284,14 @@
hc_head = &tcp_hostcache.hashbase[hash];
/*
- * aquire lock for this bucket row
- * we release the lock if we don't find an entry,
- * otherwise the caller has to unlock after he is done
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
*/
THC_LOCK(&hc_head->hch_mtx);
/*
- * circle through entries in bucket row looking for a match
+ * Iterate through entries in bucket row looking for a match.
*/
TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) {
if (inc->inc_isipv6) {
@@ -301,15 +306,15 @@
}
/*
- * We were unsuccessful and didn't find anything
+ * We were unsuccessful and didn't find anything.
*/
THC_UNLOCK(&hc_head->hch_mtx);
return NULL;
}
/*
- * Internal function: insert an entry into the hostcache or return NULL
- * if unable to allocate a new one.
+ * Internal function: insert an entry into the hostcache or return NULL if
+ * unable to allocate a new one.
*
* If an entry has been returned, the caller becomes responsible for
* unlocking the bucket row after he is done reading/modifying the entry.
@@ -324,7 +329,7 @@
KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));
/*
- * Hash the foreign ip address
+ * Hash the foreign ip address.
*/
if (inc->inc_isipv6)
hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
@@ -334,25 +339,31 @@
hc_head = &tcp_hostcache.hashbase[hash];
/*
- * aquire lock for this bucket row
- * we release the lock if we don't find an entry,
- * otherwise the caller has to unlock after he is done
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
*/
THC_LOCK(&hc_head->hch_mtx);
/*
- * If the bucket limit is reached reuse the least used element
+ * If the bucket limit is reached, reuse the least-used element.
*/
if (hc_head->hch_length >= tcp_hostcache.bucket_limit ||
tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) {
hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
/*
* At first we were dropping the last element, just to
- * reaquire it in the next two lines again which ain't
- * very efficient. Instead just reuse the least used element.
- * Maybe we drop something that is still "in-use" but we can
- * be "lossy".
+ * reacquire it in the next two lines again, which isn't very
+ * efficient. Instead just reuse the least used element.
+ * We may drop something that is still "in-use" but we can be
+ * "lossy".
+ * Just give up if this bucket row is empty and we don't have
+ * anything to replace.
*/
+ if (hc_entry == NULL) {
+ THC_UNLOCK(&hc_head->hch_mtx);
+ return NULL;
+ }
TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
tcp_hostcache.hashbase[hash].hch_length--;
tcp_hostcache.cache_count--;
@@ -362,7 +373,7 @@
#endif
} else {
/*
- * Allocate a new entry, or balk if not possible
+ * Allocate a new entry, or balk if not possible.
*/
hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT);
if (hc_entry == NULL) {
@@ -372,7 +383,7 @@
}
/*
- * Initialize basic information of hostcache entry
+ * Initialize basic information of hostcache entry.
*/
bzero(hc_entry, sizeof(*hc_entry));
if (inc->inc_isipv6)
@@ -383,7 +394,7 @@
hc_entry->rmx_expire = tcp_hostcache.expire;
/*
- * Put it upfront
+ * Put it upfront.
*/
TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
tcp_hostcache.hashbase[hash].hch_length++;
@@ -394,9 +405,9 @@
}
/*
- * External function: lookup an entry in the hostcache and fill out the
- * supplied tcp metrics structure. Fills in null when no entry was found
- * or a value is not set.
+ * External function: look up an entry in the hostcache and fill out the
+ * supplied TCP metrics structure. Fills in NULL when no entry was found or
+ * a value is not set.
*/
void
tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
@@ -404,12 +415,12 @@
struct hc_metrics *hc_entry;
/*
- * Find the right bucket
+ * Find the right bucket.
*/
hc_entry = tcp_hc_lookup(inc);
/*
- * If we don't have an existing object
+ * If we don't have an existing object.
*/
if (hc_entry == NULL) {
bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
@@ -428,14 +439,14 @@
hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
/*
- * unlock bucket row
+ * Unlock bucket row.
*/
THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
}
/*
- * External function: lookup an entry in the hostcache and return the
- * discovered path mtu. Returns null if no entry is found or value is not
+ * External function: look up an entry in the hostcache and return the
+ * discovered path MTU. Returns NULL if no entry is found or value is not
* set.
*/
u_long
@@ -457,7 +468,7 @@
}
/*
- * External function: update the mtu value of an entry in the hostcache.
+ * External function: update the MTU value of an entry in the hostcache.
* Creates a new entry if none was found.
*/
void
@@ -466,12 +477,12 @@
struct hc_metrics *hc_entry;
/*
- * Find the right bucket
+ * Find the right bucket.
*/
hc_entry = tcp_hc_lookup(inc);
/*
- * If we don't have an existing object try to insert a new one
+ * If we don't have an existing object, try to insert a new one.
*/
if (hc_entry == NULL) {
hc_entry = tcp_hc_insert(inc);
@@ -484,19 +495,19 @@
hc_entry->rmx_mtu = mtu;
/*
- * put it upfront so we find it faster next time
+ * Put it upfront so we find it faster next time.
*/
TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
/*
- * unlock bucket row
+ * Unlock bucket row.
*/
THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
}
/*
- * External function: update the tcp metrics of an entry in the hostcache.
+ * External function: update the TCP metrics of an entry in the hostcache.
* Creates a new entry if none was found.
*/
void
@@ -587,6 +598,9 @@
char *p, *buf;
int len, i, error;
struct hc_metrics *hc_entry;
+#ifdef INET6
+ char ip6buf[INET6_ADDRSTRLEN];
+#endif
bufsize = linesize * (tcp_hostcache.cache_count + 1);
@@ -607,7 +621,7 @@
"%4lu %4lu %4i\n",
hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
#ifdef INET6
- ip6_sprintf(&hc_entry->ip6),
+ ip6_sprintf(ip6buf, &hc_entry->ip6),
#else
"IPv6?",
#endif
@@ -635,8 +649,8 @@
}
/*
- * Expire and purge (old|all) entries in the tcp_hostcache. Runs periodically
- * from the callout.
+ * Expire and purge (old|all) entries in the tcp_hostcache. Runs
+ * periodically from the callout.
*/
static void
tcp_hc_purge(void *arg)
@@ -661,9 +675,9 @@
tcp_hostcache.hashbase[i].hch_length--;
tcp_hostcache.cache_count--;
} else
- hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE;
+ hc_entry->rmx_expire -= tcp_hostcache.prune;
}
THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
}
- callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+ callout_reset(&tcp_hc_callout, tcp_hostcache.prune * hz, tcp_hc_purge, 0);
}
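
The hostcache hunks above replace the compile-time TCP_HOSTCACHE_PRUNE interval with the new run-time tunable tcp_hostcache.prune, so the purge callout period can now be changed through net.inet.tcp.hostcache.prune. The user-space sketch below is illustrative only (not part of the patch; every name in it is invented) and restates the pattern the cache uses: hash the address into a bucket, age each entry by the prune interval on every purge pass, and free it once its lifetime reaches zero.

/* Illustrative sketch, not from the patch: hostcache-style bucket
 * hashing and expiry driven by a tunable "prune" interval. */
#include <stdio.h>
#include <stdlib.h>

#define HASHSIZE  16            /* must be a power of 2, like the real cache */
#define EXPIRE    (60 * 60)     /* seconds an idle entry lives */

struct entry {
	unsigned long addr;     /* stand-in for the foreign IP address */
	int expire;             /* remaining lifetime in seconds */
	struct entry *next;
};

static struct entry *buckets[HASHSIZE];
static int prune = 5 * 60;      /* plays the role of net.inet.tcp.hostcache.prune */

static unsigned hash(unsigned long addr) {
	return addr & (HASHSIZE - 1);   /* works because HASHSIZE is a power of 2 */
}

static void insert(unsigned long addr) {
	struct entry *e = calloc(1, sizeof(*e));
	e->addr = addr;
	e->expire = EXPIRE;
	e->next = buckets[hash(addr)];
	buckets[hash(addr)] = e;        /* "put it upfront" */
}

/* One purge pass: age every entry by the prune interval, drop dead ones. */
static void purge(void) {
	for (unsigned i = 0; i < HASHSIZE; i++) {
		struct entry **pp = &buckets[i];
		while (*pp != NULL) {
			if (((*pp)->expire -= prune) <= 0) {
				struct entry *dead = *pp;
				*pp = dead->next;
				free(dead);
			} else
				pp = &(*pp)->next;
		}
	}
}

int main(void) {
	insert(0x0a000001UL);
	for (int pass = 0; pass < EXPIRE / prune + 1; pass++)
		purge();
	printf("entry expired: %s\n",
	    buckets[hash(0x0a000001UL)] == NULL ? "yes" : "no");
	return 0;
}
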
Index: in_proto.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_proto.c -L sys/netinet/in_proto.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_proto.c
+++ sys/netinet/in_proto.c
@@ -27,15 +27,18 @@
* SUCH DAMAGE.
*
* @(#)in_proto.c 8.2 (Berkeley) 2/9/95
- * $FreeBSD: src/sys/netinet/in_proto.c,v 1.77.2.3 2006/01/03 08:15:32 thompsa Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_proto.c,v 1.87 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_ipx.h"
#include "opt_mrouting.h"
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_pf.h"
#include "opt_carp.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -55,9 +58,6 @@
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/igmp_var.h>
-#ifdef PIM
-#include <netinet/pim_var.h>
-#endif
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
@@ -69,22 +69,18 @@
* TCP/IP protocol family: IP, ICMP, UDP, TCP.
*/
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ah.h>
-#ifdef IPSEC_ESP
-#include <netinet6/esp.h>
-#endif
-#include <netinet6/ipcomp.h>
-#endif /* IPSEC */
+static struct pr_usrreqs nousrreqs;
-#ifdef FAST_IPSEC
+#ifdef IPSEC
#include <netipsec/ipsec.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
-#ifdef IPXIP
-#include <netipx/ipx_ip.h>
-#endif
+#ifdef SCTP
+#include <netinet/in_pcb.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_var.h>
+#endif /* SCTP */
#ifdef DEV_PFSYNC
#include <net/pfvar.h>
@@ -139,6 +135,43 @@
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp_usrreqs
},
+#ifdef SCTP
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_init = sctp_init,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+{
+ .pr_type = SOCK_SEQPACKET,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+#endif /* SCTP */
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -186,34 +219,6 @@
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ah4_input,
- .pr_usrreqs = &nousrreqs
-},
-#ifdef IPSEC_ESP
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_ESP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = esp4_input,
- .pr_usrreqs = &nousrreqs
-},
-#endif
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPCOMP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ipcomp4_input,
- .pr_usrreqs = &nousrreqs
-},
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_AH,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ah4_input,
.pr_ctlinput = ah4_ctlinput,
.pr_usrreqs = &nousrreqs
},
@@ -234,7 +239,7 @@
.pr_input = ipcomp4_input,
.pr_usrreqs = &nousrreqs
},
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -287,28 +292,15 @@
.pr_usrreqs = &rip_usrreqs
},
#endif
-#ifdef IPXIP
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = ipxip_input,
- .pr_ctlinput = ipxip_ctlinput,
- .pr_usrreqs = &rip_usrreqs
-},
-#endif
-#ifdef PIM
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim_input,
+ .pr_input = encap4_input,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
},
-#endif /* PIM */
#ifdef DEV_PFSYNC
{
.pr_type = SOCK_RAW,
@@ -374,23 +366,19 @@
SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW, 0, "ICMP");
SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW, 0, "UDP");
SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP");
+#ifdef SCTP
+SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP");
+#endif
SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP");
-#ifdef FAST_IPSEC
+#ifdef IPSEC
/* XXX no protocol # to use, pick something "reserved" */
SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC");
SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH");
SYSCTL_NODE(_net_inet, IPPROTO_ESP, esp, CTLFLAG_RW, 0, "ESP");
SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP");
SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP");
-#else
-#ifdef IPSEC
-SYSCTL_NODE(_net_inet, IPPROTO_AH, ipsec, CTLFLAG_RW, 0, "IPSEC");
#endif /* IPSEC */
-#endif /* !FAST_IPSEC */
SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
-#ifdef PIM
-SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
-#endif
#ifdef DEV_PFSYNC
SYSCTL_NODE(_net_inet, IPPROTO_PFSYNC, pfsync, CTLFLAG_RW, 0, "PFSYNC");
#endif
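
The in_proto.c hunks register SCTP protocol-switch entries for SOCK_DGRAM, SOCK_SEQPACKET and SOCK_STREAM on IPPROTO_SCTP only when the kernel is built with "options SCTP", and fold the old IPSEC/FAST_IPSEC split into a single IPSEC block. The sketch below shows the general idea of such a conditionally compiled protocol table in ordinary user-space C; it is not the kernel protosw API, and all constants and handler names in it are invented.

/* Illustrative only: a static table maps (socket type, protocol) to
 * handlers, with optional protocols guarded by an #ifdef, the same
 * way the SCTP entries above sit behind "options SCTP". */
#include <stdio.h>

#define MY_SOCK_DGRAM   1
#define MY_SOCK_STREAM  2
#define MY_PROTO_TCP    6
#define MY_PROTO_SCTP   132

struct proto_entry {
	int type;
	int protocol;
	const char *name;
	void (*input)(void);            /* stand-in for pr_input */
};

static void tcp_input_stub(void)  { puts("tcp input");  }
#ifdef WITH_SCTP
static void sctp_input_stub(void) { puts("sctp input"); }
#endif

static const struct proto_entry protosw_sketch[] = {
	{ MY_SOCK_STREAM, MY_PROTO_TCP,  "tcp",  tcp_input_stub  },
#ifdef WITH_SCTP
	{ MY_SOCK_DGRAM,  MY_PROTO_SCTP, "sctp", sctp_input_stub },
	{ MY_SOCK_STREAM, MY_PROTO_SCTP, "sctp", sctp_input_stub },
#endif
};

int main(void) {
	size_t n = sizeof(protosw_sketch) / sizeof(protosw_sketch[0]);
	for (size_t i = 0; i < n; i++)
		printf("%s (proto %d)\n", protosw_sketch[i].name,
		    protosw_sketch[i].protocol);
	return 0;
}
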
--- /dev/null
+++ sys/netinet/sctp_cc_functions.c
@@ -0,0 +1,1631 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_cc_functions.h>
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_cc_functions.c,v 1.3 2007/09/08 11:35:10 rrs Exp $");
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*
+ * We take the max of the burst limit times a MTU or the
+ * INITIAL_CWND. We then limit this to 4 MTU's of sending.
+ */
+ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
+ /* we always get at LEAST 2 MTU's */
+ if (net->cwnd < (2 * net->mtu)) {
+ net->cwnd = 2 * net->mtu;
+ }
+ net->ssthresh = stcb->asoc.peers_rwnd;
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
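
sctp_set_initial_cc_param() above clamps the initial window to min(4 * MTU, max(2 * MTU, SCTP_INITIAL_CWND)) and then re-applies the two-MTU floor. A small worked restatement follows; the 8192-byte "initial" value is hypothetical and is only there to show the 4 * MTU cap taking effect.

/* Illustrative restatement of the initial-cwnd clamp above. */
#include <stdio.h>

static unsigned long initial_cwnd(unsigned long mtu, unsigned long initial)
{
	unsigned long hi = 4 * mtu;
	unsigned long lo = 2 * mtu;
	unsigned long cwnd = initial > lo ? initial : lo;   /* max(2*MTU, initial) */

	if (cwnd > hi)
		cwnd = hi;              /* min(4*MTU, ...) */
	if (cwnd < lo)
		cwnd = lo;              /* "we always get at LEAST 2 MTU's" */
	return cwnd;
}

int main(void)
{
	/* MTU 1500: max(3000, 8192) = 8192, capped at 4 * 1500 = 6000. */
	printf("%lu\n", initial_cwnd(1500, 8192));
	return 0;
}
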
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*-
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) {
+ net->cwnd += (net->mtu * sctp_L2_abc_variable);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ /*
+ * add to pba only if we had a
+ * cwnd's worth (or so) in flight OR
+ * the burst limit was applied.
+ */
+ net->partial_bytes_acked +=
+ net->net_ack;
+
+ /*
+ * Do we need to increase (if pba is
+ * > cwnd)?
+ */
+ if (net->partial_bytes_acked >=
+ net->cwnd) {
+ if (net->cwnd <
+ net->partial_bytes_acked) {
+ net->partial_bytes_acked -=
+ net->cwnd;
+ } else {
+ net->partial_bytes_acked =
+ 0;
+ }
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ }
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
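
The SACK handler above has two regimes: in slow start (cwnd <= ssthresh) the window grows by the newly acked bytes, bounded by sctp_L2_abc_variable MTUs, while in congestion avoidance it grows by one MTU only after a full cwnd's worth of bytes has accumulated in partial_bytes_acked. The sketch below restates just that split; it is illustrative and omits the flight-size check, Early FR and CMT handling that the real function also performs ("abc_l" stands in for sctp_L2_abc_variable).

/* Illustrative slow-start / congestion-avoidance split (user space). */
#include <stdio.h>

struct path {
	unsigned long cwnd, ssthresh, mtu;
	unsigned long partial_bytes_acked;
};

static void on_sack(struct path *p, unsigned long net_ack, unsigned long abc_l)
{
	if (p->cwnd <= p->ssthresh) {
		/* slow start: grow by the acked bytes, bounded by L * MTU */
		unsigned long incr = net_ack;
		if (incr > abc_l * p->mtu)
			incr = abc_l * p->mtu;
		p->cwnd += incr;
	} else {
		/* congestion avoidance: one MTU per cwnd's worth of acks */
		p->partial_bytes_acked += net_ack;
		if (p->partial_bytes_acked >= p->cwnd) {
			p->partial_bytes_acked -= p->cwnd;
			p->cwnd += p->mtu;
		}
	}
}

int main(void)
{
	struct path p = { .cwnd = 3000, .ssthresh = 6000, .mtu = 1500,
	                  .partial_bytes_acked = 0 };

	on_sack(&p, 3000, 2);   /* slow start: +3000 -> 6000 */
	on_sack(&p, 3000, 2);   /* still <= ssthresh: +3000 -> 9000 */
	on_sack(&p, 6000, 2);   /* congestion avoidance: pba 6000 < cwnd 9000, no growth yet */
	printf("cwnd=%lu pba=%lu\n", p.cwnd, p.partial_bytes_acked);
	return 0;
}
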
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = net->cwnd >> 1;
+ if (net->ssthresh < (net->mtu << 1)) {
+ net->ssthresh = (net->mtu << 1);
+ }
+ net->cwnd = net->mtu;
+ /* floor of 1 mtu */
+ if (net->cwnd < net->mtu)
+ net->cwnd = net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ net->partial_bytes_acked = 0;
+}
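
On a retransmission timeout the code above halves ssthresh, floors it at two MTUs, and collapses cwnd back to a single MTU. A minimal restatement with illustrative values:

/* Illustrative restatement of the timeout handling above. */
#include <stdio.h>

static void on_timeout(unsigned long *cwnd, unsigned long *ssthresh,
    unsigned long mtu)
{
	*ssthresh = *cwnd / 2;
	if (*ssthresh < 2 * mtu)
		*ssthresh = 2 * mtu;    /* floor of two MTUs */
	*cwnd = mtu;                    /* restart from one MTU */
}

int main(void)
{
	unsigned long cwnd = 12000, ssthresh = 20000;

	on_timeout(&cwnd, &ssthresh, 1500);
	printf("cwnd=%lu ssthresh=%lu\n", cwnd, ssthresh);  /* 1500, 6000 */
	return 0;
}
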
+
+struct sctp_hs_raise_drop {
+ int32_t cwnd;
+ int32_t increase;
+ int32_t drop_percent;
+};
+
+#define SCTP_HS_TABLE_SIZE 73
+
+struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
+ {38, 1, 50}, /* 0 */
+ {118, 2, 44}, /* 1 */
+ {221, 3, 41}, /* 2 */
+ {347, 4, 38}, /* 3 */
+ {495, 5, 37}, /* 4 */
+ {663, 6, 35}, /* 5 */
+ {851, 7, 34}, /* 6 */
+ {1058, 8, 33}, /* 7 */
+ {1284, 9, 32}, /* 8 */
+ {1529, 10, 31}, /* 9 */
+ {1793, 11, 30}, /* 10 */
+ {2076, 12, 29}, /* 11 */
+ {2378, 13, 28}, /* 12 */
+ {2699, 14, 28}, /* 13 */
+ {3039, 15, 27}, /* 14 */
+ {3399, 16, 27}, /* 15 */
+ {3778, 17, 26}, /* 16 */
+ {4177, 18, 26}, /* 17 */
+ {4596, 19, 25}, /* 18 */
+ {5036, 20, 25}, /* 19 */
+ {5497, 21, 24}, /* 20 */
+ {5979, 22, 24}, /* 21 */
+ {6483, 23, 23}, /* 22 */
+ {7009, 24, 23}, /* 23 */
+ {7558, 25, 22}, /* 24 */
+ {8130, 26, 22}, /* 25 */
+ {8726, 27, 22}, /* 26 */
+ {9346, 28, 21}, /* 27 */
+ {9991, 29, 21}, /* 28 */
+ {10661, 30, 21}, /* 29 */
+ {11358, 31, 20}, /* 30 */
+ {12082, 32, 20}, /* 31 */
+ {12834, 33, 20}, /* 32 */
+ {13614, 34, 19}, /* 33 */
+ {14424, 35, 19}, /* 34 */
+ {15265, 36, 19}, /* 35 */
+ {16137, 37, 19}, /* 36 */
+ {17042, 38, 18}, /* 37 */
+ {17981, 39, 18}, /* 38 */
+ {18955, 40, 18}, /* 39 */
+ {19965, 41, 17}, /* 40 */
+ {21013, 42, 17}, /* 41 */
+ {22101, 43, 17}, /* 42 */
+ {23230, 44, 17}, /* 43 */
+ {24402, 45, 16}, /* 44 */
+ {25618, 46, 16}, /* 45 */
+ {26881, 47, 16}, /* 46 */
+ {28193, 48, 16}, /* 47 */
+ {29557, 49, 15}, /* 48 */
+ {30975, 50, 15}, /* 49 */
+ {32450, 51, 15}, /* 50 */
+ {33986, 52, 15}, /* 51 */
+ {35586, 53, 14}, /* 52 */
+ {37253, 54, 14}, /* 53 */
+ {38992, 55, 14}, /* 54 */
+ {40808, 56, 14}, /* 55 */
+ {42707, 57, 13}, /* 56 */
+ {44694, 58, 13}, /* 57 */
+ {46776, 59, 13}, /* 58 */
+ {48961, 60, 13}, /* 59 */
+ {51258, 61, 13}, /* 60 */
+ {53677, 62, 12}, /* 61 */
+ {56230, 63, 12}, /* 62 */
+ {58932, 64, 12}, /* 63 */
+ {61799, 65, 12}, /* 64 */
+ {64851, 66, 11}, /* 65 */
+ {68113, 67, 11}, /* 66 */
+ {71617, 68, 11}, /* 67 */
+ {75401, 69, 10}, /* 68 */
+ {79517, 70, 10}, /* 69 */
+ {84035, 71, 10}, /* 70 */
+ {89053, 72, 10}, /* 71 */
+ {94717, 73, 9} /* 72 */
+};
+
+static void
+sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx, incr;
+
+ cur_val = net->cwnd >> 10;
+ indx = SCTP_HS_TABLE_SIZE - 1;
+#ifdef SCTP_DEBUG
+ printf("HS CC Called.\n");
+#endif
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ if (net->net_ack > net->mtu) {
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
+ if (cur_val < sctp_cwnd_adjust[i].cwnd) {
+ indx = i;
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ incr = ((sctp_cwnd_adjust[indx].increase) << 10);
+ net->cwnd += incr;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+}
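
The HighSpeed variant indexes the sctp_cwnd_adjust table by the current cwnd in kilobytes (cwnd >> 10) and adds the matching row's "increase", also in kilobytes. The sketch below uses a four-row excerpt of the table to show the lookup; below the first threshold it falls back to ordinary one-MTU growth, which is a simplification of the real net_ack handling.

/* Illustrative HighSpeed table lookup (four-row excerpt only). */
#include <stdio.h>

struct hs_row { long cwnd_kb; long increase_kb; };

static const struct hs_row table[] = {
	{ 38, 1 }, { 118, 2 }, { 221, 3 }, { 347, 4 },
};
#define ROWS (sizeof(table) / sizeof(table[0]))

static long hs_increase(long cwnd_bytes, long mtu)
{
	long cur = cwnd_bytes >> 10;    /* bytes -> kilobytes */
	size_t idx = ROWS - 1;

	if (cur < table[0].cwnd_kb)
		return mtu;             /* "normal mode": one MTU (simplified) */
	for (size_t i = 0; i < ROWS; i++) {
		if (cur < table[i].cwnd_kb) {
			idx = i;
			break;
		}
	}
	return table[idx].increase_kb << 10;    /* kilobytes -> bytes */
}

int main(void)
{
	/* A 200 KB window falls in the {221, 3} row: increase by 3 KB. */
	printf("%ld\n", hs_increase(200 * 1024, 1500));
	return 0;
}
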
+
+static void
+sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx;
+ int old_cwnd = net->cwnd;
+
+ cur_val = net->cwnd >> 10;
+ indx = net->last_hs_used;
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ } else {
+ /* drop by the proper amount */
+ net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
+ sctp_cwnd_adjust[net->last_hs_used].drop_percent);
+ net->cwnd = net->ssthresh;
+ /* now where are we */
+ indx = net->last_hs_used;
+ cur_val = net->cwnd >> 10;
+ /* reset where we are in the table */
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* feel out of hs */
+ net->last_hs_used = 0;
+ } else {
+ for (i = indx; i >= 1; i--) {
+ if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+
+ sctp_hs_cwnd_decrease(stcb, net);
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+
+ sctp_hs_cwnd_increase(stcb, net);
+
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ /*
+ * add to pba only if we had a
+ * cwnd's worth (or so) in flight OR
+ * the burst limit was applied.
+ */
+ net->partial_bytes_acked +=
+ net->net_ack;
+
+ /*
+ * Do we need to increase (if pba is
+ * > cwnd)?
+ */
+ if (net->partial_bytes_acked >=
+ net->cwnd) {
+ if (net->cwnd <
+ net->partial_bytes_acked) {
+ net->partial_bytes_acked -=
+ net->cwnd;
+ } else {
+ net->partial_bytes_acked =
+ 0;
+ }
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ }
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue)
+{
+ uint32_t bw_avail;
+ int rtt, incr;
+ int old_cwnd = net->cwnd;
+
+ /* need real RTT for this calc */
+ rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ /* get bottle neck bw */
+ *bottle_bw = ntohl(cp->bottle_bw);
+ /* and whats on queue */
+ *on_queue = ntohl(cp->current_onq);
+ /*
+ * adjust the on-queue if our flight is more it could be that the
+ * router has not yet gotten data "in-flight" to it
+ */
+ if (*on_queue < net->flight_size)
+ *on_queue = net->flight_size;
+ /* calculate the available space */
+ bw_avail = (*bottle_bw * rtt) / 1000;
+ if (bw_avail > *bottle_bw) {
+ /*
+ * Cap the growth to no more than the bottle neck. This can
+ * happen as RTT slides up due to queues. It also means if
+ * you have more than a 1 second RTT with a empty queue you
+ * will be limited to the bottle_bw per second no matter if
+ * other points have 1/2 the RTT and you could get more
+ * out...
+ */
+ bw_avail = *bottle_bw;
+ }
+ if (*on_queue > bw_avail) {
+ /*
+ * No room for anything else don't allow anything else to be
+ * "added to the fire".
+ */
+ int seg_inflight, seg_onqueue, my_portion;
+
+ net->partial_bytes_acked = 0;
+
+ /* how much are we over queue size? */
+ incr = *on_queue - bw_avail;
+ if (stcb->asoc.seen_a_sack_this_pkt) {
+ /*
+ * undo any cwnd adjustment that the sack might have
+ * made
+ */
+ net->cwnd = net->prev_cwnd;
+ }
+ /* Now how much of that is mine? */
+ seg_inflight = net->flight_size / net->mtu;
+ seg_onqueue = *on_queue / net->mtu;
+ my_portion = (incr * seg_inflight) / seg_onqueue;
+
+ /* Have I made an adjustment already */
+ if (net->cwnd > net->flight_size) {
+ /*
+ * for this flight I made an adjustment we need to
+ * decrease the portion by a share our previous
+ * adjustment.
+ */
+ int diff_adj;
+
+ diff_adj = net->cwnd - net->flight_size;
+ if (diff_adj > my_portion)
+ my_portion = 0;
+ else
+ my_portion -= diff_adj;
+ }
+ /*
+ * back down to the previous cwnd (assume we have had a sack
+ * before this packet). minus what ever portion of the
+ * overage is my fault.
+ */
+ net->cwnd -= my_portion;
+
+ /* we will NOT back down more than 1 MTU */
+ if (net->cwnd <= net->mtu) {
+ net->cwnd = net->mtu;
+ }
+ /* force into CA */
+ net->ssthresh = net->cwnd - 1;
+ } else {
+ /*
+ * Take 1/4 of the space left or max burst up .. whichever
+ * is less.
+ */
+ incr = min((bw_avail - *on_queue) >> 2,
+ stcb->asoc.max_burst * net->mtu);
+ net->cwnd += incr;
+ }
+ if (net->cwnd > bw_avail) {
+ /* We can't exceed the pipe size */
+ net->cwnd = bw_avail;
+ }
+ if (net->cwnd < net->mtu) {
+ /* We always have 1 MTU */
+ net->cwnd = net->mtu;
+ }
+ if (net->cwnd - old_cwnd != 0) {
+ /* log only changes */
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_SAT);
+ }
+ }
+}
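
The packet-drop handler above sizes the pipe as bottle_bw * rtt / 1000, treats whatever the peer reports queued beyond that as overshoot, and gives back the sender's proportional share of it, never going below one MTU or above the pipe. The sketch below covers only that overshoot branch; it is illustrative and leaves out the prev_cwnd undo, the max_burst growth branch and the ssthresh adjustment. Units follow the code above: bottle_bw in bytes per second, rtt in milliseconds.

/* Illustrative restatement of the overshoot branch above. */
#include <stdio.h>

static unsigned long shrink_on_drop(unsigned long cwnd, unsigned long mtu,
    unsigned long flight, unsigned long bottle_bw, unsigned long on_queue,
    unsigned long rtt_ms)
{
	unsigned long bw_avail = bottle_bw * rtt_ms / 1000;

	if (bw_avail > bottle_bw)
		bw_avail = bottle_bw;   /* cap at one second's worth of data */
	if (on_queue < flight)
		on_queue = flight;      /* router hasn't seen everything yet */

	if (on_queue > bw_avail) {
		unsigned long over = on_queue - bw_avail;
		unsigned long seg_inflight = flight / mtu;
		unsigned long seg_onqueue = on_queue / mtu;
		unsigned long my_share = over * seg_inflight / seg_onqueue;

		cwnd = cwnd > my_share ? cwnd - my_share : mtu;
	}
	if (cwnd > bw_avail)
		cwnd = bw_avail;        /* cannot exceed the pipe */
	if (cwnd < mtu)
		cwnd = mtu;             /* always keep at least one MTU */
	return cwnd;
}

int main(void)
{
	/* 1 MB/s bottleneck, 100 ms RTT -> 100000-byte pipe; 150000 bytes
	 * reported queued, 90000 of them ours: give back 30000 bytes. */
	printf("%lu\n", shrink_on_drop(120000, 1500, 90000, 1000000, 150000, 100));
	return 0;
}
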
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit)
+{
+ int old_cwnd;
+
+ if (net->ssthresh < net->cwnd)
+ net->ssthresh = net->cwnd;
+ old_cwnd = net->cwnd;
+ net->cwnd = (net->flight_size + (burst_limit * net->mtu));
+
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
+ }
+}
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+/*
+ * H-TCP congestion control. The algorithm is detailed in:
+ * R.N.Shorten, D.J.Leith:
+ * "H-TCP: TCP for high-speed and long-distance networks"
+ * Proc. PFLDnet, Argonne, 2004.
+ * http://www.hamilton.ie/net/htcp3.pdf
+ */
+
+
+static int use_rtt_scaling = 1;
+static int use_bandwidth_switch = 1;
+
+static inline int
+between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
+{
+ return seq3 - seq2 >= seq1 - seq2;
+}
+
+static inline uint32_t
+htcp_cong_time(struct htcp *ca)
+{
+ return sctp_get_tick_count() - ca->last_cong;
+}
+
+static inline uint32_t
+htcp_ccount(struct htcp *ca)
+{
+ return htcp_cong_time(ca) / ca->minRTT;
+}
+
+static inline void
+htcp_reset(struct htcp *ca)
+{
+ ca->undo_last_cong = ca->last_cong;
+ ca->undo_maxRTT = ca->maxRTT;
+ ca->undo_old_maxB = ca->old_maxB;
+ ca->last_cong = sctp_get_tick_count();
+}
+
+#ifdef SCTP_NOT_USED
+
+static uint32_t
+htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
+ net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
+ net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
+ return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
+}
+
+#endif
+
+static inline void
+measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t srtt = net->lastsa >> 3;
+
+ /* keep track of minimum RTT seen so far, minRTT is zero at first */
+ if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
+ net->htcp_ca.minRTT = srtt;
+
+ /* max RTT */
+ if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
+ if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
+ net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
+ if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
+ net->htcp_ca.maxRTT = srtt;
+ }
+}
+
+static void
+measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t now = sctp_get_tick_count();
+
+ if (net->fast_retran_ip == 0)
+ net->htcp_ca.bytes_acked = net->net_ack;
+
+ if (!use_bandwidth_switch)
+ return;
+
+ /* achieved throughput calculations */
+ /* JRS - not 100% sure of this statement */
+ if (net->fast_retran_ip == 1) {
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ return;
+ }
+ net->htcp_ca.bytecount += net->net_ack;
+
+ if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
+ && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
+ && net->htcp_ca.minRTT > 0) {
+ uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);
+
+ if (htcp_ccount(&net->htcp_ca) <= 3) {
+ /* just after backoff */
+ net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
+ } else {
+ net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
+ if (net->htcp_ca.Bi > net->htcp_ca.maxB)
+ net->htcp_ca.maxB = net->htcp_ca.Bi;
+ if (net->htcp_ca.minB > net->htcp_ca.maxB)
+ net->htcp_ca.minB = net->htcp_ca.maxB;
+ }
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ }
+}
+
+static inline void
+htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
+{
+ if (use_bandwidth_switch) {
+ uint32_t maxB = ca->maxB;
+ uint32_t old_maxB = ca->old_maxB;
+
+ ca->old_maxB = ca->maxB;
+
+ if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 0;
+ return;
+ }
+ }
+ if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
+ ca->beta = (minRTT << 7) / maxRTT;
+ if (ca->beta < BETA_MIN)
+ ca->beta = BETA_MIN;
+ else if (ca->beta > BETA_MAX)
+ ca->beta = BETA_MAX;
+ } else {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 1;
+ }
+}
+
+static inline void
+htcp_alpha_update(struct htcp *ca)
+{
+ uint32_t minRTT = ca->minRTT;
+ uint32_t factor = 1;
+ uint32_t diff = htcp_cong_time(ca);
+
+ if (diff > (uint32_t) hz) {
+ diff -= hz;
+ factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
+ }
+ if (use_rtt_scaling && minRTT) {
+ uint32_t scale = (hz << 3) / (10 * minRTT);
+
+ scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to
+ * interval [0.5,10]<<3 */
+ factor = (factor << 3) / scale;
+ if (!factor)
+ factor = 1;
+ }
+ ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
+ if (!ca->alpha)
+ ca->alpha = ALPHA_BASE;
+}
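
htcp_alpha_update() above implements the H-TCP growth function in fixed point (beta scaled by 2^7, the RTT scale by 2^3): once more than a second has passed since the last congestion event, factor = 1 + 10*d + (d/2)^2 with d measured in seconds, and alpha = 2 * factor * (1 - beta). The floating-point restatement below omits the optional RTT scaling and is only meant to make the arithmetic easier to follow.

/* Floating-point restatement of the H-TCP alpha computation above. */
#include <stdio.h>

static double htcp_alpha(double secs_since_cong, double beta)
{
	double factor = 1.0;

	if (secs_since_cong > 1.0) {
		double d = secs_since_cong - 1.0;
		/* 1 + 10*d + (d/2)^2: the H-TCP growth polynomial */
		factor = 1.0 + 10.0 * d + (d / 2.0) * (d / 2.0);
	}
	return 2.0 * factor * (1.0 - beta);
}

int main(void)
{
	/* beta = 0.5, three seconds since the last congestion event:
	 * d = 2, factor = 1 + 20 + 1 = 22, alpha = 2 * 22 * 0.5 = 22. */
	printf("%.1f\n", htcp_alpha(3.0, 0.5));
	return 0;
}
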
+
+/* After we have the rtt data to calculate beta, we'd still prefer to wait one
+ * rtt before we adjust our beta to ensure we are working from a consistent
+ * data.
+ *
+ * This function should be called when we hit a congestion event since only at
+ * that point do we really have a real sense of maxRTT (the queues en route
+ * were getting just too full now).
+ */
+static void
+htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t minRTT = net->htcp_ca.minRTT;
+ uint32_t maxRTT = net->htcp_ca.maxRTT;
+
+ htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
+ htcp_alpha_update(&net->htcp_ca);
+
+ /*
+ * add slowly fading memory for maxRTT to accommodate routing
+ * changes etc
+ */
+ if (minRTT > 0 && maxRTT > minRTT)
+ net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
+}
+
+static uint32_t
+htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ htcp_param_update(stcb, net);
+ return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
+}
+
+static void
+htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*-
+ * How to handle these functions?
+ * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
+ * return;
+ */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >= net->cwnd) {
+ if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) {
+ net->cwnd += (net->mtu * sctp_L2_abc_variable);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ measure_rtt(stcb, net);
+
+ /*
+ * In dangerous area, increase slowly. In theory this is
+ * net->cwnd += alpha / net->cwnd
+ */
+ /* What is snd_cwnd_cnt?? */
+ if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
+ /*-
+ * Does SCTP have a cwnd clamp?
+ * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
+ */
+ net->cwnd += net->mtu;
+ net->partial_bytes_acked = 0;
+ htcp_alpha_update(&net->htcp_ca);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ net->partial_bytes_acked += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+
+ net->htcp_ca.bytes_acked = net->mtu;
+ }
+}
+
+#ifdef SCTP_NOT_USED
+/* Lower bound on congestion window. */
+static uint32_t
+htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ return net->ssthresh;
+}
+
+#endif
+
+static void
+htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ memset(&net->htcp_ca, 0, sizeof(struct htcp));
+ net->htcp_ca.alpha = ALPHA_BASE;
+ net->htcp_ca.beta = BETA_MIN;
+ net->htcp_ca.bytes_acked = net->mtu;
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+}
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*
+ * We take the max of the burst limit times a MTU or the
+ * INITIAL_CWND. We then limit this to 4 MTU's of sending.
+ */
+ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
+ /* we always get at LEAST 2 MTU's */
+ if (net->cwnd < (2 * net->mtu)) {
+ net->cwnd = 2 * net->mtu;
+ }
+ net->ssthresh = stcb->asoc.peers_rwnd;
+ htcp_init(stcb, net);
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ htcp_cong_avoid(stcb, net);
+ measure_achieved_throughput(stcb, net);
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if state were changed */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if the state were being changed to timeout */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->mtu;
+ /* floor of 1 mtu */
+ if (net->cwnd < net->mtu)
+ net->cwnd = net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ net->partial_bytes_acked = 0;
+}
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ /* JRS - reset hctp as if state changed */
+ htcp_reset(&net->htcp_ca);
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
--- /dev/null
+++ sys/netinet/in_mcast.c
@@ -0,0 +1,1844 @@
+/*-
+ * Copyright (c) 2007 Bruce M. Simpson.
+ * Copyright (c) 2005 Robert N. M. Watson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * IPv4 multicast socket, group, and socket option processing module.
+ * Until further notice, this file requires INET to compile.
+ * TODO: Make this infrastructure independent of address family.
+ * TODO: Teach netinet6 to use this code.
+ * TODO: Hook up SSM logic to IGMPv3/MLDv2.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_mcast.c,v 1.3.2.1 2007/11/29 20:16:42 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp_var.h>
+
+#ifndef __SOCKUNION_DECLARED
+union sockunion {
+ struct sockaddr_storage ss;
+ struct sockaddr sa;
+ struct sockaddr_dl sdl;
+ struct sockaddr_in sin;
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+#endif
+};
+typedef union sockunion sockunion_t;
+#define __SOCKUNION_DECLARED
+#endif /* __SOCKUNION_DECLARED */
+
+static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
+static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
+static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
+
+/*
+ * The IPv4 multicast list (in_multihead and associated structures) are
+ * protected by the global in_multi_mtx. See in_var.h for more details. For
+ * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
+ * ip_output() to send IGMP packets while holding the lock; this probably is
+ * not quite desirable.
+ */
+struct in_multihead in_multihead; /* XXX BSS initialization */
+struct mtx in_multi_mtx;
+MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
+
+/*
+ * Functions with non-static linkage defined in this file should be
+ * declared in in_var.h:
+ * imo_match_group()
+ * imo_match_source()
+ * in_addmulti()
+ * in_delmulti()
+ * in_delmulti_locked()
+ * and ip_var.h:
+ * inp_freemoptions()
+ * inp_getmoptions()
+ * inp_setmoptions()
+ */
+static int imo_grow(struct ip_moptions *);
+static int imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
+static int imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
+static int inp_change_source_filter(struct inpcb *, struct sockopt *);
+static struct ip_moptions *
+ inp_findmoptions(struct inpcb *);
+static int inp_get_source_filters(struct inpcb *, struct sockopt *);
+static int inp_join_group(struct inpcb *, struct sockopt *);
+static int inp_leave_group(struct inpcb *, struct sockopt *);
+static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
+static int inp_set_source_filters(struct inpcb *, struct sockopt *);
+static struct ifnet *
+ ip_multicast_if(struct in_addr *a);
+
+/*
+ * Resize the ip_moptions vector to the next power-of-two minus 1.
+ * May be called with locks held; do not sleep.
+ */
+static int
+imo_grow(struct ip_moptions *imo)
+{
+ struct in_multi **nmships;
+ struct in_multi **omships;
+ struct in_mfilter *nmfilters;
+ struct in_mfilter *omfilters;
+ size_t idx;
+ size_t newmax;
+ size_t oldmax;
+
+ nmships = NULL;
+ nmfilters = NULL;
+ omships = imo->imo_membership;
+ omfilters = imo->imo_mfilters;
+ oldmax = imo->imo_max_memberships;
+ newmax = ((oldmax + 1) * 2) - 1;
+
+ if (newmax <= IP_MAX_MEMBERSHIPS) {
+ nmships = (struct in_multi **)realloc(omships,
+ sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
+ nmfilters = (struct in_mfilter *)realloc(omfilters,
+ sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
+ if (nmships != NULL && nmfilters != NULL) {
+ /* Initialize newly allocated source filter heads. */
+ for (idx = oldmax; idx < newmax; idx++) {
+ nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
+ nmfilters[idx].imf_nsources = 0;
+ TAILQ_INIT(&nmfilters[idx].imf_sources);
+ }
+ imo->imo_max_memberships = newmax;
+ imo->imo_membership = nmships;
+ imo->imo_mfilters = nmfilters;
+ }
+ }
+
+ if (nmships == NULL || nmfilters == NULL) {
+ if (nmships != NULL)
+ free(nmships, M_IPMOPTS);
+ if (nmfilters != NULL)
+ free(nmfilters, M_IPMSOURCE);
+ return (ETOOMANYREFS);
+ }
+
+ return (0);
+}
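
The resize rule above, newmax = ((oldmax + 1) * 2) - 1, steps the vector length through successive power-of-two-minus-one sizes, and imo_grow() refuses to grow past IP_MAX_MEMBERSHIPS. A one-line sketch of the progression (the helper name is illustrative only):

    /* Illustrative only: next membership-vector size as computed by imo_grow(). */
    static size_t
    next_memberships(size_t oldmax)
    {
            return (((oldmax + 1) * 2) - 1);        /* e.g. 7 -> 15 -> 31 -> 63 */
    }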
+
+/*
+ * Add a source to a multicast filter list.
+ * Assumes the associated inpcb is locked.
+ */
+static int
+imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+ struct in_msource *ims, *nims;
+ struct in_mfilter *imf;
+
+ KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters vector not allocated", __func__));
+
+ imf = &imo->imo_mfilters[gidx];
+ if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
+ return (ENOBUFS);
+
+ ims = imo_match_source(imo, gidx, &src->sa);
+ if (ims != NULL)
+ return (EADDRNOTAVAIL);
+
+ /* Do not sleep with inp lock held. */
+ MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
+ M_IPMSOURCE, M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOBUFS);
+
+ nims->ims_addr = src->ss;
+ TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
+ imf->imf_nsources++;
+
+ return (0);
+}
+
+static int
+imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+ struct in_msource *ims;
+ struct in_mfilter *imf;
+
+ KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters vector not allocated", __func__));
+
+ imf = &imo->imo_mfilters[gidx];
+ if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
+ return (ENOBUFS);
+
+ ims = imo_match_source(imo, gidx, &src->sa);
+ if (ims == NULL)
+ return (EADDRNOTAVAIL);
+
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+
+ return (0);
+}
+
+/*
+ * Find an IPv4 multicast group entry for this ip_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * Return its index into the array, or -1 if not found.
+ */
+size_t
+imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
+ struct sockaddr *group)
+{
+ sockunion_t *gsa;
+ struct in_multi **pinm;
+ int idx;
+ int nmships;
+
+ gsa = (sockunion_t *)group;
+
+ /* The imo_membership array may be lazily allocated. */
+ if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
+ return (-1);
+
+ nmships = imo->imo_num_memberships;
+ pinm = &imo->imo_membership[0];
+ for (idx = 0; idx < nmships; idx++, pinm++) {
+ if (*pinm == NULL)
+ continue;
+#if 0
+ printf("%s: trying ifp = %p, inaddr = %s ", __func__,
+ ifp, inet_ntoa(gsa->sin.sin_addr));
+ printf("against %p, %s\n",
+ (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
+#endif
+ if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
+ (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
+ break;
+ }
+ }
+ if (idx >= nmships)
+ idx = -1;
+
+ return (idx);
+}
+
+/*
+ * Find a multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ */
+struct in_msource *
+imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
+{
+ struct in_mfilter *imf;
+ struct in_msource *ims, *pims;
+
+ KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
+ ("%s: invalid index %d\n", __func__, (int)gidx));
+
+ /* The imo_mfilters array may be lazily allocated. */
+ if (imo->imo_mfilters == NULL)
+ return (NULL);
+
+ pims = NULL;
+ imf = &imo->imo_mfilters[gidx];
+ TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
+ /*
+ * Perform bitwise comparison of two IPv4 addresses.
+ * TODO: Do the same for IPv6.
+ * Do not use sa_equal() for this as it is not aware of
+ * deeper structure in sockaddr_in or sockaddr_in6.
+ */
+ if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
+ ((struct sockaddr_in *)src)->sin_addr.s_addr) {
+ pims = ims;
+ break;
+ }
+ }
+
+ return (pims);
+}
+
+/*
+ * Join an IPv4 multicast group.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+ struct in_multi *inm;
+
+ inm = NULL;
+
+ IFF_LOCKGIANT(ifp);
+ IN_MULTI_LOCK();
+
+ IN_LOOKUP_MULTI(*ap, ifp, inm);
+ if (inm != NULL) {
+ /*
+ * If we already joined this group, just bump the
+ * refcount and return it.
+ */
+ KASSERT(inm->inm_refcount >= 1,
+ ("%s: bad refcount %d", __func__, inm->inm_refcount));
+ ++inm->inm_refcount;
+ } else do {
+ sockunion_t gsa;
+ struct ifmultiaddr *ifma;
+ struct in_multi *ninm;
+ int error;
+
+ memset(&gsa, 0, sizeof(gsa));
+ gsa.sin.sin_family = AF_INET;
+ gsa.sin.sin_len = sizeof(struct sockaddr_in);
+ gsa.sin.sin_addr = *ap;
+
+ /*
+ * Check if a link-layer group is already associated
+ * with this network-layer group on the given ifnet.
+ * If so, bump the refcount on the existing network-layer
+ * group association and return it.
+ */
+ error = if_addmulti(ifp, &gsa.sa, &ifma);
+ if (error)
+ break;
+ if (ifma->ifma_protospec != NULL) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
+#ifdef INVARIANTS
+ if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
+ inm->inm_addr.s_addr != ap->s_addr)
+ panic("%s: ifma is inconsistent", __func__);
+#endif
+ ++inm->inm_refcount;
+ break;
+ }
+
+ /*
+ * A new membership is needed; construct it and
+ * perform the IGMP join.
+ */
+ ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
+ if (ninm == NULL) {
+ if_delmulti_ifma(ifma);
+ break;
+ }
+ ninm->inm_addr = *ap;
+ ninm->inm_ifp = ifp;
+ ninm->inm_ifma = ifma;
+ ninm->inm_refcount = 1;
+ ifma->ifma_protospec = ninm;
+ LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
+
+ igmp_joingroup(ninm);
+
+ inm = ninm;
+ } while (0);
+
+ IN_MULTI_UNLOCK();
+ IFF_UNLOCKGIANT(ifp);
+
+ return (inm);
+}
+
+/*
+ * Leave an IPv4 multicast group.
+ * It is OK to call this routine if the underlying ifnet went away.
+ *
+ * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
+ * will set ifma_ifp to NULL when the associated ifnet instance is detached
+ * from the system.
+ *
+ * The only reason we need to violate layers and check ifma_ifp here at all
+ * is because certain hardware drivers still require Giant to be held,
+ * and it must always be taken before other locks.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+ struct ifnet *ifp;
+
+ KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
+ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
+ ifp = inm->inm_ifma->ifma_ifp;
+
+ if (ifp != NULL) {
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
+ IFF_LOCKGIANT(ifp);
+ }
+
+ IN_MULTI_LOCK();
+ in_delmulti_locked(inm);
+ IN_MULTI_UNLOCK();
+
+ if (ifp != NULL)
+ IFF_UNLOCKGIANT(ifp);
+}
+
+/*
+ * Delete a multicast address record, with locks held.
+ *
+ * It is OK to call this routine if the ifp went away.
+ * Assumes that caller holds the IN_MULTI lock, and that
+ * Giant was taken before other locks if required by the hardware.
+ */
+void
+in_delmulti_locked(struct in_multi *inm)
+{
+ struct ifmultiaddr *ifma;
+
+ IN_MULTI_LOCK_ASSERT();
+ KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
+
+ if (--inm->inm_refcount == 0) {
+ igmp_leavegroup(inm);
+
+ ifma = inm->inm_ifma;
+#ifdef DIAGNOSTIC
+ if (bootverbose)
+ printf("%s: purging ifma %p\n", __func__, ifma);
+#endif
+ KASSERT(ifma->ifma_protospec == inm,
+ ("%s: ifma_protospec != inm", __func__));
+ ifma->ifma_protospec = NULL;
+
+ LIST_REMOVE(inm, inm_link);
+ free(inm, M_IPMADDR);
+
+ if_delmulti_ifma(ifma);
+ }
+}
+
+/*
+ * Block or unblock an ASM/SSM multicast source on an inpcb.
+ */
+static int
+inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims;
+ size_t idx;
+ int error;
+ int block;
+
+ ifp = NULL;
+ error = 0;
+ block = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+
+ switch (sopt->sopt_name) {
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE: {
+ struct ip_mreq_source mreqs;
+
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+
+ if (mreqs.imr_interface.s_addr != INADDR_ANY)
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+ if (sopt->sopt_name == IP_BLOCK_SOURCE)
+ block = 1;
+
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+ }
+
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
+ block = 1;
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ /* XXX INET6 */
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Check if we are actually a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * SSM multicast truth table for block/unblock operations.
+ *
+ * Operation Filter Mode Entry exists? Action
+ *
+ * block exclude no add source to filter
+ * unblock include no add source to filter
+ * block include no EINVAL
+ * unblock exclude no EINVAL
+ * block exclude yes EADDRNOTAVAIL
+ * unblock include yes EADDRNOTAVAIL
+ * block include yes remove source from filter
+ * unblock exclude yes remove source from filter
+ *
+ * FreeBSD does not explicitly distinguish between ASM and SSM
+ * mode sockets; all sockets are assumed to have a filter list.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imf_fmode is %s\n", __func__,
+ imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
+ }
+#endif
+ ims = imo_match_source(imo, idx, &ssa->sa);
+ if (ims == NULL) {
+ if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: adding %s to filter list\n",
+ __func__, inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = imo_join_source(imo, idx, ssa);
+ }
+ if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
+ /*
+ * If the socket is in inclusive mode:
+ * the source is already blocked as it has no entry.
+ * If the socket is in exclusive mode:
+ * the source is already unblocked as it has no entry.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ims %p; %s already [un]blocked\n",
+ __func__, ims,
+ inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = EINVAL;
+ }
+ } else {
+ if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
+ /*
+ * If the socket is in exclusive mode:
+ * the source is already blocked as it has an entry.
+ * If the socket is in inclusive mode:
+ * the source is already unblocked as it has an entry.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ims %p; %s already [un]blocked\n",
+ __func__, ims,
+ inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = EADDRNOTAVAIL;
+ }
+ if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: removing %s from filter list\n",
+ __func__, inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = imo_leave_source(imo, idx, ssa);
+ }
+ }
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
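
The truth table above can be read as a pure decision on three inputs: the requested operation, the membership's filter mode, and whether a source entry already exists. A compact restatement of that table (the enum and helper below are illustrative only, not part of the file):

    /* Illustrative only: the block/unblock truth table as a decision function. */
    enum ssm_action { SSM_ADD, SSM_REMOVE, SSM_EINVAL, SSM_EADDRNOTAVAIL };

    static enum ssm_action
    ssm_filter_action(int block, int fmode_exclude, int entry_exists)
    {
            if (!entry_exists) {
                    if ((block && fmode_exclude) || (!block && !fmode_exclude))
                            return (SSM_ADD);           /* add source to filter */
                    return (SSM_EINVAL);                /* already [un]blocked */
            }
            if ((block && fmode_exclude) || (!block && !fmode_exclude))
                    return (SSM_EADDRNOTAVAIL);         /* entry already present */
            return (SSM_REMOVE);                        /* remove source from filter */
    }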
+
+/*
+ * Given an inpcb, return its multicast options structure pointer. Accepts
+ * an unlocked inpcb pointer, but will return it locked. May sleep.
+ */
+static struct ip_moptions *
+inp_findmoptions(struct inpcb *inp)
+{
+ struct ip_moptions *imo;
+ struct in_multi **immp;
+ struct in_mfilter *imfp;
+ size_t idx;
+
+ INP_LOCK(inp);
+ if (inp->inp_moptions != NULL)
+ return (inp->inp_moptions);
+
+ INP_UNLOCK(inp);
+
+ imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
+ M_WAITOK);
+ immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
+ M_IPMOPTS, M_WAITOK | M_ZERO);
+ imfp = (struct in_mfilter *)malloc(
+ sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
+ M_IPMSOURCE, M_WAITOK);
+
+ imo->imo_multicast_ifp = NULL;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ imo->imo_multicast_vif = -1;
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+ imo->imo_num_memberships = 0;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_membership = immp;
+
+ /* Initialize per-group source filters. */
+ for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
+ imfp[idx].imf_fmode = MCAST_EXCLUDE;
+ imfp[idx].imf_nsources = 0;
+ TAILQ_INIT(&imfp[idx].imf_sources);
+ }
+ imo->imo_mfilters = imfp;
+
+ INP_LOCK(inp);
+ if (inp->inp_moptions != NULL) {
+ free(imfp, M_IPMSOURCE);
+ free(immp, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+ return (inp->inp_moptions);
+ }
+ inp->inp_moptions = imo;
+ return (imo);
+}
+
+/*
+ * Discard the IP multicast options (and source filters).
+ */
+void
+inp_freemoptions(struct ip_moptions *imo)
+{
+ struct in_mfilter *imf;
+ struct in_msource *ims, *tims;
+ size_t idx, nmships;
+
+ KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+
+ nmships = imo->imo_num_memberships;
+ for (idx = 0; idx < nmships; ++idx) {
+ in_delmulti(imo->imo_membership[idx]);
+
+ if (imo->imo_mfilters != NULL) {
+ imf = &imo->imo_mfilters[idx];
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
+ ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: did not free all imf_nsources", __func__));
+ }
+ }
+
+ if (imo->imo_mfilters != NULL)
+ free(imo->imo_mfilters, M_IPMSOURCE);
+ free(imo->imo_membership, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+}
+
+/*
+ * Atomically get source filters on a socket for an IPv4 multicast group.
+ * Called with INP lock held; returns with lock released.
+ */
+static int
+inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ struct in_mfilter *imf;
+ struct in_msource *ims;
+ struct sockaddr_storage *ptss;
+ struct sockaddr_storage *tss;
+ int error;
+ size_t idx;
+
+ INP_LOCK_ASSERT(inp);
+
+ imo = inp->inp_moptions;
+ KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
+
+ INP_UNLOCK(inp);
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
+ return (EINVAL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+
+ INP_LOCK(inp);
+
+ /*
+ * Lookup group on the socket.
+ */
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ INP_UNLOCK(inp);
+ return (EADDRNOTAVAIL);
+ }
+
+ imf = &imo->imo_mfilters[idx];
+ msfr.msfr_fmode = imf->imf_fmode;
+ msfr.msfr_nsrcs = imf->imf_nsources;
+
+ /*
+ * If the user specified a buffer, copy out the source filter
+ * entries to userland gracefully.
+ * msfr.msfr_nsrcs is always set to the total number of filter
+ * entries which the kernel currently has for this group.
+ */
+ tss = NULL;
+ if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
+ /*
+ * Make a copy of the source vector so that we do not
+ * thrash the inpcb lock whilst copying it out.
+ * We only copy out the number of entries which userland
+ * has asked for, but we always tell userland how big the
+ * buffer really needs to be.
+ */
+ MALLOC(tss, struct sockaddr_storage *,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_NOWAIT);
+ if (tss == NULL) {
+ error = ENOBUFS;
+ } else {
+ ptss = tss;
+ TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
+ memcpy(ptss++, &ims->ims_addr,
+ sizeof(struct sockaddr_storage));
+ }
+ }
+ }
+
+ INP_UNLOCK(inp);
+
+ if (tss != NULL) {
+ error = copyout(tss, msfr.msfr_srcs,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ FREE(tss, M_TEMP);
+ }
+
+ if (error)
+ return (error);
+
+ error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
+
+ return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+int
+inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_mreqn mreqn;
+ struct ip_moptions *imo;
+ struct ifnet *ifp;
+ struct in_ifaddr *ia;
+ int error, optval;
+ u_char coptval;
+
+ INP_LOCK(inp);
+ imo = inp->inp_moptions;
+ /*
+ * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
+ INP_UNLOCK(inp);
+ return (EOPNOTSUPP);
+ }
+
+ error = 0;
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF:
+ if (imo != NULL)
+ optval = imo->imo_multicast_vif;
+ else
+ optval = -1;
+ INP_UNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_IF:
+ memset(&mreqn, 0, sizeof(struct ip_mreqn));
+ if (imo != NULL) {
+ ifp = imo->imo_multicast_ifp;
+ if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
+ mreqn.imr_address = imo->imo_multicast_addr;
+ } else if (ifp != NULL) {
+ mreqn.imr_ifindex = ifp->if_index;
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL) {
+ mreqn.imr_address =
+ IA_SIN(ia)->sin_addr;
+ }
+ }
+ }
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ error = sooptcopyout(sopt, &mreqn,
+ sizeof(struct ip_mreqn));
+ } else {
+ error = sooptcopyout(sopt, &mreqn.imr_address,
+ sizeof(struct in_addr));
+ }
+ break;
+
+ case IP_MULTICAST_TTL:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_TTL;
+ else
+ optval = coptval = imo->imo_multicast_ttl;
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_LOOP:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
+ else
+ optval = coptval = imo->imo_multicast_loop;
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MSFILTER:
+ if (imo == NULL) {
+ error = EADDRNOTAVAIL;
+ INP_UNLOCK(inp);
+ } else {
+ error = inp_get_source_filters(inp, sopt);
+ }
+ break;
+
+ default:
+ INP_UNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
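
Because the copyout above checks sopt_valsize, IP_MULTICAST_TTL and IP_MULTICAST_LOOP may be read with either a u_char or an int buffer. A minimal userland sketch, assuming s is an open SOCK_DGRAM socket (error handling omitted):

    u_char ttl8;
    int ttl32;
    socklen_t len;

    len = sizeof(ttl8);                 /* byte-sized form */
    (void)getsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl8, &len);

    len = sizeof(ttl32);                /* int-sized form */
    (void)getsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl32, &len);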
+
+/*
+ * Join an IPv4 multicast group, possibly with a source.
+ */
+static int
+inp_join_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_multi *inm;
+ size_t idx;
+ int error;
+
+ ifp = NULL;
+ error = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP: {
+ struct ip_mreq_source mreqs;
+
+ if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Do argument switcharoo from ip_mreq into
+ * ip_mreq_source to avoid using two instances.
+ */
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ /*
+ * Obtain ifp. If no interface address was provided,
+ * use the interface of the route in the unicast FIB for
+ * the given multicast destination; usually, this is the
+ * default route.
+ * If this lookup fails, attempt to use the first non-loopback
+ * interface with multicast capability in the system as a
+ * last resort. The legacy IPv4 ASM API requires that we do
+ * this in order to allow groups to be joined when the routing
+ * table has not yet been populated during boot.
+ * If all of these conditions fail, return EADDRNOTAVAIL, and
+ * reject the IPv4 multicast join.
+ */
+ if (mreqs.imr_interface.s_addr != INADDR_ANY) {
+ ifp = ip_multicast_if(&mreqs.imr_interface);
+ } else {
+ struct route ro;
+
+ ro.ro_rt = NULL;
+ *(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
+ rtalloc_ign(&ro, RTF_CLONING);
+ if (ro.ro_rt != NULL) {
+ ifp = ro.ro_rt->rt_ifp;
+ KASSERT(ifp != NULL, ("%s: null ifp",
+ __func__));
+ RTFREE(ro.ro_rt);
+ } else {
+ struct in_ifaddr *ia;
+ struct ifnet *mfp = NULL;
+ TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+ mfp = ia->ia_ifp;
+ if (!(mfp->if_flags & IFF_LOOPBACK) &&
+ (mfp->if_flags & IFF_MULTICAST)) {
+ ifp = mfp;
+ break;
+ }
+ }
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+ }
+
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_JOIN_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ /*
+ * Overwrite the port field if present, as the sockaddr
+ * being copied in may be matched with a binary comparison.
+ * XXX INET6
+ */
+ gsa->sin.sin_port = 0;
+ if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ ssa->sin.sin_port = 0;
+ }
+
+ /*
+ * Obtain the ifp.
+ */
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Check if we already hold membership of this group for this inpcb.
+ * If so, we do not need to perform the initial join.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx != -1) {
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /*
+ * Attempting to join an ASM group (when already
+ * an ASM or SSM member) is an error.
+ */
+ error = EADDRNOTAVAIL;
+ } else {
+ imf = &imo->imo_mfilters[idx];
+ if (imf->imf_nsources == 0) {
+ /*
+ * Attempting to join an SSM group (when
+ * already an ASM member) is an error.
+ */
+ error = EINVAL;
+ } else {
+ /*
+ * Attempting to join an SSM group (when
+ * already an SSM member) means "add this
+ * source to the inclusive filter list".
+ */
+ error = imo_join_source(imo, idx, ssa);
+ }
+ }
+ goto out_locked;
+ }
+
+ /*
+ * Call imo_grow() to reallocate the membership and source filter
+ * vectors if they are full. If the size would exceed the hard limit,
+ * then we know we've really run out of entries. We keep the INP
+ * lock held to avoid introducing a race condition.
+ */
+ if (imo->imo_num_memberships == imo->imo_max_memberships) {
+ error = imo_grow(imo);
+ if (error)
+ goto out_locked;
+ }
+
+ /*
+ * So far, so good: perform the layer 3 join, layer 2 join,
+ * and make an IGMP announcement if needed.
+ */
+ inm = in_addmulti(&gsa->sin.sin_addr, ifp);
+ if (inm == NULL) {
+ error = ENOBUFS;
+ goto out_locked;
+ }
+ idx = imo->imo_num_memberships;
+ imo->imo_membership[idx] = inm;
+ imo->imo_num_memberships++;
+
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imf_mfilters vector was not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+ KASSERT(TAILQ_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+
+ /*
+ * If this is a new SSM group join (i.e. a source was specified
+ * with this group), add this source to the filter list.
+ */
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /*
+ * An initial SSM join implies that this socket's membership
+ * of the multicast group is now in inclusive mode.
+ */
+ imf->imf_fmode = MCAST_INCLUDE;
+
+ error = imo_join_source(imo, idx, ssa);
+ if (error) {
+ /*
+ * Drop inp lock before calling in_delmulti(),
+ * to prevent a lock order reversal.
+ */
+ --imo->imo_num_memberships;
+ INP_UNLOCK(inp);
+ in_delmulti(inm);
+ return (error);
+ }
+ }
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
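
For the MCAST_JOIN_SOURCE_GROUP case handled above, userland fills a struct group_source_req with the interface index, the group, and the source address. A minimal sketch, assuming s is an open SOCK_DGRAM socket and ifindex names a multicast-capable interface (error handling omitted):

    struct group_source_req gsr;
    struct sockaddr_in *grp = (struct sockaddr_in *)&gsr.gsr_group;
    struct sockaddr_in *src = (struct sockaddr_in *)&gsr.gsr_source;

    memset(&gsr, 0, sizeof(gsr));
    gsr.gsr_interface = ifindex;
    grp->sin_family = AF_INET;
    grp->sin_len = sizeof(*grp);
    inet_pton(AF_INET, "232.1.1.1", &grp->sin_addr);    /* SSM group */
    src->sin_family = AF_INET;
    src->sin_len = sizeof(*src);
    inet_pton(AF_INET, "192.0.2.1", &src->sin_addr);    /* desired source */

    (void)setsockopt(s, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP, &gsr, sizeof(gsr));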
+
+/*
+ * Leave an IPv4 multicast group on an inpcb, possibly with a source.
+ */
+static int
+inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ struct ip_mreq_source mreqs;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims, *tims;
+ struct in_multi *inm;
+ size_t idx;
+ int error;
+
+ ifp = NULL;
+ error = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Swap interface and sourceaddr arguments,
+ * as ip_mreq and ip_mreq_source are laid
+ * out differently.
+ */
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ }
+
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Find the membership in the membership array.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * If we were instructed only to leave a given source, do so.
+ */
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ if (imf->imf_nsources == 0 ||
+ imf->imf_fmode == MCAST_EXCLUDE) {
+ /*
+ * Attempting to SSM leave an ASM group
+ * is an error; should use *_BLOCK_SOURCE instead.
+ * Attempting to SSM leave a source in a group when
+ * the socket is in 'exclude mode' is also an error.
+ */
+ error = EINVAL;
+ } else {
+ error = imo_leave_source(imo, idx, ssa);
+ }
+ /*
+ * If an error occurred, or this source is not the last
+ * source in the group, do not leave the whole group.
+ */
+ if (error || imf->imf_nsources > 0)
+ goto out_locked;
+ }
+
+ /*
+ * Give up the multicast address record to which the membership points.
+ */
+ inm = imo->imo_membership[idx];
+ in_delmulti(inm);
+
+ /*
+ * Free any source filters for this group if they exist.
+ * Revert inpcb to the default MCAST_EXCLUDE state.
+ */
+ if (imo->imo_mfilters != NULL) {
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: imf_nsources not 0", __func__));
+ KASSERT(TAILQ_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+ imf->imf_fmode = MCAST_EXCLUDE;
+ }
+
+ /*
+ * Remove the gap in the membership array.
+ */
+ for (++idx; idx < imo->imo_num_memberships; ++idx)
+ imo->imo_membership[idx-1] = imo->imo_membership[idx];
+ imo->imo_num_memberships--;
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
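
Leaving is symmetric with joining; the IP_DROP_MEMBERSHIP case above takes the same struct ip_mreq used for IP_ADD_MEMBERSHIP. A minimal userland sketch, assuming s previously joined the group (error handling omitted):

    struct ip_mreq mreq;

    memset(&mreq, 0, sizeof(mreq));
    inet_pton(AF_INET, "239.1.1.1", &mreq.imr_multiaddr);
    mreq.imr_interface.s_addr = htonl(INADDR_ANY);      /* let the kernel choose */
    (void)setsockopt(s, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq));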
+
+/*
+ * Select the interface for transmitting IPv4 multicast datagrams.
+ *
+ * Either an instance of struct in_addr or an instance of struct ip_mreqn
+ * may be passed to this socket option. An address of INADDR_ANY or an
+ * interface index of 0 is used to remove a previous selection.
+ * When no interface is selected, one is chosen for every send.
+ */
+static int
+inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct in_addr addr;
+ struct ip_mreqn mreqn;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ int error;
+
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ /*
+ * An interface index was specified using the
+ * Linux-derived ip_mreqn structure.
+ */
+ error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
+ sizeof(struct ip_mreqn));
+ if (error)
+ return (error);
+
+ if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
+ return (EINVAL);
+
+ if (mreqn.imr_ifindex == 0) {
+ ifp = NULL;
+ } else {
+ ifp = ifnet_byindex(mreqn.imr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+ } else {
+ /*
+ * An interface was specified by IPv4 address.
+ * This is the traditional BSD usage.
+ */
+ error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
+ sizeof(struct in_addr));
+ if (error)
+ return (error);
+ if (addr.s_addr == INADDR_ANY) {
+ ifp = NULL;
+ } else {
+ ifp = ip_multicast_if(&addr);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ifp = %p, addr = %s\n",
+ __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
+ }
+#endif
+ }
+
+ /* Reject interfaces which do not support multicast. */
+ if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EOPNOTSUPP);
+
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ INP_UNLOCK(inp);
+
+ return (0);
+}
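
Both input forms accepted above are set the same way from userland; the index-based form uses the Linux-style struct ip_mreqn, the traditional form a bare struct in_addr. A minimal sketch, assuming s is an open SOCK_DGRAM socket and ifindex is a valid interface index (error handling omitted):

    struct ip_mreqn mreqn;
    struct in_addr ifaddr;

    /* Select by interface index (struct ip_mreqn form). */
    memset(&mreqn, 0, sizeof(mreqn));
    mreqn.imr_ifindex = ifindex;
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &mreqn, sizeof(mreqn));

    /* Select by interface address (traditional BSD form). */
    inet_pton(AF_INET, "192.0.2.10", &ifaddr);
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr, sizeof(ifaddr));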
+
+/*
+ * Atomically set source filters on a socket for an IPv4 multicast group.
+ */
+static int
+inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims, *tims;
+ size_t idx;
+ int error;
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
+ (msfr.msfr_fmode != MCAST_EXCLUDE &&
+ msfr.msfr_fmode != MCAST_INCLUDE))
+ return (EINVAL);
+
+ if (msfr.msfr_group.ss_family != AF_INET ||
+ msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ gsa->sin.sin_port = 0; /* ignore port */
+
+ if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Take the INP lock.
+ * Check if this socket is a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+ imf = &imo->imo_mfilters[idx];
+
+#ifdef DIAGNOSTIC
+ if (bootverbose)
+ printf("%s: clearing source list\n", __func__);
+#endif
+
+ /*
+ * Remove any existing source filters.
+ */
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: source list not cleared", __func__));
+
+ /*
+ * Apply any new source filters, if present.
+ */
+ if (msfr.msfr_nsrcs > 0) {
+ struct in_msource **pnims;
+ struct in_msource *nims;
+ struct sockaddr_storage *kss;
+ struct sockaddr_storage *pkss;
+ sockunion_t *psu;
+ int i, j;
+
+ /*
+ * Drop the inp lock so we may sleep if we need to
+ * in order to satisfy a malloc request.
+ * We will re-take it before changing socket state.
+ */
+ INP_UNLOCK(inp);
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: loading %lu source list entries\n",
+ __func__, (unsigned long)msfr.msfr_nsrcs);
+ }
+#endif
+ /*
+ * Make a copy of the user-space source vector so
+ * that we may copy it in with a single copyin. This
+ * allows us to deal with page faults up-front.
+ */
+ MALLOC(kss, struct sockaddr_storage *,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_WAITOK);
+ error = copyin(msfr.msfr_srcs, kss,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ if (error) {
+ FREE(kss, M_TEMP);
+ return (error);
+ }
+
+ /*
+ * Perform argument checking on every sockaddr_storage
+ * structure in the vector provided to us. Overwrite
+ * fields which should not apply to source entries.
+ * TODO: Check for duplicate sources on this pass.
+ */
+ psu = (sockunion_t *)kss;
+ for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
+ switch (psu->ss.ss_family) {
+ case AF_INET:
+ if (psu->sin.sin_len !=
+ sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ } else {
+ psu->sin.sin_port = 0;
+ }
+ break;
+#ifdef notyet
+ case AF_INET6:
+ if (psu->sin6.sin6_len !=
+ sizeof(struct sockaddr_in6)) {
+ error = EINVAL;
+ } else {
+ psu->sin6.sin6_port = 0;
+ psu->sin6.sin6_flowinfo = 0;
+ }
+ break;
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (error)
+ break;
+ }
+ if (error) {
+ FREE(kss, M_TEMP);
+ return (error);
+ }
+
+ /*
+ * Allocate a block to track all the in_msource
+ * entries we are about to allocate, in case we
+ * abruptly need to free them.
+ */
+ MALLOC(pnims, struct in_msource **,
+ sizeof(struct in_msource *) * msfr.msfr_nsrcs,
+ M_TEMP, M_WAITOK | M_ZERO);
+
+ /*
+ * Allocate up to nsrcs individual chunks.
+ * If we encounter an error, backtrack out of
+ * all allocations cleanly; updates must be atomic.
+ */
+ pkss = kss;
+ nims = NULL;
+ for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
+ MALLOC(nims, struct in_msource *,
+ sizeof(struct in_msource) * msfr.msfr_nsrcs,
+ M_IPMSOURCE, M_WAITOK | M_ZERO);
+ pnims[i] = nims;
+ }
+ if (i < msfr.msfr_nsrcs) {
+ for (j = 0; j < i; j++) {
+ if (pnims[j] != NULL)
+ FREE(pnims[j], M_IPMSOURCE);
+ }
+ FREE(pnims, M_TEMP);
+ FREE(kss, M_TEMP);
+ return (ENOBUFS);
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ /*
+ * Finally, apply the filters to the socket.
+ * Re-take the inp lock; we are changing socket state.
+ */
+ pkss = kss;
+ INP_LOCK(inp);
+ for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
+ memcpy(&(pnims[i]->ims_addr), pkss,
+ sizeof(struct sockaddr_storage));
+ TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
+ ims_next);
+ imf->imf_nsources++;
+ }
+ FREE(pnims, M_TEMP);
+ FREE(kss, M_TEMP);
+ }
+
+ /*
+ * Update the filter mode on the socket before releasing the inpcb.
+ */
+ INP_LOCK_ASSERT(inp);
+ imf->imf_fmode = msfr.msfr_fmode;
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ * Many of the socket options handled in this function duplicate the
+ * functionality of socket options in the regular unicast API. However,
+ * it is not possible to merge the duplicate code, because the idempotence
+ * of the IPv4 multicast part of the BSD Sockets API must be preserved;
+ * the effects of these options must be treated as separate and distinct.
+ */
+int
+inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_moptions *imo;
+ int error;
+
+ error = 0;
+
+ /*
+ * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ * XXX Unlocked read of inp_socket believed OK.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+ return (EOPNOTSUPP);
+
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF: {
+ int vifi;
+ /*
+ * Select a multicast VIF for transmission.
+ * Only useful if multicast forwarding is active.
+ */
+ if (legal_vif_num == NULL) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
+ if (error)
+ break;
+ if (!legal_vif_num(vifi) && (vifi != -1)) {
+ error = EINVAL;
+ break;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_vif = vifi;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_IF:
+ error = inp_set_multicast_if(inp, sopt);
+ break;
+
+ case IP_MULTICAST_TTL: {
+ u_char ttl;
+
+ /*
+ * Set the IP time-to-live for outgoing multicast packets.
+ * The original multicast API required a char argument,
+ * which is inconsistent with the rest of the socket API.
+ * We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &ttl, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int ittl;
+
+ error = sooptcopyin(sopt, &ittl, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ if (ittl > 255) {
+ error = EINVAL;
+ break;
+ }
+ ttl = (u_char)ittl;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ttl = ttl;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_LOOP: {
+ u_char loop;
+
+ /*
+ * Set the loopback flag for outgoing multicast packets.
+ * Must be zero or one. The original multicast API required a
+ * char argument, which is inconsistent with the rest
+ * of the socket API. We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &loop, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int iloop;
+
+ error = sooptcopyin(sopt, &iloop, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ loop = (u_char)iloop;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_loop = !!loop;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ error = inp_join_group(inp, sopt);
+ break;
+
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ error = inp_leave_group(inp, sopt);
+ break;
+
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_change_source_filter(inp, sopt);
+ break;
+
+ case IP_MSFILTER:
+ error = inp_set_source_filters(inp, sopt);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
+
+/*
+ * Following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as an interface index.
+ */
+static struct ifnet *
+ip_multicast_if(struct in_addr *a)
+{
+ int ifindex;
+ struct ifnet *ifp;
+
+ if (ntohl(a->s_addr) >> 24 == 0) {
+ ifindex = ntohl(a->s_addr) & 0xffffff;
+ if (ifindex < 0 || if_index < ifindex)
+ return (NULL);
+ ifp = ifnet_byindex(ifindex);
+ } else
+ INADDR_TO_IFP(*a, ifp);
+ return (ifp);
+}
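
The 0.0.0.0/8 convention decoded above also gives applications a way to pick the outgoing interface by index when no convenient IPv4 address names it: the index goes in the low 24 bits of an otherwise-zero address. A minimal userland sketch (ifindex is assumed valid, error handling omitted):

    struct in_addr ifsel;

    ifsel.s_addr = htonl(ifindex & 0x00ffffff);         /* high octet stays zero */
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifsel, sizeof(ifsel));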
+
Index: igmp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp_var.h -L sys/netinet/igmp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp_var.h
+++ sys/netinet/igmp_var.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
- * $FreeBSD: src/sys/netinet/igmp_var.h,v 1.21 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/igmp_var.h,v 1.22 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IGMP_VAR_H_
@@ -56,6 +56,7 @@
u_int igps_rcv_badreports; /* received invalid reports */
u_int igps_rcv_ourreports; /* received reports for our groups */
u_int igps_snd_reports; /* sent membership reports */
+ u_int igps_rcv_toolong; /* received with too many bytes */
};
#ifdef _KERNEL
@@ -68,12 +69,20 @@
#define IGMP_IREPORTEDLAST 1
/*
+ * State masks for IGMPv3
+ */
+#define IGMP_V3_NONEXISTENT 0x01
+#define IGMP_V3_OTHERMEMBER 0x02
+#define IGMP_V3_IREPORTEDLAST 0x04
+
+/*
* We must remember what version the subnet's querier is.
* We conveniently use the IGMP message type for the proper
* membership report to keep this state.
*/
#define IGMP_V1_ROUTER IGMP_V1_MEMBERSHIP_REPORT
#define IGMP_V2_ROUTER IGMP_V2_MEMBERSHIP_REPORT
+#define IGMP_V3_ROUTER IGMP_V3_MEMBERSHIP_REPORT
/*
* Revert to new router if we haven't heard from an old router in
@@ -81,6 +90,51 @@
*/
#define IGMP_AGE_THRESHOLD 540
+/*
+ * IGMPv3 protocol defaults
+ */
+#define IGMP_INIT_ROBVAR 2 /* Robustness */
+#define IGMP_MAX_ROBVAR 7
+#define IGMP_INIT_QRYINT 125 /* Querier's Query interval */
+#define IGMP_MAX_QRYINT 255
+#define IGMP_INIT_QRYRSP 10 /* Query Response interval */
+#define IGMP_DEF_QRYMRT 10
+#define IGMP_UNSOL_INT 1 /* Unsolicited Report interval */
+
+/*
+ * IGMPv3 report types
+ */
+#define IGMP_REPORT_MODE_IN 1 /* mode-is-include */
+#define IGMP_REPORT_MODE_EX 2 /* mode-is-exclude */
+#define IGMP_REPORT_TO_IN 3 /* change-to-include */
+#define IGMP_REPORT_TO_EX 4 /* change-to-exclude */
+#define IGMP_REPORT_ALLOW_NEW 5 /* allow-new-sources */
+#define IGMP_REPORT_BLOCK_OLD 6 /* block-old-sources */
+
+/*
+ * Report types
+ */
+#define IGMP_MASK_CUR_STATE 0x01 /* Report current-state */
+#define IGMP_MASK_ALLOW_NEW 0x02 /* Report source as allow-new */
+#define IGMP_MASK_BLOCK_OLD 0x04 /* Report source as block-old */
+#define IGMP_MASK_TO_IN 0x08 /* Report source as to_in */
+#define IGMP_MASK_TO_EX 0x10 /* Report source as to_ex */
+#define IGMP_MASK_STATE_T1 0x20 /* State at T1 */
+#define IGMP_MASK_STATE_T2 0x40 /* State at T2 */
+#define IGMP_MASK_IF_STATE 0x80 /* Report current-state per interface */
+
+#define IGMP_MASK_STATE_TX (IGMP_MASK_STATE_T1 | IGMP_MASK_STATE_T2)
+#define IGMP_MASK_PENDING (IGMP_MASK_CUR_STATE | \
+ IGMP_MASK_ALLOW_NEW | \
+ IGMP_MASK_BLOCK_OLD)
+
+/*
+ * List identifiers
+ */
+#define IGMP_EXCLUDE_LIST 1 /* exclude list used to tag report */
+#define IGMP_INCLUDE_LIST 2 /* include list used to tag report */
+#define IGMP_RECORDED_LIST 3 /* recorded list used to tag report */
+
void igmp_init(void);
void igmp_input(struct mbuf *, int);
void igmp_joingroup(struct in_multi *);
@@ -100,6 +154,6 @@
#define IGMPCTL_NAMES { \
{ 0, 0 }, \
- { "stats", CTLTYPE_STRUCT }, \
+ { "stats", CTLTYPE_STRUCT } \
}
#endif
--- /dev/null
+++ sys/netinet/ip_options.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_options.c,v 1.6 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipstealth.h"
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip_icmp.h>
+#include <machine/in_cksum.h>
+
+#include <sys/socketvar.h>
+
+#include <security/mac/mac_framework.h>
+
+static int ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+ &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+
+static int ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
+ CTLFLAG_RW, &ip_acceptsourceroute, 0,
+ "Enable accepting source routed IP packets");
+
+int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
+ &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+
+static void save_rte(struct mbuf *m, u_char *, struct in_addr);
+
+/*
+ * Do option processing on a datagram, possibly discarding it if bad options
+ * are encountered, or forwarding it if source-routed.
+ *
+ * The pass argument is used when operating in the IPSTEALTH mode to tell
+ * what options to process: [LS]SRR (pass 0) or the others (pass 1). The
+ * reason for as many as two passes is that when doing IPSTEALTH, non-routing
+ * options should be processed only if the packet is for us.
+ *
+ * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
+ * processed further.
+ */
+int
+ip_dooptions(struct mbuf *m, int pass)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ u_char *cp;
+ struct in_ifaddr *ia;
+ int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+ struct in_addr *sin, dst;
+ n_time ntime;
+ struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+
+ /* Ignore or reject packets with IP options. */
+ if (ip_doopts == 0)
+ return 0;
+ else if (ip_doopts == 2) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_FILTER_PROHIB;
+ goto bad;
+ }
+
+ dst = ip->ip_dst;
+ cp = (u_char *)(ip + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ /*
+ * Source routing with record. Find interface with current
+ * destination address. If none on this machine then drop if
+ * strictly routed, or do nothing if loosely routed. Record
+ * interface address and bring up next address component. If
+ * strictly routed make sure next address is on directly
+ * accessible net.
+ */
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+#ifdef IPSTEALTH
+ if (ipstealth && pass > 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = ip->ip_dst;
+ ia = (struct in_ifaddr *)
+ ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+ if (ia == NULL) {
+ if (opt == IPOPT_SSRR) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ if (!ip_dosourceroute)
+ goto nosourcerouting;
+ /*
+ * Loose routing, and not at next destination
+ * yet; nothing to do except forward.
+ */
+ break;
+ }
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr)) {
+ /*
+ * End of source route. Should be for us.
+ */
+ if (!ip_acceptsourceroute)
+ goto nosourcerouting;
+ save_rte(m, cp, ip->ip_src);
+ break;
+ }
+#ifdef IPSTEALTH
+ if (ipstealth)
+ goto dropit;
+#endif
+ if (!ip_dosourceroute) {
+ if (ipforwarding) {
+ char buf[16]; /* aaa.bbb.ccc.ddd\0 */
+ /*
+ * Acting as a router, so generate
+ * ICMP
+ */
+nosourcerouting:
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ log(LOG_WARNING,
+ "attempted source route from %s to %s\n",
+ inet_ntoa(ip->ip_src), buf);
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ } else {
+ /*
+ * Not acting as a router, so
+ * silently drop.
+ */
+#ifdef IPSTEALTH
+dropit:
+#endif
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return (1);
+ }
+ }
+
+ /*
+ * locate outgoing interface
+ */
+ (void)memcpy(&ipaddr.sin_addr, cp + off,
+ sizeof(ipaddr.sin_addr));
+
+ if (opt == IPOPT_SSRR) {
+#define INA struct in_ifaddr *
+#define SA struct sockaddr *
+ if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
+ ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+ } else
+ ia = ip_rtaddr(ipaddr.sin_addr);
+ if (ia == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ ip->ip_dst = ipaddr.sin_addr;
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ /*
+ * Let ip_intr's mcast routing check handle mcast pkts
+ */
+ forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+ break;
+
+ case IPOPT_RR:
+#ifdef IPSTEALTH
+ if (ipstealth && pass == 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ /*
+ * If no space remains, ignore.
+ */
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr))
+ break;
+ (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
+ sizeof(ipaddr.sin_addr));
+ /*
+ * Locate outgoing interface; if we're the
+ * destination, use the incoming interface (should be
+ * same).
+ */
+ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
+ (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ goto bad;
+ }
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS:
+#ifdef IPSTEALTH
+ if (ipstealth && pass == 0)
+ break;
+#endif
+ code = cp - (u_char *)ip;
+ if (optlen < 4 || optlen > 40) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < 5) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if (off > optlen - (int)sizeof(int32_t)) {
+ cp[IPOPT_OFFSET + 1] += (1 << 4);
+ if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ break;
+ }
+ off--; /* 0 origin */
+ sin = (struct in_addr *)(cp + off);
+ switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
+
+ case IPOPT_TS_TSONLY:
+ break;
+
+ case IPOPT_TS_TSANDADDR:
+ if (off + sizeof(n_time) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = dst;
+ ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+ m->m_pkthdr.rcvif);
+ if (ia == NULL)
+ continue;
+ (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS_PRESPEC:
+ if (off + sizeof(n_time) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ (void)memcpy(&ipaddr.sin_addr, sin,
+ sizeof(struct in_addr));
+ if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
+ continue;
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ default:
+ code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
+ goto bad;
+ }
+ ntime = iptime();
+ (void)memcpy(cp + off, &ntime, sizeof(n_time));
+ cp[IPOPT_OFFSET] += sizeof(n_time);
+ }
+ }
+ if (forward && ipforwarding) {
+ ip_forward(m, 1);
+ return (1);
+ }
+ return (0);
+bad:
+ icmp_error(m, type, code, 0, 0);
+ ipstat.ips_badoptions++;
+ return (1);
+}
+
+/*
+ * Save incoming source route for use in replies, to be picked up later by
+ * ip_srcroute if the receiver is interested.
+ */
+static void
+save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
+{
+ unsigned olen;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
+ sizeof(struct ipopt_tag), M_NOWAIT);
+ if (opts == NULL)
+ return;
+
+ olen = option[IPOPT_OLEN];
+ if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
+ m_tag_free((struct m_tag *)opts);
+ return;
+ }
+ bcopy(option, opts->ip_srcrt.srcopt, olen);
+ opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+ opts->ip_srcrt.dst = dst;
+ m_tag_prepend(m, (struct m_tag *)opts);
+}
+
+/*
+ * Retrieve incoming source route for use in replies, in the same form used
+ * by setsockopt. The first hop is placed before the options and will be
+ * removed later.
+ */
+struct mbuf *
+ip_srcroute(struct mbuf *m0)
+{
+ struct in_addr *p, *q;
+ struct mbuf *m;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
+ if (opts == NULL)
+ return (NULL);
+
+ if (opts->ip_nhops == 0)
+ return (NULL);
+ m = m_get(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
+
+ /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+ m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
+ sizeof(struct in_addr) + OPTSIZ;
+
+ /*
+ * First, save first hop for return route.
+ */
+ p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
+ *(mtod(m, struct in_addr *)) = *p--;
+
+ /*
+ * Copy option fields and padding (nop) to mbuf.
+ */
+ opts->ip_srcrt.nop = IPOPT_NOP;
+ opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+ (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
+ &(opts->ip_srcrt.nop), OPTSIZ);
+ q = (struct in_addr *)(mtod(m, caddr_t) +
+ sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+ /*
+ * Record return path as an IP source route, reversing the path
+ * (pointers are now aligned).
+ */
+ while (p >= opts->ip_srcrt.route) {
+ *q++ = *p--;
+ }
+ /*
+ * Last hop goes to final destination.
+ */
+ *q = opts->ip_srcrt.dst;
+ m_tag_delete(m0, (struct m_tag *)opts);
+ return (m);
+}
+
+/*
+ * Strip out IP options, at higher level protocol in the kernel. Second
+ * argument is buffer to which options will be moved, and return value is
+ * their length.
+ *
+ * XXX should be deleted; last arg currently ignored.
+ */
+void
+ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+{
+ int i;
+ struct ip *ip = mtod(m, struct ip *);
+ caddr_t opts;
+ int olen;
+
+ olen = (ip->ip_hl << 2) - sizeof (struct ip);
+ opts = (caddr_t)(ip + 1);
+ i = m->m_len - (sizeof (struct ip) + olen);
+ bcopy(opts + olen, opts, (unsigned)i);
+ m->m_len -= olen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= olen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+}
+
+/*
+ * Insert IP options into preformed packet. Adjust IP destination as
+ * required for IP source routing, as indicated by a non-zero in_addr at the
+ * start of the options.
+ *
+ * XXX This routine assumes that the packet has no options in place.
+ */
+struct mbuf *
+ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
+{
+ struct ipoption *p = mtod(opt, struct ipoption *);
+ struct mbuf *n;
+ struct ip *ip = mtod(m, struct ip *);
+ unsigned optlen;
+
+ optlen = opt->m_len - sizeof(p->ipopt_dst);
+ if (optlen + ip->ip_len > IP_MAXPACKET) {
+ *phlen = 0;
+ return (m); /* XXX should fail */
+ }
+ if (p->ipopt_dst.s_addr)
+ ip->ip_dst = p->ipopt_dst;
+ if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+ MGETHDR(n, M_DONTWAIT, MT_DATA);
+ if (n == NULL) {
+ *phlen = 0;
+ return (m);
+ }
+ M_MOVE_PKTHDR(n, m);
+ n->m_pkthdr.rcvif = NULL;
+#ifdef MAC
+ mac_copy_mbuf(m, n);
+#endif
+ n->m_pkthdr.len += optlen;
+ m->m_len -= sizeof(struct ip);
+ m->m_data += sizeof(struct ip);
+ n->m_next = m;
+ m = n;
+ m->m_len = optlen + sizeof(struct ip);
+ m->m_data += max_linkhdr;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ } else {
+ m->m_data -= optlen;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ }
+ ip = mtod(m, struct ip *);
+ bcopy(p->ipopt_list, ip + 1, optlen);
+ *phlen = sizeof(struct ip) + optlen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = *phlen >> 2;
+ ip->ip_len += optlen;
+ return (m);
+}
+
+/*
+ * Copy options from ip to jp, omitting those not copied during
+ * fragmentation.
+ */
+int
+ip_optcopy(struct ip *ip, struct ip *jp)
+{
+ u_char *cp, *dp;
+ int opt, optlen, cnt;
+
+ cp = (u_char *)(ip + 1);
+ dp = (u_char *)(jp + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP) {
+ /* Preserve for IP mcast tunnel's LSRR alignment. */
+ *dp++ = IPOPT_NOP;
+ optlen = 1;
+ continue;
+ }
+
+ KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
+ ("ip_optcopy: malformed ipv4 option"));
+ optlen = cp[IPOPT_OLEN];
+ KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
+ ("ip_optcopy: malformed ipv4 option"));
+
+ /* Bogus lengths should have been caught by ip_dooptions. */
+ if (optlen > cnt)
+ optlen = cnt;
+ if (IPOPT_COPIED(opt)) {
+ bcopy(cp, dp, optlen);
+ dp += optlen;
+ }
+ }
+ for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
+ *dp++ = IPOPT_EOL;
+ return (optlen);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets. Store in mbuf
+ * with pointer in pcbopt, adding pseudo-option with destination address if
+ * source routed.
+ */
+int
+ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
+{
+ int cnt, optlen;
+ u_char *cp;
+ struct mbuf **pcbopt;
+ u_char opt;
+
+ INP_LOCK_ASSERT(inp);
+
+ pcbopt = &inp->inp_options;
+
+ /* turn off any old options */
+ if (*pcbopt)
+ (void)m_free(*pcbopt);
+ *pcbopt = 0;
+ if (m == NULL || m->m_len == 0) {
+ /*
+ * Only turning off any previous options.
+ */
+ if (m != NULL)
+ (void)m_free(m);
+ return (0);
+ }
+
+ if (m->m_len % sizeof(int32_t))
+ goto bad;
+ /*
+ * IP first-hop destination address will be stored before actual
+ * options; move other options back and clear it when none present.
+ */
+ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+ goto bad;
+ cnt = m->m_len;
+ m->m_len += sizeof(struct in_addr);
+ cp = mtod(m, u_char *) + sizeof(struct in_addr);
+ bcopy(mtod(m, void *), cp, (unsigned)cnt);
+ bzero(mtod(m, void *), sizeof(struct in_addr));
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ goto bad;
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+ goto bad;
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+ /*
+ * User process specifies route as:
+ *
+ * ->A->B->C->D
+ *
+ * D must be our final destination (but we can't
+ * check that since we may not have connected yet).
+ * A is first hop destination, which doesn't appear
+ * in actual IP option, but is stored before the
+ * options.
+ */
+ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+ goto bad;
+ m->m_len -= sizeof(struct in_addr);
+ cnt -= sizeof(struct in_addr);
+ optlen -= sizeof(struct in_addr);
+ cp[IPOPT_OLEN] = optlen;
+ /*
+ * Move first hop before start of options.
+ */
+ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+ sizeof(struct in_addr));
+ /*
+ * Then copy rest of options back
+ * to close up the deleted entry.
+ */
+ bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
+ &cp[IPOPT_OFFSET+1],
+ (unsigned)cnt - (IPOPT_MINOFF - 1));
+ break;
+ }
+ }
+ if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+ goto bad;
+ *pcbopt = m;
+ return (0);
+
+bad:
+ (void)m_free(m);
+ return (EINVAL);
+}
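
ip_dooptions(), ip_optcopy() and ip_pcbopts() above all walk the options area with the
same type/length loop: the single-byte EOL and NOP options are special-cased, and every
other option must carry a length byte that stays within the remaining option space. A
stand-alone user-space sketch of that walk (walk_ipopts() and the sample buffer are
hypothetical; the IPOPT_* values mirror <netinet/ip.h>):

#include <stdio.h>

#define IPOPT_EOL	0	/* end of option list */
#define IPOPT_NOP	1	/* no operation */
#define IPOPT_OPTVAL	0	/* offset of option type */
#define IPOPT_OLEN	1	/* offset of option length */

/*
 * Walk an IPv4 options area of 'cnt' bytes, printing each option.
 * Returns 0 on success, -1 on a malformed length (the condition
 * ip_dooptions() answers with an ICMP parameter problem).
 */
static int
walk_ipopts(const unsigned char *cp, int cnt)
{
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP) {
			optlen = 1;
			continue;
		}
		if (cnt < 2)
			return (-1);
		optlen = cp[IPOPT_OLEN];
		if (optlen < 2 || optlen > cnt)
			return (-1);
		printf("option %d, length %d\n", opt, optlen);
	}
	return (0);
}

int
main(void)
{
	/* NOP, then a record-route option (type 7, length 7), then EOL. */
	unsigned char opts[] = { 1, 7, 7, 4, 0, 0, 0, 0, 0 };

	return (walk_ipopts(opts, sizeof(opts)) == 0 ? 0 : 1);
}
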
Index: in.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/in.h -L sys/netinet/in.h -u -r1.2 -r1.3
--- sys/netinet/in.h
+++ sys/netinet/in.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/in.h,v 1.90.2.4 2005/12/27 00:52:24 gnn Exp $
+ * $FreeBSD: src/sys/netinet/in.h,v 1.100 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IN_H_
@@ -84,6 +84,33 @@
#define _STRUCT_IN_ADDR_DECLARED
#endif
+#ifndef _SOCKLEN_T_DECLARED
+typedef __socklen_t socklen_t;
+#define _SOCKLEN_T_DECLARED
+#endif
+
+/* Avoid collision with original definition in sys/socket.h. */
+#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED
+/*
+ * RFC 2553: protocol-independent placeholder for socket addresses
+ */
+#define _SS_MAXSIZE 128U
+#define _SS_ALIGNSIZE (sizeof(__int64_t))
+#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t))
+#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
+
+struct sockaddr_storage {
+ unsigned char ss_len; /* address length */
+ sa_family_t ss_family; /* address family */
+ char __ss_pad1[_SS_PAD1SIZE];
+ __int64_t __ss_align; /* force desired struct alignment */
+ char __ss_pad2[_SS_PAD2SIZE];
+};
+#define _STRUCT_SOCKADDR_STORAGE_DECLARED
+#endif
+
/* Socket address, internet style. */
struct sockaddr_in {
uint8_t sin_len;
@@ -228,7 +255,7 @@
#define IPPROTO_APES 99 /* any private encr. scheme */
#define IPPROTO_GMTP 100 /* GMTP*/
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
-#define IPPROTO_SCTP 132 /* SCTP */
+#define IPPROTO_SCTP 132 /* SCTP */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
@@ -351,15 +378,15 @@
#define IN_EXPERIMENTAL(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
#define IN_BADCLASS(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
-#define IN_LINKLOCAL(i) (((u_int32_t)(i) & 0xffff0000) == 0xa9fe0000)
+#define IN_LINKLOCAL(i) (((u_int32_t)(i) & 0xffff0000) == 0xa9fe0000)
-#define IN_PRIVATE(i) ((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
- (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
- (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
-
-#define IN_LOCAL_GROUP(i) (((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
+#define IN_PRIVATE(i) ((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
+ (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
+ (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
+
+#define IN_LOCAL_GROUP(i) (((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
-#define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
+#define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
#define INADDR_LOOPBACK (u_int32_t)0x7f000001
#ifndef _KERNEL
@@ -369,6 +396,7 @@
#define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */
+#define INADDR_ALLRPTS_GROUP (u_int32_t)0xe0000016 /* 224.0.0.22, IGMPv3 */
#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */
#define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */
#define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */
@@ -389,7 +417,8 @@
#define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */
#define IP_SENDSRCADDR IP_RECVDSTADDR /* cmsg_type to set src addr */
#define IP_RETOPTS 8 /* ip_opts; set/get IP options */
-#define IP_MULTICAST_IF 9 /* u_char; set/get IP multicast i/f */
+#define IP_MULTICAST_IF 9 /* struct in_addr *or* struct ip_mreqn;
+ * set/get IP multicast i/f */
#define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */
#define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */
#define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */
@@ -420,6 +449,11 @@
#define IP_FW_GET 54 /* get entire firewall rule chain */
#define IP_FW_RESETLOG 55 /* reset logging counters */
+#define IP_FW_NAT_CFG 56 /* add/config a nat rule */
+#define IP_FW_NAT_DEL 57 /* delete a nat rule */
+#define IP_FW_NAT_GET_CONFIG 58 /* get configuration of a nat rule */
+#define IP_FW_NAT_GET_LOG 59 /* get log of a nat rule */
+
#define IP_DUMMYNET_CONFIGURE 60 /* add/configure a dummynet pipe */
#define IP_DUMMYNET_DEL 61 /* delete a dummynet pipe from chain */
#define IP_DUMMYNET_FLUSH 62 /* flush dummynet */
@@ -429,12 +463,37 @@
#define IP_MINTTL 66 /* minimum TTL for packet or drop */
#define IP_DONTFRAG 67 /* don't fragment packet */
+/* IPv4 Source Filter Multicast API [RFC3678] */
+#define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */
+#define IP_DROP_SOURCE_MEMBERSHIP 71 /* drop a single source */
+#define IP_BLOCK_SOURCE 72 /* block a source */
+#define IP_UNBLOCK_SOURCE 73 /* unblock a source */
+
+/* The following option is private; do not use it from user applications. */
+#define IP_MSFILTER 74 /* set/get filter list */
+
+/* Protocol Independent Multicast API [RFC3678] */
+#define MCAST_JOIN_GROUP 80 /* join an any-source group */
+#define MCAST_LEAVE_GROUP 81 /* leave all sources for group */
+#define MCAST_JOIN_SOURCE_GROUP 82 /* join a source-specific group */
+#define MCAST_LEAVE_SOURCE_GROUP 83 /* leave a single source */
+#define MCAST_BLOCK_SOURCE 84 /* block a source */
+#define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */
+
/*
* Defaults and limits for options
*/
#define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */
#define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */
-#define IP_MAX_MEMBERSHIPS 20 /* per socket */
+
+/*
+ * The imo_membership vector for each socket is now dynamically allocated at
+ * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
+ * according to a power-of-two increment.
+ */
+#define IP_MIN_MEMBERSHIPS 31
+#define IP_MAX_MEMBERSHIPS 4095
+#define IP_MAX_SOURCE_FILTER 1024 /* # of filters per socket, per group */
/*
* Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
@@ -445,6 +504,82 @@
};
/*
+ * Modified argument structure for IP_MULTICAST_IF, obtained from Linux.
+ * This is used to specify an interface index for multicast sends, as
+ * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available).
+ */
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index; cast to uint32_t */
+};
+
+/*
+ * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678]
+ */
+struct ip_mreq_source {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_sourceaddr; /* IP address of source */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+/*
+ * Argument structures for Protocol-Independent Multicast Source
+ * Filter APIs. [RFC3678]
+ */
+struct group_req {
+ uint32_t gr_interface; /* interface index */
+ struct sockaddr_storage gr_group; /* group address */
+};
+
+struct group_source_req {
+ uint32_t gsr_interface; /* interface index */
+ struct sockaddr_storage gsr_group; /* group address */
+ struct sockaddr_storage gsr_source; /* source address */
+};
+
+#ifndef __MSFILTERREQ_DEFINED
+#define __MSFILTERREQ_DEFINED
+/*
+ * The following structure is private; do not use it from user applications.
+ * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between
+ * the RFC 3678 libc functions and the kernel.
+ */
+struct __msfilterreq {
+ uint32_t msfr_ifindex; /* interface index */
+ uint32_t msfr_fmode; /* filter mode for group */
+ uint32_t msfr_nsrcs; /* # of sources in msfr_srcs */
+ struct sockaddr_storage msfr_group; /* group address */
+ struct sockaddr_storage *msfr_srcs; /* pointer to the first member
+ * of a contiguous array of
+ * sources to filter in full.
+ */
+};
+#endif
+
+struct sockaddr;
+
+/*
+ * Advanced (Full-state) APIs [RFC3678]
+ * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter().
+ * We use uint32_t here to be consistent.
+ */
+int setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t,
+ uint32_t, struct in_addr *);
+int getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *,
+ uint32_t *, struct in_addr *);
+int setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+ uint32_t, uint32_t, struct sockaddr_storage *);
+int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+ uint32_t *, uint32_t *, struct sockaddr_storage *);
+
+/*
+ * Filter modes; also used to represent per-socket filter mode internally.
+ */
+#define MCAST_INCLUDE 1 /* fmode: include these source(s) */
+#define MCAST_EXCLUDE 2 /* fmode: exclude these source(s) */
+
+/*
* Argument for IP_PORTRANGE:
* - which range to search when port is unspecified at bind() or connect()
*/
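
The protocol-independent multicast options added above (MCAST_JOIN_GROUP and friends)
take a struct group_req or struct group_source_req through setsockopt(). A minimal
sketch of a source-specific join, assuming placeholder group and source addresses and a
placeholder interface name:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Join group 232.1.1.1 restricted to source 192.0.2.1 using the
 * MCAST_JOIN_SOURCE_GROUP option declared in the in.h hunk above.
 */
int
main(void)
{
	struct group_source_req gsr;
	struct sockaddr_in *grp, *src;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s == -1) {
		perror("socket");
		return (1);
	}

	memset(&gsr, 0, sizeof(gsr));
	gsr.gsr_interface = if_nametoindex("em0");	/* placeholder interface */

	grp = (struct sockaddr_in *)&gsr.gsr_group;
	grp->sin_family = AF_INET;
	grp->sin_len = sizeof(*grp);
	inet_pton(AF_INET, "232.1.1.1", &grp->sin_addr);

	src = (struct sockaddr_in *)&gsr.gsr_source;
	src->sin_family = AF_INET;
	src->sin_len = sizeof(*src);
	inet_pton(AF_INET, "192.0.2.1", &src->sin_addr);

	if (setsockopt(s, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP,
	    &gsr, sizeof(gsr)) == -1) {
		perror("MCAST_JOIN_SOURCE_GROUP");
		close(s);
		return (1);
	}
	close(s);
	return (0);
}
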
Index: tcp_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -L sys/netinet/tcp_input.c -L sys/netinet/tcp_input.c -u -r1.5 -r1.6
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -27,21 +27,20 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.281.2.5 2006/03/01 21:13:29 andre Exp $
*/
-#include "opt_ipfw.h" /* for ipfw_fwd */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_input.c,v 1.370 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipfw.h" /* for ipfw_fwd */
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_input.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/kernel.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
@@ -60,14 +59,17 @@
#include <net/if.h>
#include <net/route.h>
+#define TCPSTATES /* for logging */
+
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
+#include <netinet/ip_icmp.h> /* required for icmp_var.h */
+#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/in6_pcb.h>
@@ -80,47 +82,42 @@
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
+#include <netinet/tcp_syncache.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-#ifdef FAST_IPSEC
+#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /*FAST_IPSEC*/
-
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#include <netkey/key.h>
#endif /*IPSEC*/
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
static const int tcprexmtthresh = 3;
struct tcpstat tcpstat;
SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
&tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
-static int log_in_vain = 0;
+int tcp_log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &log_in_vain, 0, "Log all incoming TCP connections");
+ &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports");
static int blackhole = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
- &blackhole, 0, "Do not send RST when dropping refused connections");
+ &blackhole, 0, "Do not send RST on segments to closed ports");
int tcp_delack_enabled = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
&tcp_delack_enabled, 0,
"Delay ACK to try and piggyback it onto a data packet");
-#ifdef TCP_DROP_SYNFIN
static int drop_synfin = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
&drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
-#endif
static int tcp_do_rfc3042 = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
@@ -134,58 +131,34 @@
static int tcp_insecure_rst = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
&tcp_insecure_rst, 0,
- "Follow the old (insecure) criteria for accepting RST packets.");
-
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
- "TCP Segment Reassembly Queue");
-
-static int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
- &tcp_reass_maxseg, 0,
- "Global maximum number of TCP Segments in Reassembly Queue");
-
-int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
- &tcp_reass_qsize, 0,
- "Global number of TCP Segments currently in Reassembly Queue");
-
-static int tcp_reass_maxqlen = 48;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
- &tcp_reass_maxqlen, 0,
- "Maximum number of TCP Segments per individual Reassembly Queue");
-
-static int tcp_reass_overflows = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
- &tcp_reass_overflows, 0,
- "Global number of TCP Segment Reassembly Queue Overflows");
+ "Follow the old (insecure) criteria for accepting RST packets");
int tcp_do_autorcvbuf = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
- &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
+ &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
int tcp_autorcvbuf_inc = 16*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
- &tcp_autorcvbuf_inc, 0, "Incrementor step size of automatic receive buffer");
+ &tcp_autorcvbuf_inc, 0,
+ "Incrementor step size of automatic receive buffer");
int tcp_autorcvbuf_max = 256*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
- &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
+ &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
-struct mtx *tcbinfo_mtx;
static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-
+static void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int);
+static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
-static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
- struct mbuf *);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static int tcp_timewait(struct tcptw *, struct tcpopt *,
- struct tcphdr *, struct mbuf *, int);
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
#ifdef INET6
@@ -208,201 +181,25 @@
* - this is a half-synchronized T/TCP connection.
*/
#define DELAY_ACK(tp) \
- ((!callout_active(tp->tt_delack) && \
+ ((!tcp_timer_active(tp, TT_DELACK) && \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
-/* Initialize TCP reassembly queue */
-uma_zone_t tcp_reass_zone;
-void
-tcp_reass_init()
-{
- tcp_reass_maxseg = nmbclusters / 16;
- TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &tcp_reass_maxseg);
- tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
-}
-
-static int
-tcp_reass(tp, th, tlenp, m)
- register struct tcpcb *tp;
- register struct tcphdr *th;
- int *tlenp;
- struct mbuf *m;
-{
- struct tseg_qent *q;
- struct tseg_qent *p = NULL;
- struct tseg_qent *nq;
- struct tseg_qent *te = NULL;
- struct socket *so = tp->t_inpcb->inp_socket;
- int flags;
-
- INP_LOCK_ASSERT(tp->t_inpcb);
-
- /*
- * XXX: tcp_reass() is rather inefficient with its data structures
- * and should be rewritten (see NetBSD for optimizations). While
- * doing that it should move to its own file tcp_reass.c.
- */
-
- /*
- * Call with th==NULL after become established to
- * force pre-ESTABLISHED data up to user socket.
- */
- if (th == NULL)
- goto present;
-
- /*
- * Limit the number of segments in the reassembly queue to prevent
- * holding on to too many segments (and thus running out of mbufs).
- * Make sure to let the missing segment through which caused this
- * queue. Always keep one global queue entry spare to be able to
- * process the missing segment.
- */
- if (th->th_seq != tp->rcv_nxt &&
- (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
- tp->t_segqlen >= tcp_reass_maxqlen)) {
- tcp_reass_overflows++;
- tcpstat.tcps_rcvmemdrop++;
- m_freem(m);
- *tlenp = 0;
- return (0);
- }
-
- /*
- * Allocate a new queue entry. If we can't, or hit the zone limit
- * just drop the pkt.
- */
- te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
- if (te == NULL) {
- tcpstat.tcps_rcvmemdrop++;
- m_freem(m);
- *tlenp = 0;
- return (0);
- }
- tp->t_segqlen++;
- tcp_reass_qsize++;
-
- /*
- * Find a segment which begins after this one does.
- */
- LIST_FOREACH(q, &tp->t_segq, tqe_q) {
- if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
- break;
- p = q;
- }
-
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
- */
- if (p != NULL) {
- register int i;
- /* conversion to int (in i) handles seq wraparound */
- i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
- if (i > 0) {
- if (i >= *tlenp) {
- tcpstat.tcps_rcvduppack++;
- tcpstat.tcps_rcvdupbyte += *tlenp;
- m_freem(m);
- uma_zfree(tcp_reass_zone, te);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- /*
- * Try to present any queued data
- * at the left window edge to the user.
- * This is needed after the 3-WHS
- * completes.
- */
- goto present; /* ??? */
- }
- m_adj(m, i);
- *tlenp -= i;
- th->th_seq += i;
- }
- }
- tcpstat.tcps_rcvoopack++;
- tcpstat.tcps_rcvoobyte += *tlenp;
-
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
- */
- while (q) {
- register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
- if (i <= 0)
- break;
- if (i < q->tqe_len) {
- q->tqe_th->th_seq += i;
- q->tqe_len -= i;
- m_adj(q->tqe_m, i);
- break;
- }
-
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- m_freem(q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- q = nq;
- }
-
- /* Insert the new segment queue entry into place. */
- te->tqe_m = m;
- te->tqe_th = th;
- te->tqe_len = *tlenp;
-
- if (p == NULL) {
- LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
- } else {
- LIST_INSERT_AFTER(p, te, tqe_q);
- }
-
-present:
- /*
- * Present data to user, advancing rcv_nxt through
- * completed sequence space.
- */
- if (!TCPS_HAVEESTABLISHED(tp->t_state))
- return (0);
- q = LIST_FIRST(&tp->t_segq);
- if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
- return (0);
- SOCKBUF_LOCK(&so->so_rcv);
- do {
- tp->rcv_nxt += q->tqe_len;
- flags = q->tqe_th->th_flags & TH_FIN;
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
- m_freem(q->tqe_m);
- else
- sbappendstream_locked(&so->so_rcv, q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- q = nq;
- } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
- ND6_HINT(tp);
- sorwakeup_locked(so);
- return (flags);
-}
/*
- * TCP input routine, follows pages 65-76 of the
- * protocol specification dated September, 1981 very closely.
+ * TCP input handling is split into multiple parts:
+ * tcp6_input is a thin wrapper around tcp_input for the extended
+ * ip6_protox[] call format in ip6_input
+ * tcp_input handles primary segment validation, inpcb lookup and
+ * SYN processing on listen sockets
+ * tcp_do_segment processes the ACK and text of the segment for
+ * establishing, established and closing connections
*/
#ifdef INET6
int
-tcp6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+tcp6_input(struct mbuf **mp, int *offp, int proto)
{
- register struct mbuf *m = *mp;
+ struct mbuf *m = *mp;
struct in6_ifaddr *ia6;
IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
@@ -427,43 +224,39 @@
#endif
void
-tcp_input(m, off0)
- register struct mbuf *m;
- int off0;
+tcp_input(struct mbuf *m, int off0)
{
- register struct tcphdr *th;
- register struct ip *ip = NULL;
- register struct ipovly *ipov;
- register struct inpcb *inp = NULL;
+ struct tcphdr *th;
+ struct ip *ip = NULL;
+ struct ipovly *ipov;
+ struct inpcb *inp = NULL;
+ struct tcpcb *tp = NULL;
+ struct socket *so = NULL;
u_char *optp = NULL;
int optlen = 0;
int len, tlen, off;
int drop_hdrlen;
- register struct tcpcb *tp = 0;
- register int thflags;
- struct socket *so = 0;
- int todrop, acked, ourfinisacked, needoutput = 0;
- u_long tiwin;
- struct tcpopt to; /* options in this segment */
- int headlocked = 0;
+ int thflags;
+ int rstreason = 0; /* For badport_bandlim accounting purposes */
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag;
#endif
- int rstreason; /* For badport_bandlim accounting purposes */
-
- struct ip6_hdr *ip6 = NULL;
#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
int isipv6;
#else
+ const void *ip6 = NULL;
const int isipv6 = 0;
#endif
+ struct tcpopt to; /* options in this segment */
+ char *s = NULL; /* address and port logging */
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
* now IPv6.
*/
- u_char tcp_saveipgen[40];
+ u_char tcp_saveipgen[IP6_HDR_LEN];
struct tcphdr tcp_savetcp;
short ostate = 0;
#endif
@@ -471,13 +264,13 @@
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
- bzero((char *)&to, sizeof(to));
+ to.to_flags = 0;
tcpstat.tcps_rcvtotal++;
if (isipv6) {
#ifdef INET6
- /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
+ /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
ip6 = mtod(m, struct ip6_hdr *);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
@@ -499,7 +292,7 @@
goto drop;
}
#else
- th = NULL; /* XXX: avoid compiler warning */
+ th = NULL; /* XXX: Avoid compiler warning. */
#endif
} else {
/*
@@ -511,7 +304,8 @@
off0 = sizeof(struct ip);
}
if (m->m_len < sizeof (struct tcpiphdr)) {
- if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
+ == NULL) {
tcpstat.tcps_rcvshort++;
return;
}
@@ -549,10 +343,8 @@
tcpstat.tcps_rcvbadsum++;
goto drop;
}
-#ifdef INET6
/* Re-initialization for later version check */
ip->ip_v = IPVERSION;
-#endif
}
/*
@@ -575,7 +367,7 @@
} else {
if (m->m_len < sizeof(struct ip) + off) {
if ((m = m_pullup(m, sizeof (struct ip) + off))
- == 0) {
+ == NULL) {
tcpstat.tcps_rcvshort++;
return;
}
@@ -589,18 +381,6 @@
}
thflags = th->th_flags;
-#ifdef TCP_DROP_SYNFIN
- /*
- * If the drop_synfin option is enabled, drop all packets with
- * both the SYN and FIN bits set. This prevents e.g. nmap from
- * identifying the TCP/IP stack.
- *
- * This is a violation of the TCP specification.
- */
- if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
- goto drop;
-#endif
-
/*
* Convert TCP protocol specific fields to host format.
*/
@@ -610,14 +390,7 @@
th->th_urp = ntohs(th->th_urp);
/*
- * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
- * until after ip6_savecontrol() is called and before other functions
- * which don't want those proto headers.
- * Because ip6_savecontrol() is going to parse the mbuf to
- * search for data to be passed up to user-land, it wants mbuf
- * parameters to be unchanged.
- * XXX: the call of ip6_savecontrol() has been obsoleted based on
- * latest version of the advanced API (20020110).
+ * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
*/
drop_hdrlen = off0 + off;
@@ -625,11 +398,12 @@
* Locate pcb for segment.
*/
INP_INFO_WLOCK(&tcbinfo);
- headlocked = 1;
findpcb:
- KASSERT(headlocked, ("tcp_input: findpcb: head not locked"));
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef IPFIREWALL_FORWARD
- /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag != NULL && isipv6 == 0) { /* IPv6 support is not yet */
@@ -652,251 +426,294 @@
next_hop->sin_port ?
ntohs(next_hop->sin_port) :
th->th_dport,
- 1, m->m_pkthdr.rcvif);
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
}
/* Remove the tag from the packet. We don't need it anymore. */
m_tag_delete(m, fwd_tag);
- } else {
+ } else
#endif /* IPFIREWALL_FORWARD */
+ {
if (isipv6) {
#ifdef INET6
inp = in6_pcblookup_hash(&tcbinfo,
&ip6->ip6_src, th->th_sport,
&ip6->ip6_dst, th->th_dport,
- 1, m->m_pkthdr.rcvif);
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
#endif
} else
inp = in_pcblookup_hash(&tcbinfo,
ip->ip_src, th->th_sport,
ip->ip_dst, th->th_dport,
- 1, m->m_pkthdr.rcvif);
-#ifdef IPFIREWALL_FORWARD
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
}
-#endif /* IPFIREWALL_FORWARD */
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
-#ifdef INET6
- if (isipv6) {
- if (inp != NULL && ipsec6_in_reject(m, inp)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif
- goto drop;
- }
- } else
-#endif /* INET6 */
- if (inp != NULL && ipsec4_in_reject(m, inp)) {
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif
- goto drop;
- }
-#endif /*IPSEC || FAST_IPSEC*/
/*
- * If the state is CLOSED (i.e., TCB does not exist) then
- * all data in the incoming segment is discarded.
- * If the TCB exists but is in CLOSED state, it is embryonic,
- * but should either do a listen or a connect soon.
+ * If the INPCB does not exist then all data in the incoming
+ * segment is discarded and an appropriate RST is sent back.
*/
if (inp == NULL) {
- if (log_in_vain) {
-#ifdef INET6
- char dbuf[INET6_ADDRSTRLEN+2], sbuf[INET6_ADDRSTRLEN+2];
-#else
- char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"];
-#endif
+ /*
+ * Log communication attempts to ports that are not
+ * in use.
+ */
+ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
+ tcp_log_in_vain == 2) {
+ if ((s = tcp_log_addrs(NULL, th, (void *)ip, ip6)))
+ log(LOG_INFO, "%s; %s: Connection attempt "
+ "to closed port\n", s, __func__);
+ }
+ /*
+ * When blackholing do not respond with a RST but
+ * completely ignore the segment and drop it.
+ */
+ if ((blackhole == 1 && (thflags & TH_SYN)) ||
+ blackhole == 2)
+ goto dropunlock;
- if (isipv6) {
-#ifdef INET6
- strcpy(dbuf, "[");
- strcpy(sbuf, "[");
- strcat(dbuf, ip6_sprintf(&ip6->ip6_dst));
- strcat(sbuf, ip6_sprintf(&ip6->ip6_src));
- strcat(dbuf, "]");
- strcat(sbuf, "]");
-#endif
- } else {
- strcpy(dbuf, inet_ntoa(ip->ip_dst));
- strcpy(sbuf, inet_ntoa(ip->ip_src));
- }
- switch (log_in_vain) {
- case 1:
- if ((thflags & TH_SYN) == 0)
- break;
- /* FALLTHROUGH */
- case 2:
- log(LOG_INFO,
- "Connection attempt to TCP %s:%d "
- "from %s:%d flags:0x%02x\n",
- dbuf, ntohs(th->th_dport), sbuf,
- ntohs(th->th_sport), thflags);
- break;
- default:
- break;
- }
- }
- if (blackhole) {
- switch (blackhole) {
- case 1:
- if (thflags & TH_SYN)
- goto drop;
- break;
- case 2:
- goto drop;
- default:
- goto drop;
- }
- }
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
INP_LOCK(inp);
- /* Check the minimum TTL for socket. */
+#ifdef IPSEC
+#ifdef INET6
+ if (isipv6 && ipsec6_in_reject(m, inp)) {
+ ipsec6stat.in_polvio++;
+ goto dropunlock;
+ } else
+#endif /* INET6 */
+ if (ipsec4_in_reject(m, inp) != 0) {
+ ipsec4stat.in_polvio++;
+ goto dropunlock;
+ }
+#endif /* IPSEC */
+
+ /*
+ * Check the minimum TTL for socket.
+ */
if (inp->inp_ip_minttl != 0) {
#ifdef INET6
if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim)
- goto drop;
+ goto dropunlock;
else
#endif
if (inp->inp_ip_minttl > ip->ip_ttl)
- goto drop;
+ goto dropunlock;
}
+ /*
+ * A previous connection in TIMEWAIT state is supposed to catch
+ * stray or duplicate segments arriving late. If this segment
+ * was a legitimate new connection attempt the old INPCB gets
+ * removed and we can try again to find a listening socket.
+ */
if (inp->inp_vflag & INP_TIMEWAIT) {
- /*
- * The only option of relevance is TOF_CC, and only if
- * present in a SYN segment. See tcp_timewait().
- */
if (thflags & TH_SYN)
- tcp_dooptions(&to, optp, optlen, 1);
- if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
- &to, th, m, tlen))
- goto findpcb;
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
/*
- * tcp_timewait unlocks inp.
+ * NB: tcp_twcheck unlocks the INP and frees the mbuf.
*/
+ if (tcp_twcheck(inp, &to, th, m, tlen))
+ goto findpcb;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
+ /*
+ * The TCPCB may no longer exist if the connection is winding
+ * down or it is in the CLOSED state. Either way we drop the
+ * segment and send an appropriate response.
+ */
tp = intotcpcb(inp);
- if (tp == 0) {
- INP_UNLOCK(inp);
+ if (tp == NULL || tp->t_state == TCPS_CLOSED) {
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
- if (tp->t_state == TCPS_CLOSED)
- goto drop;
-
- /* Unscale the window into a 32-bit value. */
- if ((thflags & TH_SYN) == 0)
- tiwin = th->th_win << tp->snd_scale;
- else
- tiwin = th->th_win;
#ifdef MAC
INP_LOCK_ASSERT(inp);
if (mac_check_inpcb_deliver(inp, m))
- goto drop;
+ goto dropunlock;
#endif
so = inp->inp_socket;
+ KASSERT(so != NULL, ("%s: so == NULL", __func__));
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG) {
ostate = tp->t_state;
- if (isipv6)
+ if (isipv6) {
+#ifdef INET6
bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
- else
+#endif
+ } else
bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
tcp_savetcp = *th;
}
#endif
+ /*
+ * When the socket is accepting connections (the INPCB is in LISTEN
+ * state) we look into the SYN cache if this is a new connection
+ * attempt or the completion of a previous one.
+ */
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
-#ifdef INET6
+ KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
+ "tp not listening", __func__));
+
+ bzero(&inc, sizeof(inc));
inc.inc_isipv6 = isipv6;
-#endif
+#ifdef INET6
if (isipv6) {
inc.inc6_faddr = ip6->ip6_src;
inc.inc6_laddr = ip6->ip6_dst;
- } else {
+ } else
+#endif
+ {
inc.inc_faddr = ip->ip_src;
inc.inc_laddr = ip->ip_dst;
}
inc.inc_fport = th->th_sport;
inc.inc_lport = th->th_dport;
- /*
- * If the state is LISTEN then ignore segment if it contains
- * a RST. If the segment contains an ACK then it is bad and
- * send a RST. If it does not contain a SYN then it is not
- * interesting; drop it.
- *
- * If the state is SYN_RECEIVED (syncache) and seg contains
- * an ACK, but not for our SYN/ACK, send a RST. If the seg
- * contains a RST, check the sequence number to see if it
- * is a valid reset segment.
- */
- if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
- if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- if (!syncache_expand(&inc, th, &so, m)) {
- /*
- * No syncache entry, or ACK was not
- * for our SYN/ACK. Send a RST.
- */
- tcpstat.tcps_badsyn++;
- rstreason = BANDLIM_RST_OPENPORT;
- goto dropwithreset;
- }
- if (so == NULL) {
- /*
- * Could not complete 3-way handshake,
- * connection is being closed down, and
- * syncache will free mbuf.
- */
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- /*
- * Socket is created in state SYN_RECEIVED.
- * Continue processing segment.
- */
- INP_UNLOCK(inp);
- inp = sotoinpcb(so);
- INP_LOCK(inp);
- tp = intotcpcb(inp);
- /*
- * This is what would have happened in
- * tcp_output() when the SYN,ACK was sent.
- */
- tp->snd_up = tp->snd_una;
- tp->snd_max = tp->snd_nxt = tp->iss + 1;
- tp->last_ack_sent = tp->rcv_nxt;
+ /*
+ * Check for an existing connection attempt in syncache if
+ * the flag is only ACK. A successful lookup creates a new
+ * socket appended to the listen queue in SYN_RECEIVED state.
+ */
+ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+ /*
+ * Parse the TCP options here because
+ * syncookies need access to the reflected
+ * timestamp.
+ */
+ tcp_dooptions(&to, optp, optlen, 0);
+ /*
+ * NB: syncache_expand() doesn't unlock
+ * inp and tcpinfo locks.
+ */
+ if (!syncache_expand(&inc, &to, th, &so, m)) {
/*
- * RFC1323: The window in SYN & SYN/ACK
- * segments is never scaled.
+ * No syncache entry or ACK was not
+ * for our SYN/ACK. Send a RST.
+ * NB: syncache did its own logging
+ * of the failure cause.
*/
- tp->snd_wnd = tiwin; /* unscaled */
- goto after_listen;
- }
- if (thflags & TH_RST) {
- syncache_chkrst(&inc, th);
- goto drop;
- }
- if (thflags & TH_ACK) {
- syncache_badack(&inc);
- tcpstat.tcps_badsyn++;
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
- goto drop;
+ if (so == NULL) {
+ /*
+ * We completed the 3-way handshake
+ * but could not allocate a socket
+ * either due to memory shortage,
+ * listen queue length limits or
+ * global socket limits. Send RST
+ * or wait and have the remote end
+ * retransmit the ACK for another
+ * try.
+ */
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Socket allocation failed due to "
+ "limits or memory shortage, %s\n",
+ s, __func__, (tcp_sc_rst_sock_fail ?
+ "sending RST" : "try again"));
+ if (tcp_sc_rst_sock_fail) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ } else
+ goto dropunlock;
+ }
+ /*
+ * Socket is created in state SYN_RECEIVED.
+ * Unlock the listen socket, lock the newly
+ * created socket and update the tp variable.
+ */
+ INP_UNLOCK(inp); /* listen socket */
+ inp = sotoinpcb(so);
+ INP_LOCK(inp); /* new connection */
+ tp = intotcpcb(inp);
+ KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
+ ("%s: ", __func__));
+ /*
+ * Process the segment and the data it
+ * contains. tcp_do_segment() consumes
+ * the mbuf chain and unlocks the inpcb.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
}
-
/*
- * Segment's flags are (SYN) or (SYN|FIN).
+ * Segment flag validation for new connection attempts:
+ *
+ * Our (SYN|ACK) response was rejected.
+ * Check with syncache and remove entry to prevent
+ * retransmits.
+ *
+ * NB: syncache_chkrst does its own logging of failure
+ * causes.
+ */
+ if (thflags & TH_RST) {
+ syncache_chkrst(&inc, th);
+ goto dropunlock;
+ }
+ /*
+ * We can't do anything without SYN.
+ */
+ if ((thflags & TH_SYN) == 0) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN is missing, segment ignored\n",
+ s, __func__);
+ tcpstat.tcps_badsyn++;
+ goto dropunlock;
+ }
+ /*
+ * (SYN|ACK) is bogus on a listen socket.
*/
+ if (thflags & TH_ACK) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|ACK invalid, segment rejected\n",
+ s, __func__);
+ syncache_badack(&inc); /* XXX: Not needed! */
+ tcpstat.tcps_badsyn++;
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ /*
+ * If the drop_synfin option is enabled, drop all
+ * segments with both the SYN and FIN bits set.
+ * This prevents e.g. nmap from identifying the
+ * TCP/IP stack.
+ * XXX: Poor reasoning. nmap has other methods
+ * and is constantly refining its stack detection
+ * strategies.
+ * XXX: This is a violation of the TCP specification
+ * and was used by RFC1644.
+ */
+ if ((thflags & TH_FIN) && drop_synfin) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|FIN segment ignored (based on "
+ "sysctl setting)\n", s, __func__);
+ tcpstat.tcps_badsyn++;
+ goto dropunlock;
+ }
+ /*
+ * Segment's flags are (SYN) or (SYN|FIN).
+ *
+ * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
+ * as they do not affect the state of the TCP FSM.
+ * The data pointed to by TH_URG and th_urp is ignored.
+ */
+ KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
+ ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
+ KASSERT(thflags & (TH_SYN),
+ ("%s: Listen socket: TH_SYN not set", __func__));
#ifdef INET6
/*
* If deprecated address is forbidden,
@@ -933,182 +750,201 @@
if ((ia6 = ip6_getdstifaddr(m)) &&
(ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
- INP_UNLOCK(inp);
- tp = NULL;
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to deprecated "
+ "IPv6 address rejected\n",
+ s, __func__);
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
}
#endif
/*
- * If it is from this socket, drop it, it must be forged.
- * Don't bother responding if the destination was a broadcast.
- */
- if (th->th_dport == th->th_sport) {
- if (isipv6) {
- if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
- &ip6->ip6_src))
- goto drop;
- } else {
- if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
- goto drop;
- }
+ * Basic sanity checks on incoming SYN requests:
+ * Don't respond if the destination is a link layer
+ * broadcast according to RFC1122 4.2.3.10, p. 104.
+ * If it is from this socket it must be forged.
+ * Don't respond if the source or destination is a
+ * global or subnet broad- or multicast address.
+ * Note that it is quite possible to receive unicast
+ * link-layer packets with a broadcast IP address. Use
+ * in_broadcast() to find them.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from broad- or multicast "
+ "link layer address ignored\n", s, __func__);
+ goto dropunlock;
}
- /*
- * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
- *
- * Note that it is quite possible to receive unicast
- * link-layer packets with a broadcast IP address. Use
- * in_broadcast() to find them.
- */
- if (m->m_flags & (M_BCAST|M_MCAST))
- goto drop;
if (isipv6) {
+#ifdef INET6
+ if (th->th_dport == th->th_sport &&
+ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to/from self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
- IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
- goto drop;
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to multicast "
+ "address ignored\n", s, __func__);
+ goto dropunlock;
+ }
+#endif
} else {
+ if (th->th_dport == th->th_sport &&
+ ip->ip_dst.s_addr == ip->ip_src.s_addr) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
- goto drop;
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to broad- "
+ "or multicast address ignored\n",
+ s, __func__);
+ goto dropunlock;
+ }
}
/*
- * SYN appears to be valid; create compressed TCP state
- * for syncache, or perform t/tcp connection.
+ * SYN appears to be valid. Create compressed TCP state
+ * for syncache.
*/
- if (so->so_qlen <= so->so_qlimit) {
#ifdef TCPDEBUG
- if (so->so_options & SO_DEBUG)
- tcp_trace(TA_INPUT, ostate, tp,
- (void *)tcp_saveipgen, &tcp_savetcp, 0);
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- tcp_dooptions(&to, optp, optlen, 1);
- if (!syncache_add(&inc, &to, th, &so, m))
- goto drop;
- if (so == NULL) {
- /*
- * Entry added to syncache, mbuf used to
- * send SYN,ACK packet.
- */
- KASSERT(headlocked, ("headlocked"));
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- /*
- * Segment passed TAO tests.
- */
- INP_UNLOCK(inp);
- inp = sotoinpcb(so);
- INP_LOCK(inp);
- tp = intotcpcb(inp);
- tp->snd_wnd = tiwin;
- tp->t_starttime = ticks;
- tp->t_state = TCPS_ESTABLISHED;
-
- /*
- * T/TCP logic:
- * If there is a FIN or if there is data, then
- * delay SYN,ACK(SYN) in the hope of piggy-backing
- * it on a response segment. Otherwise must send
- * ACK now in case the other side is slow starting.
- */
- if (thflags & TH_FIN || tlen != 0)
- tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
- else
- tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
- tcpstat.tcps_connects++;
- soisconnected(so);
- goto trimthenstep6;
- }
- goto drop;
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
+ syncache_add(&inc, &to, th, inp, &so, m);
+ /*
+ * Entry added to syncache and mbuf consumed.
+ * Everything already unlocked by syncache_add().
+ */
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
}
-after_listen:
- KASSERT(headlocked, ("tcp_input: after_listen: head not locked"));
- INP_LOCK_ASSERT(inp);
- /* Syncache takes care of sockets in the listen state. */
- KASSERT(tp->t_state != TCPS_LISTEN, ("tcp_input: TCPS_LISTEN"));
+ /*
+ * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
+ * state. tcp_do_segment() always consumes the mbuf chain, unlocks
+ * the inpcb, and unlocks pcbinfo.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
+
+dropwithreset:
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ m = NULL; /* mbuf chain got consumed. */
+dropunlock:
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ if (inp != NULL)
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+drop:
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ if (m != NULL)
+ m_freem(m);
+ return;
+}
+
+static void
+tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int drop_hdrlen, int tlen)
+{
+ int thflags, acked, ourfinisacked, needoutput = 0;
+ int headlocked = 1;
+ int rstreason, todrop, win;
+ u_long tiwin;
+ struct tcpopt to;
+#ifdef TCPDEBUG
/*
- * This is the second part of the MSS DoS prevention code (after
- * minmss on the sending side) and it deals with too many too small
- * tcp packets in a too short timeframe (1 second).
- *
- * For every full second we count the number of received packets
- * and bytes. If we get a lot of packets per second for this connection
- * (tcp_minmssoverload) we take a closer look at it and compute the
- * average packet size for the past second. If that is less than
- * tcp_minmss we get too many packets with very small payload which
- * is not good and burdens our system (and every packet generates
- * a wakeup to the process connected to our socket). We can reasonable
- * expect this to be small packet DoS attack to exhaust our CPU
- * cycles.
- *
- * Care has to be taken for the minimum packet overload value. This
- * value defines the minimum number of packets per second before we
- * start to worry. This must not be too low to avoid killing for
- * example interactive connections with many small packets like
- * telnet or SSH.
- *
- * Setting either tcp_minmssoverload or tcp_minmss to "0" disables
- * this check.
- *
- * Account for packet if payload packet, skip over ACK, etc.
+ * The size of tcp_saveipgen must be the size of the max ip header,
+ * now IPv6.
*/
- if (tcp_minmss && tcp_minmssoverload &&
- tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
- if ((unsigned int)(tp->rcv_second - ticks) < hz) {
- tp->rcv_pps++;
- tp->rcv_byps += tlen + off;
- if (tp->rcv_pps > tcp_minmssoverload) {
- if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) {
- printf("too many small tcp packets from "
- "%s:%u, av. %lubyte/packet, "
- "dropping connection\n",
-#ifdef INET6
- isipv6 ?
- ip6_sprintf(&inp->inp_inc.inc6_faddr) :
-#endif
- inet_ntoa(inp->inp_inc.inc_faddr),
- inp->inp_inc.inc_fport,
- tp->rcv_byps / tp->rcv_pps);
- KASSERT(headlocked, ("tcp_input: "
- "after_listen: tcp_drop: head "
- "not locked"));
- tp = tcp_drop(tp, ECONNRESET);
- tcpstat.tcps_minmssdrops++;
- goto drop;
- }
- }
- } else {
- tp->rcv_second = ticks + hz;
- tp->rcv_pps = 1;
- tp->rcv_byps = tlen + off;
- }
- }
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+ thflags = th->th_flags;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tp->t_inpcb);
+ KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
+ __func__));
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
+ __func__));
/*
* Segment received on connection.
* Reset idle time and keep-alive timer.
+ * XXX: This should be done after segment
+ * validation to ignore broken/spoofed segs.
*/
tp->t_rcvtime = ticks;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+
+ /*
+ * Unscale the window into a 32-bit value.
+ * For the SYN_SENT state the scale is zero.
+ */
+ tiwin = th->th_win << tp->snd_scale;
+
+ /*
+ * Parse options on any incoming segment.
+ */
+ tcp_dooptions(&to, (u_char *)(th + 1),
+ (th->th_off << 2) - sizeof(struct tcphdr),
+ (thflags & TH_SYN) ? TO_SYN : 0);
+
+ /*
+ * If echoed timestamp is later than the current time,
+ * fall back to non RFC1323 RTT calculation. Normalize
+ * timestamp if syncookies were used when this connection
+ * was established.
+ */
+ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
+ to.to_tsecr -= tp->ts_offset;
+ if (TSTMP_GT(to.to_tsecr, ticks))
+ to.to_tsecr = 0;
+ }
/*
* Process options only when we get SYN/ACK back. The SYN case
* for incoming connections is handled in tcp_syncache.
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
* XXX this is traditional behavior, may need to be cleaned up.
*/
- tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
- if (to.to_flags & TOF_SCALE) {
+ if ((to.to_flags & TOF_SCALE) &&
+ (tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
- tp->requested_s_scale = to.to_requested_s_scale;
+ tp->snd_scale = to.to_wscale;
}
+ /*
+ * Initial send window. It will be updated with
+ * the next incoming segment to the scaled value.
+ */
+ tp->snd_wnd = th->th_win;
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
@@ -1116,13 +952,9 @@
}
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
- if (tp->sack_enable) {
- if (!(to.to_flags & TOF_SACK))
- tp->sack_enable = 0;
- else
- tp->t_flags |= TF_SACK_PERMIT;
- }
-
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (to.to_flags & TOF_SACKPERM) == 0)
+ tp->t_flags &= ~TF_SACK_PERMIT;
}
/*
@@ -1139,16 +971,18 @@
* (the reassembly queue is empty), add the data to
* the socket buffer and note that we need a delayed ack.
* Make sure that the hidden state-flags are also off.
- * Since we check for TCPS_ESTABLISHED above, it can only
+ * Since we check for TCPS_ESTABLISHED first, it can only
* be TH_NEEDSYN.
*/
if (tp->t_state == TCPS_ESTABLISHED &&
+ th->th_seq == tp->rcv_nxt &&
(thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ tp->snd_nxt == tp->snd_max &&
+ tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+ LIST_EMPTY(&tp->t_segq) &&
((to.to_flags & TOF_TS) == 0 ||
- TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
- th->th_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd &&
- tp->snd_nxt == tp->snd_max) {
+ TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
/*
* If last ACK falls within this segment's sequence numbers,
@@ -1166,20 +1000,24 @@
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
tp->snd_cwnd >= tp->snd_wnd &&
- ((!tcp_do_newreno && !tp->sack_enable &&
+ ((!tcp_do_newreno &&
+ !(tp->t_flags & TF_SACK_PERMIT) &&
tp->t_dupacks < tcprexmtthresh) ||
- ((tcp_do_newreno || tp->sack_enable) &&
- !IN_FASTRECOVERY(tp) && to.to_nsacks == 0 &&
+ ((tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ !IN_FASTRECOVERY(tp) &&
+ (to.to_flags & TOF_SACK) == 0 &&
TAILQ_EMPTY(&tp->snd_holes)))) {
- KASSERT(headlocked, ("headlocked"));
+ KASSERT(headlocked,
+ ("%s: headlocked", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
/*
- * this is a pure ack for outstanding data.
+ * This is a pure ack for outstanding data.
*/
++tcpstat.tcps_predack;
/*
- * "bad retransmit" recovery
+ * "bad retransmit" recovery.
*/
if (tp->t_rxtshift == 1 &&
ticks < tp->t_badrxtwin) {
@@ -1210,7 +1048,7 @@
tcp_xmit_timer(tp,
ticks - to.to_tsecr + 1);
} else if (tp->t_rtttime &&
- SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
if (!tp->t_rttlow ||
tp->t_rttlow > ticks - tp->t_rtttime)
tp->t_rttlow = ticks - tp->t_rtttime;
@@ -1227,13 +1065,13 @@
tp->snd_recover = th->th_ack - 1;
tp->snd_una = th->th_ack;
/*
- * pull snd_wl2 up to prevent seq wrap relative
+ * Pull snd_wl2 up to prevent seq wrap relative
* to th_ack.
*/
tp->snd_wl2 = th->th_ack;
tp->t_dupacks = 0;
m_freem(m);
- ND6_HINT(tp); /* some progress has been done */
+ ND6_HINT(tp); /* Some progress has been made. */
/*
* If all outstanding data are acked, stop
@@ -1243,41 +1081,37 @@
* wakeup/selwakeup/signal. If data
* are ready to send, let tcp_output
* decide between more output or persist.
-
+ */
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- */
if (tp->snd_una == tp->snd_max)
- callout_stop(tp->tt_rexmt);
- else if (!callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt,
- tp->t_rxtcur,
- tcp_timer_rexmt, tp);
-
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT,
+ tp->t_rxtcur);
sowwakeup(so);
if (so->so_snd.sb_cc)
(void) tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
- LIST_EMPTY(&tp->t_segq) &&
tlen <= sbspace(&so->so_rcv)) {
int newsize = 0; /* automatic sockbuf scaling */
- KASSERT(headlocked, ("headlocked"));
+ KASSERT(headlocked, ("%s: headlocked", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
/*
- * this is a pure, in-sequence data packet
+ * This is a pure, in-sequence data packet
* with nothing on the reassembly queue and
* we have enough buffer space to take it.
*/
/* Clean receiver SACK report if present */
- if (tp->sack_enable && tp->rcv_numsacks)
+ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
++tcpstat.tcps_preddat;
tp->rcv_nxt += tlen;
@@ -1293,7 +1127,7 @@
tp->rcv_up = tp->rcv_nxt;
tcpstat.tcps_rcvpack++;
tcpstat.tcps_rcvbyte += tlen;
- ND6_HINT(tp); /* some progress has been done */
+ ND6_HINT(tp); /* Some progress has been made */
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
@@ -1323,12 +1157,14 @@
* 2. received bytes per RTT is within seven eighth of the
* current socket buffer size;
* 3. receive buffer size has not hit maximal automatic size;
- * 4. Profit!
*
* This algorithm does one step per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
*/
if (tcp_do_autorcvbuf &&
to.to_tsecr &&
@@ -1343,12 +1179,6 @@
min(so->so_rcv.sb_hiwat +
tcp_autorcvbuf_inc,
tcp_autorcvbuf_max);
-#if 0
- log(LOG_DEBUG, "%s: hiwat %i, ref_ts %i, ts %i, "
- "count %i, new %i, max %i\n",
- __func__, so->so_rcv.sb_hiwat, tp->rfbuf_ts,
- to.to_tsecr, tp->rfbuf_cnt, newsize, (int)sb_max);
-#endif
}
/* Start over with next RTT. */
tp->rfbuf_ts = 0;
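For illustration, here is a minimal user-space sketch of the auto-sizing rule described in the comment above: grow by one increment per RTT, and only while the peer keeps the buffer at least 7/8 full. The constants stand in for the tcp_autorcvbuf_inc/tcp_autorcvbuf_max sysctls and are made up, not the kernel's values.

/*
 * Illustrative only; names and constants are assumptions, not kernel code.
 */
#include <stdio.h>

#define AUTORCVBUF_INC	(16 * 1024)	/* assumed per-RTT step */
#define AUTORCVBUF_MAX	(256 * 1024)	/* assumed ceiling */

static unsigned int
autorcvbuf_step(unsigned int hiwat, unsigned int bytes_this_rtt)
{
	unsigned int newsize = hiwat;

	/* One step per RTT, only if the peer filled >= 7/8 of the buffer. */
	if (bytes_this_rtt > (hiwat / 8) * 7 && hiwat < AUTORCVBUF_MAX) {
		newsize = hiwat + AUTORCVBUF_INC;
		if (newsize > AUTORCVBUF_MAX)
			newsize = AUTORCVBUF_MAX;
	}
	return (newsize);
}

int
main(void)
{
	unsigned int hiwat = 64 * 1024;

	/* A bulk stream that fills the buffer every RTT keeps growing... */
	for (int rtt = 0; rtt < 4; rtt++) {
		printf("rtt %d: hiwat %u\n", rtt, hiwat);
		hiwat = autorcvbuf_step(hiwat, hiwat);
	}
	/* ...while a trickle of small segments never triggers a step. */
	printf("trickle: hiwat stays %u\n", autorcvbuf_step(hiwat, 512));
	return (0);
}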
@@ -1373,6 +1203,7 @@
m_adj(m, drop_hdrlen); /* delayed header drop */
sbappendstream_locked(&so->so_rcv, m);
}
+ /* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
if (DELAY_ACK(tp)) {
tp->t_flags |= TF_DELACK;
@@ -1390,13 +1221,10 @@
* Receive window is amount of space in rcv queue,
* but not less than advertised window.
*/
- { int win;
-
win = sbspace(&so->so_rcv);
if (win < 0)
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
- }
/* Reset receive buffer auto scaling when not in bulk receive mode. */
tp->rfbuf_ts = 0;
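A small illustration of the window calculation just above: the advertised window is the free space in the receive buffer, clipped at zero, but never less than the amount already advertised (rcv_adv - rcv_nxt), so a previously offered window is never taken back. Values below are made up.

#include <stdio.h>
#include <stdint.h>

static uint32_t
recv_window(int space, uint32_t rcv_adv, uint32_t rcv_nxt)
{
	int win = space < 0 ? 0 : space;		/* free space, clipped */
	int already = (int)(rcv_adv - rcv_nxt);		/* already advertised */

	return ((uint32_t)(win > already ? win : already));
}

int
main(void)
{
	/* Buffer nearly full, but 8192 bytes were already advertised. */
	printf("%u\n", recv_window(1024, 108192, 100000));	/* 8192 */
	/* Plenty of room: advertise the full free space. */
	printf("%u\n", recv_window(65536, 108192, 100000));	/* 65536 */
	return (0);
}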
@@ -1436,17 +1264,12 @@
rstreason = BANDLIM_UNLIMITED;
goto dropwithreset;
}
- if (thflags & TH_RST) {
- if (thflags & TH_ACK) {
- KASSERT(headlocked, ("tcp_input: after_listen"
- ": tcp_drop.2: head not locked"));
- tp = tcp_drop(tp, ECONNREFUSED);
- }
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+ tp = tcp_drop(tp, ECONNREFUSED);
+ if (thflags & TH_RST)
goto drop;
- }
- if ((thflags & TH_SYN) == 0)
+ if (!(thflags & TH_SYN))
goto drop;
- tp->snd_wnd = th->th_win; /* initial send window */
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
@@ -1461,7 +1284,6 @@
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
tp->rcv_adv += tp->rcv_wnd;
@@ -1471,8 +1293,8 @@
* ACKNOW will be turned on later.
*/
if (DELAY_ACK(tp) && tlen != 0)
- callout_reset(tp->tt_delack, tcp_delacktime,
- tcp_timer_delack, tp);
+ tcp_timer_activate(tp, TT_DELACK,
+ tcp_delacktime);
else
tp->t_flags |= TF_ACKNOW;
/*
@@ -1488,8 +1310,7 @@
thflags &= ~TH_SYN;
} else {
tp->t_state = TCPS_ESTABLISHED;
- callout_reset(tp->tt_keep, tcp_keepidle,
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
}
} else {
/*
@@ -1502,15 +1323,14 @@
* SYN-SENT* -> SYN-RECEIVED*
* If there was no CC option, clear cached CC value.
*/
- tp->t_flags |= TF_ACKNOW;
- callout_stop(tp->tt_rexmt);
+ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_state = TCPS_SYN_RECEIVED;
}
-trimthenstep6:
- KASSERT(headlocked, ("tcp_input: trimthenstep6: head not "
- "locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: trimthenstep6: head not locked",
+ __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Advance th->th_seq to correspond to first data byte.
@@ -1548,8 +1368,6 @@
*/
case TCPS_LAST_ACK:
case TCPS_CLOSING:
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
break; /* continue normal processing */
}
@@ -1616,9 +1434,8 @@
* RFC 1337.
*/
if (thflags & TH_RST) {
- if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
- SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
- (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
+ if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
switch (tp->t_state) {
case TCPS_SYN_RECEIVED:
@@ -1626,11 +1443,15 @@
goto close;
case TCPS_ESTABLISHED:
- if (tp->last_ack_sent != th->th_seq &&
- tcp_insecure_rst == 0) {
+ if (tcp_insecure_rst == 0 &&
+ !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) &&
+ SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) &&
+ !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) {
tcpstat.tcps_badrst++;
goto drop;
}
+ /* FALLTHROUGH */
case TCPS_FIN_WAIT_1:
case TCPS_FIN_WAIT_2:
case TCPS_CLOSE_WAIT:
@@ -1638,23 +1459,17 @@
close:
tp->t_state = TCPS_CLOSED;
tcpstat.tcps_drops++;
- KASSERT(headlocked, ("tcp_input: "
- "trimthenstep6: tcp_close: head not "
- "locked"));
+ KASSERT(headlocked, ("%s: trimthenstep6: "
+ "tcp_close: head not locked", __func__));
tp = tcp_close(tp);
break;
case TCPS_CLOSING:
case TCPS_LAST_ACK:
- KASSERT(headlocked, ("trimthenstep6: "
- "tcp_close.2: head not locked"));
+ KASSERT(headlocked, ("%s: trimthenstep6: "
+ "tcp_close.2: head not locked", __func__));
tp = tcp_close(tp);
break;
-
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT,
- ("timewait"));
- break;
}
}
goto drop;
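A sketch of the tightened RST acceptance test introduced above may help: every RST must fall in the coarse window around last_ack_sent, and for ESTABLISHED connections with tcp_insecure_rst left at 0 the sequence number must additionally sit within one byte of rcv_nxt or last_ack_sent. SEQ_GEQ/SEQ_LEQ are the usual wraparound-safe comparisons; the function names below are illustrative, not the kernel's.

#include <stdio.h>
#include <stdint.h>

#define SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)

/* Coarse in-window test applied to every RST. */
static int
rst_in_window(uint32_t seq, uint32_t last_ack_sent, uint32_t rcv_wnd)
{
	return (SEQ_GEQ(seq, last_ack_sent - 1) &&
	    SEQ_LEQ(seq, last_ack_sent + rcv_wnd));
}

/* Stricter test used for ESTABLISHED when tcp_insecure_rst is 0. */
static int
rst_exact_enough(uint32_t seq, uint32_t rcv_nxt, uint32_t last_ack_sent)
{
	return ((SEQ_GEQ(seq, rcv_nxt - 1) && SEQ_LEQ(seq, rcv_nxt + 1)) ||
	    (SEQ_GEQ(seq, last_ack_sent - 1) &&
	    SEQ_LEQ(seq, last_ack_sent + 1)));
}

int
main(void)
{
	uint32_t rcv_nxt = 4000000000u;		/* near wraparound on purpose */
	uint32_t last_ack_sent = rcv_nxt;
	uint32_t rcv_wnd = 65535;

	/* Genuine RST at the expected sequence number: 1 1. */
	printf("%d %d\n", rst_in_window(rcv_nxt, last_ack_sent, rcv_wnd),
	    rst_exact_enough(rcv_nxt, rcv_nxt, last_ack_sent));
	/* Blind in-window RST 30000 bytes ahead: 1 0 (dropped as badrst). */
	printf("%d %d\n", rst_in_window(rcv_nxt + 30000, last_ack_sent, rcv_wnd),
	    rst_exact_enough(rcv_nxt + 30000, rcv_nxt, last_ack_sent));
	return (0);
}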
@@ -1755,8 +1570,16 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(headlocked, ("trimthenstep6: tcp_close.3: head not "
- "locked"));
+ char *s;
+
+ KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head "
+ "not locked", __func__));
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
+ "was closed, sending RST and removing tcpcb\n",
+ s, __func__, tcpstates[tp->t_state], tlen);
+ free(s, M_TCPLOG);
+ }
tp = tcp_close(tp);
tcpstat.tcps_rcvafterclose++;
rstreason = BANDLIM_UNLIMITED;
@@ -1767,27 +1590,12 @@
* If segment ends after window, drop trailing data
* (and PUSH and FIN); if nothing left, just ACK.
*/
- todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
+ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
if (todrop > 0) {
tcpstat.tcps_rcvpackafterwin++;
if (todrop >= tlen) {
tcpstat.tcps_rcvbyteafterwin += tlen;
/*
- * If a new connection request is received
- * while in TIME_WAIT, drop the old connection
- * and start over if the sequence numbers
- * are above the previous ones.
- */
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- if (thflags & TH_SYN &&
- tp->t_state == TCPS_TIME_WAIT &&
- SEQ_GT(th->th_seq, tp->rcv_nxt)) {
- KASSERT(headlocked, ("trimthenstep6: "
- "tcp_close.4: head not locked"));
- tp = tcp_close(tp);
- goto findpcb;
- }
- /*
* If window is closed can only take segments at
* window edge, and have to drop data and PUSH from
* incoming segments. Continue processing, but
@@ -1837,8 +1645,8 @@
* error and we send an RST and drop the connection.
*/
if (thflags & TH_SYN) {
- KASSERT(headlocked, ("tcp_input: tcp_drop: trimthenstep6: "
- "head not locked"));
+ KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: "
+ "head not locked", __func__));
tp = tcp_drop(tp, ECONNRESET);
rstreason = BANDLIM_UNLIMITED;
goto drop;
@@ -1876,8 +1684,8 @@
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
+ tp->snd_wnd = tiwin;
}
/*
* Make transitions:
@@ -1890,8 +1698,7 @@
tp->t_flags &= ~TF_NEEDFIN;
} else {
tp->t_state = TCPS_ESTABLISHED;
- callout_reset(tp->tt_keep, tcp_keepidle,
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -1917,14 +1724,13 @@
case TCPS_CLOSE_WAIT:
case TCPS_CLOSING:
case TCPS_LAST_ACK:
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
- if (tp->sack_enable &&
- (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes)))
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ ((to.to_flags & TOF_SACK) ||
+ !TAILQ_EMPTY(&tp->snd_holes)))
tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
@@ -1953,13 +1759,15 @@
* to keep a constant cwnd packets in the
* network.
*/
- if (!callout_active(tp->tt_rexmt) ||
+ if (!tcp_timer_active(tp, TT_REXMT) ||
th->th_ack != tp->snd_una)
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
- ((tcp_do_newreno || tp->sack_enable) &&
- IN_FASTRECOVERY(tp))) {
- if (tp->sack_enable && IN_FASTRECOVERY(tp)) {
+ ((tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ IN_FASTRECOVERY(tp))) {
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ IN_FASTRECOVERY(tp)) {
int awnd;
/*
@@ -1990,7 +1798,7 @@
* check to see if we're in newreno
* recovery.
*/
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
if (IN_FASTRECOVERY(tp)) {
tp->t_dupacks = 0;
break;
@@ -2009,9 +1817,9 @@
tp->snd_ssthresh = win * tp->t_maxseg;
ENTER_FASTRECOVERY(tp);
tp->snd_recover = tp->snd_max;
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
tcpstat.tcps_sack_recovery_episode++;
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
@@ -2022,7 +1830,8 @@
tp->snd_cwnd = tp->t_maxseg;
(void) tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
- ("tp->snd_limited too big"));
+ ("%s: tp->snd_limited too big",
+ __func__));
tp->snd_cwnd = tp->snd_ssthresh +
tp->t_maxseg *
(tp->t_dupacks - tp->snd_limited);
@@ -2036,7 +1845,8 @@
KASSERT(tp->t_dupacks == 1 ||
tp->t_dupacks == 2,
- ("dupacks not 1 or 2"));
+ ("%s: dupacks not 1 or 2",
+ __func__));
if (tp->t_dupacks == 1)
tp->snd_limited = 0;
tp->snd_cwnd =
@@ -2050,7 +1860,8 @@
tp->snd_limited == 0) ||
(sent == tp->t_maxseg + 1 &&
tp->t_flags & TF_SENTFIN),
- ("sent too much"));
+ ("%s: sent too much",
+ __func__));
tp->snd_limited = 2;
} else if (sent > 0)
++tp->snd_limited;
@@ -2062,16 +1873,17 @@
break;
}
- KASSERT(SEQ_GT(th->th_ack, tp->snd_una), ("th_ack <= snd_una"));
+ KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
+ ("%s: th_ack <= snd_una", __func__));
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tcp_do_newreno || tp->sack_enable) {
+ if (tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) {
if (IN_FASTRECOVERY(tp)) {
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
- if (tp->sack_enable)
+ if (tp->t_flags & TF_SACK_PERMIT)
tcp_sack_partialack(tp, th);
else
tcp_newreno_partial_ack(tp, th);
@@ -2118,15 +1930,15 @@
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
+ /* Send window already scaled. */
}
}
process_ACK:
- KASSERT(headlocked, ("tcp_input: process_ACK: head not "
- "locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: process_ACK: head not locked",
+ __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
acked = th->th_ack - tp->snd_una;
tcpstat.tcps_rcvackpack++;
@@ -2183,11 +1995,10 @@
* timer, using current (possibly backed-off) value.
*/
if (th->th_ack == tp->snd_max) {
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
needoutput = 1;
- } else if (!callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ } else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
/*
* If no data (only SYN) was ACK'd,
@@ -2203,10 +2014,10 @@
* Otherwise open linearly: maxseg per window
* (maxseg^2 / cwnd per packet).
*/
- if ((!tcp_do_newreno && !tp->sack_enable) ||
+ if ((!tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
!IN_FASTRECOVERY(tp)) {
- register u_int cw = tp->snd_cwnd;
- register u_int incr = tp->t_maxseg;
+ u_int cw = tp->snd_cwnd;
+ u_int incr = tp->t_maxseg;
if (cw > tp->snd_ssthresh)
incr = incr * incr / cw;
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
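A worked example of the window-opening rule above: below ssthresh the window opens by a full maxseg per ACK (exponential growth per RTT), above it by maxseg*maxseg/cwnd per ACK, roughly one maxseg per RTT. Plain user-space arithmetic with made-up values; the cap models TCP_MAXWIN << snd_scale.

#include <stdio.h>

int
main(void)
{
	unsigned int maxseg = 1460;
	unsigned int ssthresh = 8 * maxseg;		/* 11680 */
	unsigned int cwnd = 2 * maxseg;
	unsigned int cap = 65535U << 6;			/* assumed scale of 6 */

	for (int ack = 1; ack <= 16; ack++) {
		const char *phase = cwnd > ssthresh ?
		    "congestion avoidance" : "slow start";
		unsigned int incr = maxseg;

		if (cwnd > ssthresh)
			incr = maxseg * maxseg / cwnd;
		cwnd = (cwnd + incr < cap) ? cwnd + incr : cap;
		printf("ack %2d (%s): cwnd %u\n", ack, phase, cwnd);
	}
	return (0);
}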
@@ -2221,19 +2032,20 @@
tp->snd_wnd -= acked;
ourfinisacked = 0;
}
+ /* NB: sowwakeup_locked() does an implicit unlock. */
sowwakeup_locked(so);
- /* detect una wraparound */
- if ((tcp_do_newreno || tp->sack_enable) &&
+ /* Detect una wraparound. */
+ if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
!IN_FASTRECOVERY(tp) &&
SEQ_GT(tp->snd_una, tp->snd_recover) &&
SEQ_LEQ(th->th_ack, tp->snd_recover))
tp->snd_recover = th->th_ack - 1;
- if ((tcp_do_newreno || tp->sack_enable) &&
+ if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
IN_FASTRECOVERY(tp) &&
SEQ_GEQ(th->th_ack, tp->snd_recover))
EXIT_FASTRECOVERY(tp);
tp->snd_una = th->th_ack;
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
if (SEQ_GT(tp->snd_una, tp->snd_recover))
tp->snd_recover = tp->snd_una;
}
@@ -2255,15 +2067,18 @@
* Starting the timer is contrary to the
* specification, but if we don't get a FIN
* we'll hang forever.
+ *
+ * XXXjl:
+ * we should release the tp also, and use a
+ * compressed state.
*/
- /* XXXjl
- * we should release the tp also, and use a
- * compressed state.
- */
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ int timeout;
+
soisdisconnected(so);
- callout_reset(tp->tt_2msl, tcp_maxidle,
- tcp_timer_2msl, tp);
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
}
tp->t_state = TCPS_FIN_WAIT_2;
}
@@ -2277,10 +2092,11 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- KASSERT(headlocked, ("tcp_input: process_ACK: "
- "head not locked"));
+ KASSERT(headlocked, ("%s: process_ACK: "
+ "head not locked", __func__));
tcp_twstart(tp);
INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
m_freem(m);
return;
}
@@ -2294,29 +2110,18 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- KASSERT(headlocked, ("tcp_input: process_ACK:"
- " tcp_close: head not locked"));
+ KASSERT(headlocked, ("%s: process_ACK: "
+ "tcp_close: head not locked", __func__));
tp = tcp_close(tp);
goto drop;
}
break;
-
- /*
- * In TIME_WAIT state the only thing that should arrive
- * is a retransmission of the remote FIN. Acknowledge
- * it and restart the finack timer.
- */
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- goto dropafterack;
}
}
step6:
- KASSERT(headlocked, ("tcp_input: step6: head not locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: step6: head not locked", __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Update window information.
@@ -2401,8 +2206,8 @@
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
- KASSERT(headlocked, ("tcp_input: dodata: head not locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: dodata: head not locked", __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Process the segment text, merging it into the TCP sequencing queue,
@@ -2415,7 +2220,6 @@
if ((tlen || (thflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
- tcp_seq save_end = th->th_seq + tlen;
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -2446,19 +2250,29 @@
m_freem(m);
else
sbappendstream_locked(&so->so_rcv, m);
+ /* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
} else {
+ /*
+ * XXX: Due to the header drop above "th" is
+ * theoretically invalid by now. Fortunately
+ * m_adj() doesn't actually frees any mbufs
+ * when trimming from the head.
+ */
thflags = tcp_reass(tp, th, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
- if (tlen > 0 && tp->sack_enable)
- tcp_update_sack_list(tp, save_start, save_end);
+ if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
+ tcp_update_sack_list(tp, save_start, save_start + tlen);
+#if 0
/*
* Note the amount of data that peer has sent into
* our window, in order to estimate the sender's
* buffer size.
+ * XXX: Unused.
*/
len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+#endif
} else {
m_freem(m);
thflags &= ~TH_FIN;
@@ -2492,7 +2306,7 @@
*/
case TCPS_SYN_RECEIVED:
tp->t_starttime = ticks;
- /*FALLTHROUGH*/
+ /* FALLTHROUGH */
case TCPS_ESTABLISHED:
tp->t_state = TCPS_CLOSE_WAIT;
break;
@@ -2511,20 +2325,11 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- KASSERT(headlocked == 1, ("tcp_input: dodata: "
- "TCP_FIN_WAIT_2: head not locked"));
+ KASSERT(headlocked == 1, ("%s: dodata: "
+ "TCP_FIN_WAIT_2: head not locked", __func__));
tcp_twstart(tp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
-
- /*
- * In TIME_WAIT state restart the 2 MSL time_wait timer.
- */
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- break;
}
}
INP_INFO_WUNLOCK(&tcbinfo);
@@ -2542,18 +2347,19 @@
(void) tcp_output(tp);
check_delack:
- KASSERT(headlocked == 0, ("tcp_input: check_delack: head locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked == 0, ("%s: check_delack: head locked",
+ __func__));
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) {
tp->t_flags &= ~TF_DELACK;
- callout_reset(tp->tt_delack, tcp_delacktime,
- tcp_timer_delack, tp);
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
}
- INP_UNLOCK(inp);
+ INP_UNLOCK(tp->t_inpcb);
return;
dropafterack:
- KASSERT(headlocked, ("tcp_input: dropafterack: head not locked"));
+ KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__));
/*
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
@@ -2580,79 +2386,92 @@
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- KASSERT(headlocked, ("headlocked should be 1"));
+ KASSERT(headlocked, ("%s: headlocked should be 1", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
- INP_UNLOCK(inp);
+ INP_UNLOCK(tp->t_inpcb);
m_freem(m);
return;
dropwithreset:
- KASSERT(headlocked, ("tcp_input: dropwithreset: head not locked"));
+ KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__));
+
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+
+ if (tp != NULL)
+ INP_UNLOCK(tp->t_inpcb);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return;
+
+drop:
/*
- * Generate a RST, dropping incoming segment.
- * Make ACK acceptable to originator of segment.
- * Don't bother to respond if destination was broadcast/multicast.
+ * Drop space held by incoming segment and return.
*/
- if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
+#ifdef TCPDEBUG
+ if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp != NULL)
+ INP_UNLOCK(tp->t_inpcb);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&tcbinfo);
+ m_freem(m);
+ return;
+}
+
+/*
+ * Issue RST and make ACK acceptable to originator of segment.
+ * The mbuf must still include the original packet header.
+ * tp may be NULL.
+ */
+static void
+tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
+ int tlen, int rstreason)
+{
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ /* Don't bother if destination was broadcast/multicast. */
+ if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
goto drop;
- if (isipv6) {
+#ifdef INET6
+ if (mtod(m, struct ip *)->ip_v == 6) {
+ ip6 = mtod(m, struct ip6_hdr *);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
goto drop;
- } else {
+ /* IPv6 anycast check is done at tcp6_input() */
+ } else
+#endif
+ {
+ ip = mtod(m, struct ip *);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
goto drop;
}
- /* IPv6 anycast check is done at tcp6_input() */
- /*
- * Perform bandwidth limiting.
- */
+ /* Perform bandwidth limiting. */
if (badport_bandlim(rstreason) < 0)
goto drop;
-#ifdef TCPDEBUG
- if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
- tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
- &tcp_savetcp, 0);
-#endif
-
- if (thflags & TH_ACK)
- /* mtod() below is safe as long as hdr dropping is delayed */
- tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
- TH_RST);
- else {
- if (thflags & TH_SYN)
+ /* tcp_respond consumes the mbuf chain. */
+ if (th->th_flags & TH_ACK) {
+ tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
+ th->th_ack, TH_RST);
+ } else {
+ if (th->th_flags & TH_SYN)
tlen++;
- /* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
- (tcp_seq)0, TH_RST|TH_ACK);
+ (tcp_seq)0, TH_RST|TH_ACK);
}
-
- if (tp)
- INP_UNLOCK(inp);
- if (headlocked)
- INP_INFO_WUNLOCK(&tcbinfo);
return;
-
drop:
- /*
- * Drop space held by incoming segment and return.
- */
-#ifdef TCPDEBUG
- if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
- tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
- &tcp_savetcp, 0);
-#endif
- if (tp)
- INP_UNLOCK(inp);
- if (headlocked)
- INP_INFO_WUNLOCK(&tcbinfo);
m_freem(m);
return;
}
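The reply selection in the new tcp_dropwithreset() boils down to two cases: a segment that carried an ACK is answered with a bare RST whose sequence number is the peer's ACK, anything else gets RST|ACK acknowledging everything the peer sent (a SYN counts as one sequence number). The sketch below models only that choice; the broadcast/multicast and bandwidth-limiting checks are left out and the names are illustrative.

#include <stdio.h>
#include <stdint.h>

#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_ACK 0x10

struct reply {
	int		flags;
	uint32_t	seq;
	uint32_t	ack;
};

static int
pick_reset(int thflags, uint32_t th_seq, uint32_t th_ack, int tlen,
    struct reply *r)
{
	if (thflags & TH_RST)		/* never answer a RST with a RST */
		return (0);
	if (thflags & TH_ACK) {
		r->flags = TH_RST;	/* seq taken from the peer's ACK */
		r->seq = th_ack;
		r->ack = 0;
	} else {
		if (thflags & TH_SYN)	/* SYN occupies one sequence number */
			tlen++;
		r->flags = TH_RST | TH_ACK;
		r->seq = 0;
		r->ack = th_seq + tlen;
	}
	return (1);
}

int
main(void)
{
	struct reply r;

	/* SYN to a closed port: RST|ACK, ack = seq + 1. */
	if (pick_reset(TH_SYN, 1000, 0, 0, &r))
		printf("flags 0x%x seq %u ack %u\n", r.flags, r.seq, r.ack);
	/* Stray ACK: bare RST, seq = the peer's ack. */
	if (pick_reset(TH_ACK, 1000, 5000, 0, &r))
		printf("flags 0x%x seq %u ack %u\n", r.flags, r.seq, r.ack);
	return (0);
}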
@@ -2661,11 +2480,7 @@
* Parse TCP options and place in tcpopt.
*/
static void
-tcp_dooptions(to, cp, cnt, is_syn)
- struct tcpopt *to;
- u_char *cp;
- int cnt;
- int is_syn;
+tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -2687,7 +2502,7 @@
case TCPOPT_MAXSEG:
if (optlen != TCPOLEN_MAXSEG)
continue;
- if (!is_syn)
+ if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_MSS;
bcopy((char *)cp + 2,
@@ -2697,10 +2512,10 @@
case TCPOPT_WINDOW:
if (optlen != TCPOLEN_WINDOW)
continue;
- if (! is_syn)
+ if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_SCALE;
- to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+ to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
break;
case TCPOPT_TIMESTAMP:
if (optlen != TCPOLEN_TIMESTAMP)
@@ -2712,12 +2527,6 @@
bcopy((char *)cp + 6,
(char *)&to->to_tsecr, sizeof(to->to_tsecr));
to->to_tsecr = ntohl(to->to_tsecr);
- /*
- * If echoed timestamp is later than the current time,
- * fall back to non RFC1323 RTT calculation.
- */
- if ((to->to_tsecr != 0) && TSTMP_GT(to->to_tsecr, ticks))
- to->to_tsecr = 0;
break;
#ifdef TCP_SIGNATURE
/*
@@ -2729,21 +2538,25 @@
case TCPOPT_SIGNATURE:
if (optlen != TCPOLEN_SIGNATURE)
continue;
- to->to_flags |= (TOF_SIGNATURE | TOF_SIGLEN);
+ to->to_flags |= TOF_SIGNATURE;
+ to->to_signature = cp + 2;
break;
#endif
case TCPOPT_SACK_PERMITTED:
- if (!tcp_do_sack ||
- optlen != TCPOLEN_SACK_PERMITTED)
+ if (optlen != TCPOLEN_SACK_PERMITTED)
continue;
- if (is_syn) {
- /* MUST only be set on SYN */
- to->to_flags |= TOF_SACK;
- }
+ if (!(flags & TO_SYN))
+ continue;
+ if (!tcp_do_sack)
+ continue;
+ to->to_flags |= TOF_SACKPERM;
break;
case TCPOPT_SACK:
if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
+ if (flags & TO_SYN)
+ continue;
+ to->to_flags |= TOF_SACK;
to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
to->to_sacks = cp + 2;
tcpstat.tcps_sack_rcv_blocks++;
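For readers unfamiliar with the option walk that tcp_dooptions() performs, here is a minimal user-space sketch of the kind/length framing: EOL ends the list, NOP is a single byte, and every other option carries a length byte covering the kind and length themselves. Only a few kinds are decoded and the TO_SYN gating is omitted; purely illustrative.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_EOL		0
#define TCPOPT_NOP		1
#define TCPOPT_MAXSEG		2
#define TCPOPT_WINDOW		3
#define TCPOPT_SACK_PERMITTED	4

int
main(void)
{
	/* MSS 1460, NOP, window scale 6, SACK permitted, EOL. */
	unsigned char opts[] = { 2, 4, 0x05, 0xb4, 1, 3, 3, 6, 4, 2, 0 };
	unsigned char *cp = opts;
	int cnt = sizeof(opts);
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG: {
			uint16_t mss;

			if (optlen != 4)
				continue;
			memcpy(&mss, cp + 2, sizeof(mss));
			printf("MSS %u\n", ntohs(mss));
			break;
		}
		case TCPOPT_WINDOW:
			if (optlen != 3)
				continue;
			printf("window scale %u\n", cp[2]);
			break;
		case TCPOPT_SACK_PERMITTED:
			if (optlen != 2)
				continue;
			printf("SACK permitted\n");
			break;
		}
	}
	return (0);
}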
@@ -2761,11 +2574,8 @@
* sequencing purposes.
*/
static void
-tcp_pulloutofband(so, th, m, off)
- struct socket *so;
- struct tcphdr *th;
- register struct mbuf *m;
- int off; /* delayed to be droped hdrlen */
+tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
+ int off)
{
int cnt = off + th->th_urp - 1;
@@ -2784,7 +2594,7 @@
}
cnt -= m->m_len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
break;
}
panic("tcp_pulloutofband");
@@ -2795,11 +2605,9 @@
* and update averages and current timeout.
*/
static void
-tcp_xmit_timer(tp, rtt)
- register struct tcpcb *tp;
- int rtt;
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
- register int delta;
+ int delta;
INP_LOCK_ASSERT(tp->t_inpcb);
@@ -2894,7 +2702,6 @@
* are present. Store the upper limit of the length of options plus
* data in maxopd.
*
- *
* In case of T/TCP, we call this routine during implicit connection
* setup as well (offer = -1), to initialize maxseg from the cached
* MSS of our peer.
@@ -2903,9 +2710,7 @@
* segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
*/
void
-tcp_mss(tp, offer)
- struct tcpcb *tp;
- int offer;
+tcp_mss(struct tcpcb *tp, int offer)
{
int rtt, mss;
u_long bufsize;
@@ -2914,6 +2719,7 @@
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
+ int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@@ -2923,26 +2729,26 @@
const size_t min_protoh = sizeof(struct tcpiphdr);
#endif
- /* initialize */
+ /* Initialize. */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
- maxmtu = tcp_maxmtu(&inp->inp_inc);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
/*
- * no route to sender, stay with default mss and return
+ * No route to sender, stay with default mss and return.
*/
if (maxmtu == 0)
return;
- /* what have we got? */
+ /* What have we got? */
switch (offer) {
case 0:
/*
@@ -2978,12 +2784,12 @@
}
/*
- * rmx information is now retrieved from tcp_hostcache
+ * rmx information is now retrieved from tcp_hostcache.
*/
tcp_hc_get(&inp->inp_inc, &metrics);
/*
- * if there's a discovered mtu int tcp hostcache, use it
+ * If there's a discovered mtu in the tcp hostcache, use it
* else, use the link mtu.
*/
if (metrics.rmx_mtu)
@@ -3016,7 +2822,7 @@
tp->t_maxopd = mss;
/*
- * origoffer==-1 indicates, that no segments were received yet.
+ * origoffer==-1 indicates that no segments were received yet.
* In this case we just guess.
*/
if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
@@ -3072,7 +2878,7 @@
}
SOCKBUF_UNLOCK(&so->so_rcv);
/*
- * While we're here, check the others too
+ * While we're here, check the others too.
*/
if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
tp->t_srtt = rtt;
@@ -3138,14 +2944,17 @@
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
+
+ /* Check the interface for TSO capabilities. */
+ if (mtuflags & CSUM_TSO)
+ tp->t_flags |= TF_TSO;
}
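As a quick sanity check on the arithmetic these routines are doing: the MSS is essentially the usable path MTU minus the fixed protocol overhead (min_protoh above, 40 bytes for IPv4's tcpiphdr, 60 for an IPv6 header plus TCP header). A small worked example; the interface and host-cache lookups are of course omitted.

#include <stdio.h>

int
main(void)
{
	unsigned int mtu = 1500;
	unsigned int min_protoh_v4 = 20 + 20;	/* struct tcpiphdr */
	unsigned int min_protoh_v6 = 40 + 20;	/* ip6_hdr + tcphdr */

	printf("IPv4 MSS for MTU %u: %u\n", mtu, mtu - min_protoh_v4);	/* 1460 */
	printf("IPv6 MSS for MTU %u: %u\n", mtu, mtu - min_protoh_v6);	/* 1440 */
	return (0);
}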
/*
* Determine the MSS option to send on an outgoing SYN.
*/
int
-tcp_mssopt(inc)
- struct in_conninfo *inc;
+tcp_mssopt(struct in_conninfo *inc)
{
int mss = 0;
u_long maxmtu = 0;
@@ -3160,14 +2969,14 @@
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
- maxmtu = tcp_maxmtu6(inc);
+ maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
- maxmtu = tcp_maxmtu(inc);
+ maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
@@ -3187,14 +2996,12 @@
* be started again.
*/
static void
-tcp_newreno_partial_ack(tp, th)
- struct tcpcb *tp;
- struct tcphdr *th;
+tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
u_long ocwnd = tp->snd_cwnd;
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
tp->snd_nxt = th->th_ack;
/*
@@ -3217,135 +3024,3 @@
tp->snd_cwnd = 0;
tp->snd_cwnd += tp->t_maxseg;
}
-
-/*
- * Returns 1 if the TIME_WAIT state was killed and we should start over,
- * looking for a pcb in the listen state. Returns 0 otherwise.
- */
-static int
-tcp_timewait(tw, to, th, m, tlen)
- struct tcptw *tw;
- struct tcpopt *to;
- struct tcphdr *th;
- struct mbuf *m;
- int tlen;
-{
- int thflags;
- tcp_seq seq;
-#ifdef INET6
- int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
-#else
- const int isipv6 = 0;
-#endif
-
- /* tcbinfo lock required for tcp_twclose(), tcp_2msl_reset. */
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- INP_LOCK_ASSERT(tw->tw_inpcb);
-
- thflags = th->th_flags;
-
- /*
- * NOTE: for FIN_WAIT_2 (to be added later),
- * must validate sequence number before accepting RST
- */
-
- /*
- * If the segment contains RST:
- * Drop the segment - see Stevens, vol. 2, p. 964 and
- * RFC 1337.
- */
- if (thflags & TH_RST)
- goto drop;
-
-#if 0
-/* PAWS not needed at the moment */
- /*
- * RFC 1323 PAWS: If we have a timestamp reply on this segment
- * and it's less than ts_recent, drop it.
- */
- if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
- TSTMP_LT(to.to_tsval, tp->ts_recent)) {
- if ((thflags & TH_ACK) == 0)
- goto drop;
- goto ack;
- }
- /*
- * ts_recent is never updated because we never accept new segments.
- */
-#endif
-
- /*
- * If a new connection request is received
- * while in TIME_WAIT, drop the old connection
- * and start over if the sequence numbers
- * are above the previous ones.
- */
- if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
- (void) tcp_twclose(tw, 0);
- return (1);
- }
-
- /*
- * Drop the the segment if it does not contain an ACK.
- */
- if ((thflags & TH_ACK) == 0)
- goto drop;
-
- /*
- * Reset the 2MSL timer if this is a duplicate FIN.
- */
- if (thflags & TH_FIN) {
- seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
- if (seq + 1 == tw->rcv_nxt)
- tcp_timer_2msl_reset(tw, 2 * tcp_msl);
- }
-
- /*
- * Acknowledge the segment if it has data or is not a duplicate ACK.
- */
- if (thflags != TH_ACK || tlen != 0 ||
- th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
- tcp_twrespond(tw, TH_ACK);
- goto drop;
-
- /*
- * Generate a RST, dropping incoming segment.
- * Make ACK acceptable to originator of segment.
- * Don't bother to respond if destination was broadcast/multicast.
- */
- if (m->m_flags & (M_BCAST|M_MCAST))
- goto drop;
- if (isipv6) {
- struct ip6_hdr *ip6;
-
- /* IPv6 anycast check is done at tcp6_input() */
- ip6 = mtod(m, struct ip6_hdr *);
- if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
- IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
- goto drop;
- } else {
- struct ip *ip;
-
- ip = mtod(m, struct ip *);
- if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
- ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
- goto drop;
- }
- if (thflags & TH_ACK) {
- tcp_respond(NULL,
- mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
- } else {
- seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
- tcp_respond(NULL,
- mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
- }
- INP_UNLOCK(tw->tw_inpcb);
- return (0);
-
-drop:
- INP_UNLOCK(tw->tw_inpcb);
- m_freem(m);
- return (0);
-}
Index: in_pcb.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_pcb.c -L sys/netinet/in_pcb.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 1982, 1986, 1991, 1993, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * Copyright (c) 2007 Robert N. M. Watson
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,27 +29,34 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.165.2.3 2006/02/14 22:09:27 rwatson Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_pcb.c,v 1.196.4.1 2007/12/22 20:54:46 rwatson Exp $");
+
+#include "opt_ddb.h"
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <vm/uma.h>
#include <net/if.h>
@@ -66,19 +75,13 @@
#include <netinet6/ip6_var.h>
#endif /* INET6 */
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
-#if defined(IPSEC) || defined(IPSEC_ESP)
-#error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
-#endif
+#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
/*
* These configure the range of local port addresses assigned to
@@ -167,19 +170,20 @@
/*
* Allocate a PCB and associate it with the socket.
+ * On success return with the PCB locked.
*/
int
-in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
+in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
{
struct inpcb *inp;
int error;
INP_INFO_WLOCK_ASSERT(pcbinfo);
error = 0;
- inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
+ inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ bzero(inp, inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
#ifdef MAC
@@ -190,31 +194,34 @@
mac_create_inpcb_from_socket(so, inp);
SOCK_UNLOCK(so);
#endif
-#if defined(IPSEC) || defined(FAST_IPSEC)
-#ifdef FAST_IPSEC
+
+#ifdef IPSEC
error = ipsec_init_policy(so, &inp->inp_sp);
-#else
- error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
+ if (error != 0) {
+#ifdef MAC
+ mac_destroy_inpcb(inp);
#endif
- if (error != 0)
goto out;
+}
#endif /*IPSEC*/
-#if defined(INET6)
+#ifdef INET6
if (INP_SOCKAF(so) == AF_INET6) {
inp->inp_vflag |= INP_IPV6PROTO;
if (ip6_v6only)
inp->inp_flags |= IN6P_IPV6_V6ONLY;
}
#endif
- LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
+ LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
- INP_LOCK_INIT(inp, "inp", type);
#ifdef INET6
if (ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
-#if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC)
+ INP_LOCK(inp);
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+
+#if defined(IPSEC) || defined(MAC)
out:
if (error != 0)
uma_zfree(pcbinfo->ipi_zone, inp);
@@ -280,7 +287,7 @@
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = 1;
+ wild = INPLOOKUP_WILDCARD;
if (nam) {
sin = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sin))
@@ -322,15 +329,19 @@
laddr = sin->sin_addr;
if (lport) {
struct inpcb *t;
+ struct tcptw *tw;
+
/* GROSS */
if (ntohs(lport) <= ipport_reservedhigh &&
ntohs(lport) >= ipport_reservedlow &&
- suser_cred(cred, SUSER_ALLOWJAIL))
+ priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
+ 0))
return (EACCES);
if (jailed(cred))
prison = 1;
- if (so->so_cred->cr_uid != 0 &&
- !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+ priv_check_cred(so->so_cred,
+ PRIV_NETINET_REUSEPORT, 0) != 0) {
t = in_pcblookup_local(inp->inp_pcbinfo,
sin->sin_addr, lport,
prison ? 0 : INPLOOKUP_WILDCARD);
@@ -355,19 +366,26 @@
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
lport, prison ? 0 : wild);
if (t && (t->inp_vflag & INP_TIMEWAIT)) {
- if ((reuseport & intotw(t)->tw_so_options) == 0)
+ /*
+ * XXXRW: If an inpcb has had its timewait
+ * state recycled, we treat the address as
+ * being in use (for now). This is better
+ * than a panic, but not desirable.
+ */
+ tw = intotw(inp);
+ if (tw == NULL ||
+ (reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else
- if (t &&
+ } else if (t &&
(reuseport & t->inp_socket->so_options) == 0) {
-#if defined(INET6)
+#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY ||
INP_SOCKAF(so) ==
INP_SOCKAF(t->inp_socket))
-#endif /* defined(INET6) */
+#endif
return (EADDRINUSE);
}
}
@@ -385,17 +403,19 @@
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
- lastport = &pcbinfo->lasthi;
+ lastport = &pcbinfo->ipi_lasthi;
} else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser_cred(cred, SUSER_ALLOWJAIL)) != 0)
+ error = priv_check_cred(cred,
+ PRIV_NETINET_RESERVEDPORT, 0);
+ if (error)
return error;
first = ipport_lowfirstauto; /* 1023 */
last = ipport_lowlastauto; /* 600 */
- lastport = &pcbinfo->lastlow;
+ lastport = &pcbinfo->ipi_lastlow;
} else {
first = ipport_firstauto; /* sysctl */
last = ipport_lastauto;
- lastport = &pcbinfo->lastport;
+ lastport = &pcbinfo->ipi_lastport;
}
/*
* For UDP, use random port allocation as long as the user
@@ -510,10 +530,7 @@
inp->inp_faddr.s_addr = faddr;
inp->inp_fport = fport;
in_pcbrehash(inp);
-#ifdef IPSEC
- if (inp->inp_socket->so_type == SOCK_STREAM)
- ipsec_pcbconn(inp->inp_sp);
-#endif
+
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
@@ -591,29 +608,23 @@
&in_ifaddrhead)->ia_broadaddr)->sin_addr;
}
if (laddr.s_addr == INADDR_ANY) {
- struct route sro;
-
- bzero(&sro, sizeof(sro));
ia = (struct in_ifaddr *)0;
/*
* If route is known our src addr is taken from the i/f,
* else punt.
+ *
+ * Find out route to destination
*/
- if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
- /* Find out route to destination */
- sro.ro_dst.sa_family = AF_INET;
- sro.ro_dst.sa_len = sizeof(struct sockaddr_in);
- ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr;
- rtalloc_ign(&sro, RTF_CLONING);
- }
+ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
+ ia = ip_rtaddr(faddr);
/*
- * If we found a route, use the address
- * corresponding to the outgoing interface.
+ * If we found a route, use the address corresponding to
+ * the outgoing interface.
+ *
+ * Otherwise assume faddr is reachable on a directly connected
+ * network and try to find a corresponding interface to take
+ * the source address from.
*/
- if (sro.ro_rt) {
- ia = ifatoia(sro.ro_rt->rt_ifa);
- RTFREE(sro.ro_rt);
- }
if (ia == 0) {
bzero(&sa, sizeof(sa));
sa.sin_addr = faddr;
@@ -679,44 +690,81 @@
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
in_pcbrehash(inp);
-#ifdef IPSEC
- ipsec_pcbdisconn(inp->inp_sp);
-#endif
- if (inp->inp_socket->so_state & SS_NOFDREF)
- in_pcbdetach(inp);
}
+/*
+ * In the old world order, in_pcbdetach() served two functions: to detach the
+ * pcb from the socket/potentially free the socket, and to free the pcb
+ * itself. In the new world order, the protocol code is responsible for
+ * managing the relationship with the socket, and this code simply frees the
+ * pcb.
+ */
void
in_pcbdetach(struct inpcb *inp)
{
- struct socket *so = inp->inp_socket;
+
+ KASSERT(inp->inp_socket != NULL, ("in_pcbdetach: inp_socket == NULL"));
+ inp->inp_socket->so_pcb = NULL;
+ inp->inp_socket = NULL;
+}
+
+void
+in_pcbfree(struct inpcb *inp)
+{
struct inpcbinfo *ipi = inp->inp_pcbinfo;
+ KASSERT(inp->inp_socket == NULL, ("in_pcbfree: inp_socket != NULL"));
INP_INFO_WLOCK_ASSERT(ipi);
INP_LOCK_ASSERT(inp);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
ipsec4_delete_pcbpolicy(inp);
#endif /*IPSEC*/
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
- if (so) {
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- sotryfree(so);
- }
if (inp->inp_options)
(void)m_free(inp->inp_options);
- ip_freemoptions(inp->inp_moptions);
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
- INP_LOCK_DESTROY(inp);
+
#ifdef MAC
mac_destroy_inpcb(inp);
#endif
+ INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}
+/*
+ * TCP needs to maintain its inpcb structure after the TCP connection has
+ * been torn down. However, it must be disconnected from the inpcb hashes as
+ * it must not prevent binding of future connections to the same port/ip
+ * combination by other inpcbs.
+ */
+void
+in_pcbdrop(struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ inp->inp_vflag |= INP_DROPPED;
+ if (inp->inp_lport) {
+ struct inpcbport *phd = inp->inp_phd;
+
+ LIST_REMOVE(inp, inp_hash);
+ LIST_REMOVE(inp, inp_portlist);
+ if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ LIST_REMOVE(phd, phd_hash);
+ free(phd, M_PCB);
+ }
+ inp->inp_lport = 0;
+ }
+}
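A toy user-space model of the in_pcbdrop() idea described in the comment above: the pcb leaves the lookup lists, so its port no longer blocks new binds, but the structure itself stays allocated until the in_pcbfree() analogue runs. Field and list names below are made up, not the kernel's.

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct pcb {
	unsigned short	lport;
	int		dropped;
	LIST_ENTRY(pcb)	hash;
};

static LIST_HEAD(, pcb) pcbhash = LIST_HEAD_INITIALIZER(pcbhash);

static int
port_in_use(unsigned short port)
{
	struct pcb *p;

	LIST_FOREACH(p, &pcbhash, hash)
		if (p->lport == port)
			return (1);
	return (0);
}

static void
pcb_drop(struct pcb *p)
{
	p->dropped = 1;
	if (p->lport) {
		LIST_REMOVE(p, hash);	/* leave the lookup structures */
		p->lport = 0;
	}
}

int
main(void)
{
	struct pcb *p = calloc(1, sizeof(*p));

	if (p == NULL)
		return (1);
	p->lport = 8080;
	LIST_INSERT_HEAD(&pcbhash, p, hash);
	printf("before drop: port in use %d\n", port_in_use(8080));
	pcb_drop(p);		/* connection torn down, pcb retained */
	printf("after drop:  port in use %d, pcb still around %d\n",
	    port_in_use(8080), p->dropped);
	free(p);		/* the in_pcbfree() analogue */
	return (0);
}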
+
+/*
+ * Common routines to return the socket addresses associated with inpcbs.
+ */
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr_p)
{
@@ -732,60 +780,39 @@
return (struct sockaddr *)sin;
}
-/*
- * The wrapper function will pass down the pcbinfo for this function to lock.
- * The socket must have a valid
- * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
- * except through a kernel programming error, so it is acceptable to panic
- * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
- * because there actually /is/ a programming error somewhere... XXX)
- */
int
-in_setsockaddr(struct socket *so, struct sockaddr **nam,
- struct inpcbinfo *pcbinfo)
+in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
struct inpcb *inp;
struct in_addr addr;
in_port_t port;
- INP_INFO_RLOCK(pcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(pcbinfo);
- return ECONNRESET;
- }
+ KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
+
INP_LOCK(inp);
port = inp->inp_lport;
addr = inp->inp_laddr;
INP_UNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
*nam = in_sockaddr(port, &addr);
return 0;
}
-/*
- * The wrapper function will pass down the pcbinfo for this function to lock.
- */
int
-in_setpeeraddr(struct socket *so, struct sockaddr **nam,
- struct inpcbinfo *pcbinfo)
+in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
struct inpcb *inp;
struct in_addr addr;
in_port_t port;
- INP_INFO_RLOCK(pcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(pcbinfo);
- return ECONNRESET;
- }
+ KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
+
INP_LOCK(inp);
port = inp->inp_fport;
addr = inp->inp_faddr;
INP_UNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
*nam = in_sockaddr(port, &addr);
return 0;
@@ -799,7 +826,7 @@
struct inpcbhead *head;
INP_INFO_WLOCK(pcbinfo);
- head = pcbinfo->listhead;
+ head = pcbinfo->ipi_listhead;
for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
INP_LOCK(inp);
ninp = LIST_NEXT(inp, inp_list);
@@ -828,7 +855,7 @@
int i, gap;
INP_INFO_RLOCK(pcbinfo);
- LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
+ LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_LOCK(inp);
imo = inp->inp_moptions;
if ((inp->inp_vflag & INP_IPV4) &&
@@ -885,7 +912,8 @@
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -914,9 +942,8 @@
* First see if this local port is in use by looking on the
* port hash list.
*/
- retrylookup:
- porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
- pcbinfo->porthashmask)];
+ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+ pcbinfo->ipi_porthashmask)];
LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
@@ -947,18 +974,6 @@
if ((inp->inp_vflag & INP_IPV6) != 0)
wildcard += INP_LOOKUP_MAPPED_PCB_COST;
#endif
- /*
- * Clean out old time_wait sockets if they
- * are clogging up needed local ports.
- */
- if ((inp->inp_vflag & INP_TIMEWAIT) != 0) {
- if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) {
- INP_LOCK(inp);
- tcp_twclose((struct tcptw *)inp->inp_ppcb, 0);
- match = NULL;
- goto retrylookup;
- }
- }
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
if (inp->inp_laddr.s_addr != INADDR_ANY) {
@@ -997,10 +1012,12 @@
u_short fport = fport_arg, lport = lport_arg;
INP_INFO_RLOCK_ASSERT(pcbinfo);
+
/*
* First look for an exact match.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1009,20 +1026,21 @@
if (inp->inp_faddr.s_addr == faddr.s_addr &&
inp->inp_laddr.s_addr == laddr.s_addr &&
inp->inp_fport == fport &&
- inp->inp_lport == lport) {
- /*
- * Found.
- */
+ inp->inp_lport == lport)
return (inp);
- }
}
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
if (wildcard) {
struct inpcb *local_wild = NULL;
-#if defined(INET6)
+#ifdef INET6
struct inpcb *local_wild_mapped = NULL;
-#endif /* defined(INET6) */
+#endif
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1036,26 +1054,22 @@
if (inp->inp_laddr.s_addr == laddr.s_addr)
return (inp);
else if (inp->inp_laddr.s_addr == INADDR_ANY) {
-#if defined(INET6)
+#ifdef INET6
if (INP_CHECK_SOCKAF(inp->inp_socket,
AF_INET6))
local_wild_mapped = inp;
else
-#endif /* defined(INET6) */
- local_wild = inp;
+#endif
+ local_wild = inp;
}
}
}
-#if defined(INET6)
+#ifdef INET6
if (local_wild == NULL)
return (local_wild_mapped);
-#endif /* defined(INET6) */
+#endif
return (local_wild);
}
-
- /*
- * Not found.
- */
return (NULL);
}
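The lookup order implemented above, first an exact four-tuple match and only then a wildcard pass where a listener bound to the specific local address beats one bound to INADDR_ANY, can be modelled in a few lines of user-space C. The types and table below are deliberately simplified and are not the kernel's hash chains.

#include <stdio.h>
#include <stdint.h>

#define ANY 0

struct pcb {
	uint32_t	faddr, laddr;
	uint16_t	fport, lport;
	const char	*name;
};

static const struct pcb *
lookup(const struct pcb *tbl, int n, uint32_t faddr, uint16_t fport,
    uint32_t laddr, uint16_t lport, int wildcard)
{
	const struct pcb *local_wild = NULL;

	for (int i = 0; i < n; i++)		/* exact match pass */
		if (tbl[i].faddr == faddr && tbl[i].fport == fport &&
		    tbl[i].laddr == laddr && tbl[i].lport == lport)
			return (&tbl[i]);
	if (!wildcard)
		return (NULL);
	for (int i = 0; i < n; i++) {		/* wildcard (listener) pass */
		if (tbl[i].faddr != ANY || tbl[i].lport != lport)
			continue;
		if (tbl[i].laddr == laddr)
			return (&tbl[i]);
		if (tbl[i].laddr == ANY)
			local_wild = &tbl[i];
	}
	return (local_wild);
}

int
main(void)
{
	const struct pcb tbl[] = {
		{ 0x0a000001, 0x0a000002, 1234, 80, "established" },
		{ ANY,        0x0a000002, 0,    80, "listener on 10.0.0.2" },
		{ ANY,        ANY,        0,    80, "listener on *" },
	};
	const struct pcb *p;

	p = lookup(tbl, 3, 0x0a000001, 1234, 0x0a000002, 80, 1);
	printf("%s\n", p->name);		/* established */
	p = lookup(tbl, 3, 0x0a000009, 4321, 0x0a000002, 80, 1);
	printf("%s\n", p->name);		/* listener on 10.0.0.2 */
	p = lookup(tbl, 3, 0x0a000009, 4321, 0x0a000003, 80, 1);
	printf("%s\n", p->name);		/* listener on * */
	return (0);
}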
@@ -1072,6 +1086,8 @@
u_int32_t hashkey_faddr;
INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -1079,11 +1095,11 @@
#endif /* INET6 */
hashkey_faddr = inp->inp_faddr.s_addr;
- pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
+ pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
- pcbinfo->porthashmask)];
+ pcbporthash = &pcbinfo->ipi_porthashbase[
+ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
* Go through port list and look for a head for this lport.
@@ -1125,6 +1141,7 @@
INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -1132,8 +1149,8 @@
#endif /* INET6 */
hashkey_faddr = inp->inp_faddr.s_addr;
- head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
@@ -1175,7 +1192,9 @@
#ifdef MAC
struct inpcb *inp;
- inp = (struct inpcb *)so->so_pcb;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
+
INP_LOCK(inp);
SOCK_LOCK(so);
mac_inpcb_sosetlabel(so, inp);
@@ -1185,23 +1204,268 @@
}
/*
- * ipport_tick runs once per second, determining if random port
- * allocation should be continued. If more than ipport_randomcps
- * ports have been allocated in the last second, then we return to
- * sequential port allocation. We return to random allocation only
- * once we drop below ipport_randomcps for at least ipport_randomtime
- * seconds.
+ * ipport_tick runs once per second, determining if random port allocation
+ * should be continued. If more than ipport_randomcps ports have been
+ * allocated in the last second, then we return to sequential port
+ * allocation. We return to random allocation only once we drop below
+ * ipport_randomcps for at least ipport_randomtime seconds.
*/
-
void
ipport_tick(void *xtp)
{
- if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
- ipport_stoprandom = ipport_randomtime;
- } else {
+
+ if (ipport_tcpallocs <= ipport_tcplastcount + ipport_randomcps) {
if (ipport_stoprandom > 0)
ipport_stoprandom--;
- }
+ } else
+ ipport_stoprandom = ipport_randomtime;
ipport_tcplastcount = ipport_tcpallocs;
callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
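
A quick worked reading of the throttle above, with made-up numbers (a sketch,
not part of the committed code): if ipport_randomcps were 10 and
ipport_randomtime 45, any second in which more than 10 ports get allocated
switches the stack back to sequential port allocation, and only 45
consecutive quiet seconds re-enable randomization. The per-second decision
reduces to roughly:

	/* Sketch only; the counter names here are illustrative. */
	if (allocs_this_second > 10)		/* burst seen this second */
		stoprandom = 45;		/* stay sequential for 45 s */
	else if (stoprandom > 0)
		stoprandom--;			/* quiet second, count down */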
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
+{
+ char faddr_str[48], laddr_str[48];
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inc);
+
+ indent += 2;
+
+#ifdef INET6
+ if (inc->inc_flags == 1) {
+ /* IPv6. */
+ ip6_sprintf(laddr_str, &inc->inc6_laddr);
+ ip6_sprintf(faddr_str, &inc->inc6_faddr);
+ } else {
+#endif
+ /* IPv4. */
+ inet_ntoa_r(inc->inc_laddr, laddr_str);
+ inet_ntoa_r(inc->inc_faddr, faddr_str);
+#ifdef INET6
+ }
+#endif
+ db_print_indent(indent);
+ db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
+ ntohs(inc->inc_lport));
+ db_print_indent(indent);
+ db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
+ ntohs(inc->inc_fport));
+}
+
+static void
+db_print_inpflags(int inp_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_flags & INP_RECVOPTS) {
+ db_printf("%sINP_RECVOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVRETOPTS) {
+ db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVDSTADDR) {
+ db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HDRINCL) {
+ db_printf("%sINP_HDRINCL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HIGHPORT) {
+ db_printf("%sINP_HIGHPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_LOWPORT) {
+ db_printf("%sINP_LOWPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_ANONPORT) {
+ db_printf("%sINP_ANONPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVIF) {
+ db_printf("%sINP_RECVIF", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_MTUDISC) {
+ db_printf("%sINP_MTUDISC", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_FAITH) {
+ db_printf("%sINP_FAITH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVTTL) {
+ db_printf("%sINP_RECVTTL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_DONTFRAG) {
+ db_printf("%sINP_DONTFRAG", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_IPV6_V6ONLY) {
+ db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_PKTINFO) {
+ db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPLIMIT) {
+ db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPOPTS) {
+ db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_DSTOPTS) {
+ db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDR) {
+ db_printf("%sIN6P_RTHDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDRDSTOPTS) {
+ db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_TCLASS) {
+ db_printf("%sIN6P_TCLASS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_AUTOFLOWLABEL) {
+ db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RFC2292) {
+ db_printf("%sIN6P_RFC2292", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_MTU) {
+ db_printf("IN6P_MTU%s", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_inpvflag(u_char inp_vflag)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_vflag & INP_IPV4) {
+ db_printf("%sINP_IPV4", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6) {
+ db_printf("%sINP_IPV6", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6PROTO) {
+ db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_TIMEWAIT) {
+ db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_ONESBCAST) {
+ db_printf("%sINP_ONESBCAST", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_DROPPED) {
+ db_printf("%sINP_DROPPED", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_SOCKREF) {
+ db_printf("%sINP_SOCKREF", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+void
+db_print_inpcb(struct inpcb *inp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("inp_flow: 0x%x\n", inp->inp_flow);
+
+ db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
+
+ db_print_indent(indent);
+ db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
+ inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
+
+ db_print_indent(indent);
+ db_printf("inp_label: %p inp_flags: 0x%x (",
+ inp->inp_label, inp->inp_flags);
+ db_print_inpflags(inp->inp_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
+ inp->inp_vflag);
+ db_print_inpvflag(inp->inp_vflag);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
+ inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
+
+ db_print_indent(indent);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ db_printf("in6p_options: %p in6p_outputopts: %p "
+ "in6p_moptions: %p\n", inp->in6p_options,
+ inp->in6p_outputopts, inp->in6p_moptions);
+ db_printf("in6p_icmp6filt: %p in6p_cksum %d "
+ "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
+ inp->in6p_hops);
+ } else
+#endif
+ {
+ db_printf("inp_ip_tos: %d inp_ip_options: %p "
+ "inp_ip_moptions: %p\n", inp->inp_ip_tos,
+ inp->inp_options, inp->inp_moptions);
+ }
+
+ db_print_indent(indent);
+ db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
+ (uintmax_t)inp->inp_gencnt);
+}
+
+DB_SHOW_COMMAND(inpcb, db_show_inpcb)
+{
+ struct inpcb *inp;
+
+ if (!have_addr) {
+ db_printf("usage: show inpcb <addr>\n");
+ return;
+ }
+ inp = (struct inpcb *)addr;
+
+ db_print_inpcb(inp, "inpcb", 0);
+}
+#endif
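
A usage note on the DDB hook added above (not part of the diff): on a kernel
built with DDB, the command is invoked from the debugger prompt as
"show inpcb <addr>", where <addr> is an inpcb pointer obtained elsewhere.
db_print_inpcb() is left non-static, presumably so protocol-specific printers
can reuse it; a hypothetical reuse would look like:

	/*
	 * Sketch of reuse from another DDB command; the command name
	 * "mypcb" is illustrative, not something this import adds.
	 */
	DB_SHOW_COMMAND(mypcb, db_show_mypcb)
	{
		struct inpcb *inp;

		if (!have_addr) {
			db_printf("usage: show mypcb <addr>\n");
			return;
		}
		inp = (struct inpcb *)addr;
		db_print_inpcb(inp, "inpcb", 0);
	}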
--- /dev/null
+++ sys/netinet/sctp_indata.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.h,v 1.9 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_indata.h,v 1.9 2007/06/22 13:50:56 rrs Exp $");
+
+#ifndef __sctp_indata_h__
+#define __sctp_indata_h__
+
+#if defined(_KERNEL)
+
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm);
+
+
+#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+ if (_ctl) { \
+ atomic_add_int(&((net)->ref_count), 1); \
+ (_ctl)->sinfo_stream = stream_no; \
+ (_ctl)->sinfo_ssn = stream_seq; \
+ (_ctl)->sinfo_flags = (flags << 8); \
+ (_ctl)->sinfo_ppid = ppid; \
+ (_ctl)->sinfo_context = a; \
+ (_ctl)->sinfo_timetolive = 0; \
+ (_ctl)->sinfo_tsn = tsn; \
+ (_ctl)->sinfo_cumtsn = tsn; \
+ (_ctl)->sinfo_assoc_id = sctp_get_associd((in_it)); \
+ (_ctl)->length = 0; \
+ (_ctl)->held_length = 0; \
+ (_ctl)->whoFrom = net; \
+ (_ctl)->data = dm; \
+ (_ctl)->tail_mbuf = NULL; \
+ (_ctl)->aux_data = NULL; \
+ (_ctl)->stcb = (in_it); \
+ (_ctl)->port_from = (in_it)->rport; \
+ (_ctl)->spec_flags = 0; \
+ (_ctl)->do_not_ref_stcb = 0; \
+ (_ctl)->end_added = 0; \
+ (_ctl)->pdapi_aborted = 0; \
+ (_ctl)->some_taken = 0; \
+ } \
+} while (0)
+
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo);
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo);
+
+void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *);
+
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc);
+
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now);
+
+void
+sctp_handle_sack(struct mbuf *m, int offset, struct sctp_sack_chunk *, struct sctp_tcb *,
+ struct sctp_nets *, int *, int, uint32_t);
+
+/* draft-ietf-tsvwg-usctp */
+void
+sctp_handle_forward_tsn(struct sctp_tcb *,
+ struct sctp_forward_tsn_chunk *, int *, struct mbuf *, int);
+
+struct sctp_tmit_chunk *
+ sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_service_queues(struct sctp_tcb *, struct sctp_association *);
+
+void
+sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *,
+ struct sctp_nets *, int *);
+
+int
+sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+ struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t *);
+
+void sctp_sack_check(struct sctp_tcb *, int, int, int *);
+
+#endif
+#endif
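
Two entry points in this new header do the same job at different layers:
sctp_build_readq_entry() allocates and fills a read-queue entry, while the
sctp_build_readq_entry_mac() macro only fills in a control block the caller
has already allocated (the macro also bumps the destination net's ref_count).
A hedged call sketch for the function, using only names taken from its
parameter list (the variables themselves are hypothetical):

	struct sctp_queued_to_read *ctl;

	ctl = sctp_build_readq_entry(stcb, net, tsn, ppid, context,
	    stream_no, stream_seq, flags, dm);
	if (ctl == NULL) {
		/* Allocation failed; the caller would drop the chunk. */
	}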
Index: ip_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_var.h -L sys/netinet/ip_var.h -u -r1.2 -r1.3
--- sys/netinet/ip_var.h
+++ sys/netinet/ip_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/ip_var.h,v 1.95 2005/07/02 23:13:31 thompsa Exp $
+ * $FreeBSD: src/sys/netinet/ip_var.h,v 1.101 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IP_VAR_H_
@@ -61,7 +61,7 @@
struct mbuf *ipq_frags; /* to ip headers of fragments */
struct in_addr ipq_src,ipq_dst;
u_char ipq_nfrags; /* # frags in this packet */
- struct label *ipq_label; /* MAC label */
+ struct label *ipq_label; /* MAC label */
};
#endif /* _KERNEL */
@@ -79,17 +79,39 @@
};
/*
+ * Multicast source list entry.
+ */
+struct in_msource {
+ TAILQ_ENTRY(in_msource) ims_next; /* next source */
+ struct sockaddr_storage ims_addr; /* address of this source */
+};
+
+/*
+ * Multicast filter descriptor; there is one instance per group membership
+ * on a socket, allocated as an expandable vector hung off ip_moptions.
+ * struct in_multi contains separate IPv4-stack-wide state for IGMPv3.
+ */
+struct in_mfilter {
+ uint16_t imf_fmode; /* filter mode for this socket/group */
+ uint16_t imf_nsources; /* # of sources for this socket/group */
+ TAILQ_HEAD(, in_msource) imf_sources; /* source list */
+};
+
+/*
* Structure attached to inpcb.ip_moptions and
* passed to ip_output when IP multicast options are in use.
+ * This structure is lazy-allocated.
*/
struct ip_moptions {
struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
+ u_long imo_multicast_vif; /* vif num outgoing multicasts */
u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
u_char imo_multicast_loop; /* 1 => hear sends if a member */
u_short imo_num_memberships; /* no. memberships this socket */
- struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
- u_long imo_multicast_vif; /* vif num outgoing multicasts */
+ u_short imo_max_memberships; /* max memberships this socket */
+ struct in_multi **imo_membership; /* group memberships */
+ struct in_mfilter *imo_mfilters; /* source filters */
};
struct ipstat {
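
The practical consequence of the ip_moptions change above: imo_membership is
no longer a fixed IP_MAX_MEMBERSHIPS array but a pointer its owner allocates,
with imo_max_memberships tracking the vector size and inp_freemoptions()
(declared below) tearing a socket's set down. The ip_carp.c hunk later in
this mail does exactly this for its private ip_moptions; a minimal sketch of
the same pattern (the softc pointer and M_CONSUMER malloc type are
placeholders, error handling omitted):

	struct ip_moptions *imo = &sc->sc_imo;	/* consumer-owned options */

	imo->imo_membership = (struct in_multi **)malloc(
	    sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS,
	    M_CONSUMER, M_WAITOK);
	imo->imo_mfilters = NULL;		/* no source filters yet */
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_num_memberships = 0;
	imo->imo_multicast_vif = -1;		/* no multicast vif */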
@@ -130,10 +152,13 @@
#define IP_FORWARDING 0x1 /* most of ip header exists */
#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
#define IP_SENDONES 0x4 /* send all-ones broadcast */
-#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
-#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+#define IP_SENDTOIF 0x8 /* send on specific ifnet */
+#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
+#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
-/* mbuf flag used by ip_fastfwd */
+/*
+ * mbuf flag used by ip_fastfwd
+ */
#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
#ifdef __NO_STRICT_ALIGNMENT
@@ -144,52 +169,50 @@
struct ip;
struct inpcb;
-struct inpcbinfo;
struct route;
struct sockopt;
extern struct ipstat ipstat;
-extern u_short ip_id; /* ip packet ctr, for ids */
-extern int ip_defttl; /* default IP ttl */
-extern int ipforwarding; /* ip forwarding */
-extern int ip_doopts; /* process or ignore IP options */
+extern u_short ip_id; /* ip packet ctr, for ids */
+extern int ip_defttl; /* default IP ttl */
+extern int ipforwarding; /* ip forwarding */
#ifdef IPSTEALTH
-extern int ipstealth; /* stealth forwarding */
+extern int ipstealth; /* stealth forwarding */
#endif
extern u_char ip_protox[];
-extern struct socket *ip_rsvpd; /* reservation protocol daemon */
-extern struct socket *ip_mrouter; /* multicast routing daemon */
+extern struct socket *ip_rsvpd; /* reservation protocol daemon */
+extern struct socket *ip_mrouter; /* multicast routing daemon */
extern int (*legal_vif_num)(int);
extern u_long (*ip_mcast_src)(int);
extern int rsvp_on;
extern struct pr_usrreqs rip_usrreqs;
-int ip_ctloutput(struct socket *, struct sockopt *sopt);
-int ip_ctloutput_pcbinfo(struct socket *, struct sockopt *sopt,
- struct inpcbinfo *pcbinfo);
-void ip_drain(void);
-void ip_fini(void *xtp);
-int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
+void inp_freemoptions(struct ip_moptions *);
+int inp_getmoptions(struct inpcb *, struct sockopt *);
+int inp_setmoptions(struct inpcb *, struct sockopt *);
+
+int ip_ctloutput(struct socket *, struct sockopt *sopt);
+void ip_drain(void);
+void ip_fini(void *xtp);
+int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
-void ip_freemoptions(struct ip_moptions *);
-void ip_init(void);
-extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
- struct ip_moptions *);
-int ip_output(struct mbuf *,
+void ip_forward(struct mbuf *m, int srcrt);
+void ip_init(void);
+extern int
+ (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+int ip_output(struct mbuf *,
struct mbuf *, struct route *, int, struct ip_moptions *,
struct inpcb *);
-int ipproto_register(u_char);
-int ipproto_unregister(u_char);
+int ipproto_register(u_char);
+int ipproto_unregister(u_char);
struct mbuf *
- ip_reass(struct mbuf *);
+ ip_reass(struct mbuf *);
struct in_ifaddr *
- ip_rtaddr(struct in_addr);
-void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
- struct mbuf *);
-void ip_slowtimo(void);
-struct mbuf *
- ip_srcroute(struct mbuf *);
-void ip_stripoptions(struct mbuf *, struct mbuf *);
+ ip_rtaddr(struct in_addr);
+void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
+ struct mbuf *);
+void ip_slowtimo(void);
u_int16_t ip_randomid(void);
int rip_ctloutput(struct socket *, struct sockopt *);
void rip_ctlinput(int, struct sockaddr *, void *);
Index: ip_carp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_carp.c -L sys/netinet/ip_carp.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_carp.c
+++ sys/netinet/ip_carp.c
@@ -1,5 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_carp.c,v 1.27.2.6 2005/12/25 21:59:20 mlaier Exp $ */
-
/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
* Copyright (c) 2003 Ryan McBride. All rights reserved.
@@ -26,6 +24,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_carp.c,v 1.52 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_carp.h"
#include "opt_bpf.h"
#include "opt_inet.h"
@@ -41,6 +42,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -59,6 +61,7 @@
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
+#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -78,7 +81,6 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
-#include <net/if_dl.h>
#endif
#include <crypto/sha1.h>
@@ -189,9 +191,9 @@
unsigned char *);
static void carp_setroute(struct carp_softc *, int);
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int carp_clone_create(struct if_clone *, int);
+static int carp_clone_create(struct if_clone *, int, caddr_t);
static void carp_clone_destroy(struct ifnet *);
-static void carpdetach(struct carp_softc *);
+static void carpdetach(struct carp_softc *, int);
static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
struct carp_header *);
static void carp_send_ad_all(void);
@@ -209,6 +211,7 @@
static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+static void carp_multicast_cleanup(struct carp_softc *);
static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
static void carp_carpdev_state_locked(struct carp_if *);
@@ -217,12 +220,15 @@
static void carp_send_na(struct carp_softc *);
static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static void carp_multicast6_cleanup(struct carp_softc *);
#endif
static LIST_HEAD(, carp_softc) carpif_list;
static struct mtx carp_mtx;
IFC_SIMPLE_DECLARE(carp, 0);
+static eventhandler_tag if_detach_event_tag;
+
static __inline u_int16_t
carp_cksum(struct mbuf *m, int len)
{
@@ -349,7 +355,7 @@
}
static int
-carp_clone_create(struct if_clone *ifc, int unit)
+carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct carp_softc *sc;
@@ -372,10 +378,16 @@
#ifdef INET6
sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
#endif
-
- callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE);
- callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE);
- callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE);
+ sc->sc_imo.imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ sc->sc_imo.imo_mfilters = NULL;
+ sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ sc->sc_imo.imo_multicast_vif = -1;
+
+ callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
ifp->if_softc = sc;
if_initname(ifp, CARP_IFNAME, unit);
@@ -399,63 +411,90 @@
carp_clone_destroy(struct ifnet *ifp)
{
struct carp_softc *sc = ifp->if_softc;
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+ carpdetach(sc, 1); /* Returns unlocked. */
+
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+ free(sc->sc_imo.imo_membership, M_CARP);
+ free(sc, M_CARP);
+}
+
+/*
+ * This function can be called on CARP interface destroy path,
+ * and in case of the removal of the underlying interface as
+ * well. We differentiate these two cases. In the latter case
+ * we do not cleanup our multicast memberships, since they
+ * are already freed. Also, in the latter case we do not
+ * release the lock on return, because the function will be
+ * called once more, for another CARP instance on the same
+ * interface.
+ */
+static void
+carpdetach(struct carp_softc *sc, int unlock)
+{
struct carp_if *cif;
- struct ip_moptions *imo = &sc->sc_imo;
-#ifdef INET6
- struct ip6_moptions *im6o = &sc->sc_im6o;
-#endif
-
-/* carpdetach(sc); */
- /*
- * If an interface is destroyed which is suppressing the preemption,
- * decrease the global counter, otherwise the host will never get
- * out of the carp supressing state.
- */
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+
if (sc->sc_suppress)
carp_suppress_preempt--;
sc->sc_suppress = 0;
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
- if (imo->imo_num_memberships) {
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_multicast_ifp = NULL;
- }
+ carp_set_state(sc, INIT);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ carp_setrun(sc, 0);
+ if (unlock)
+ carp_multicast_cleanup(sc);
#ifdef INET6
- while (!LIST_EMPTY(&im6o->im6o_memberships)) {
- struct in6_multi_mship *imm =
- LIST_FIRST(&im6o->im6o_memberships);
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
- im6o->im6o_multicast_ifp = NULL;
+ carp_multicast6_cleanup(sc);
#endif
- /* Remove ourself from parents if_carp queue */
- if (sc->sc_carpdev && (cif = sc->sc_carpdev->if_carp)) {
- CARP_LOCK(cif);
+ if (sc->sc_carpdev != NULL) {
+ cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ CARP_LOCK_ASSERT(cif);
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
if (!--cif->vhif_nvrs) {
+ ifpromisc(sc->sc_carpdev, 0);
sc->sc_carpdev->if_carp = NULL;
CARP_LOCK_DESTROY(cif);
- FREE(cif, M_CARP);
- ifpromisc(sc->sc_carpdev, 0);
- sc->sc_carpdev = NULL;
- } else {
+ FREE(cif, M_IFADDR);
+ } else if (unlock)
CARP_UNLOCK(cif);
- }
+ sc->sc_carpdev = NULL;
}
+}
- mtx_lock(&carp_mtx);
- LIST_REMOVE(sc, sc_next);
- mtx_unlock(&carp_mtx);
- bpfdetach(ifp);
- if_detach(ifp);
- if_free_type(ifp, IFT_ETHER);
- free(sc, M_CARP);
+/* Detach an interface from the carp. */
+static void
+carp_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+ struct carp_softc *sc, *nextsc;
+
+ if (cif == NULL)
+ return;
+
+ /*
+ * XXX: At the end of for() cycle the lock will be destroyed.
+ */
+ CARP_LOCK(cif);
+ for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
+ nextsc = TAILQ_NEXT(sc, sc_list);
+ carpdetach(sc, 0);
+ }
}
/*
@@ -641,7 +680,7 @@
SC2IFP(sc)->if_ipackets++;
SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
- if (SC2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
struct ip *ip = mtod(m, struct ip *);
uint32_t af1 = af;
@@ -751,42 +790,6 @@
return;
}
-static void
-carpdetach(struct carp_softc *sc)
-{
- struct ifaddr *ifa;
-
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
-
- while ((ifa = TAILQ_FIRST(&SC2IFP(sc)->if_addrlist)) != NULL)
- if (ifa->ifa_addr->sa_family == AF_INET) {
- struct in_ifaddr *ia = ifatoia(ifa);
-
- carp_del_addr(sc, &ia->ia_addr);
-
- /* ripped screaming from in_control(SIOCDIFADDR) */
- in_ifscrub(SC2IFP(sc), ia);
- TAILQ_REMOVE(&SC2IFP(sc)->if_addrlist, ifa, ifa_link);
- TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
- IFAFREE((&ia->ia_ifa));
- }
-}
-
-/* Detach an interface from the carp. */
-void
-carp_ifdetach(struct ifnet *ifp)
-{
- struct carp_softc *sc;
- struct carp_if *cif = (struct carp_if *)ifp->if_carp;
-
- CARP_LOCK(cif);
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
- carpdetach(sc);
- CARP_UNLOCK(cif);
-}
-
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
@@ -984,9 +987,14 @@
sizeof(struct in6_addr));
/* set the multicast destination */
- ip6->ip6_dst.s6_addr8[0] = 0xff;
- ip6->ip6_dst.s6_addr8[1] = 0x02;
+ ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
+ if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+ SC2IFP(sc)->if_oerrors++;
+ m_freem(m);
+ CARP_LOG("%s: in6_setscope failed\n", __func__);
+ return;
+ }
ch_ptr = (struct carp_header *)(&ip6[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
@@ -1049,8 +1057,8 @@
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
-/* arprequest(sc->sc_carpdev, &in, &in, IFP2ENADDR(sc->sc_ifp)); */
- arp_ifinit2(sc->sc_carpdev, ifa, IFP2ENADDR(sc->sc_ifp));
+/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
+ arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
DELAY(1000); /* XXX */
}
@@ -1145,7 +1153,7 @@
if (count == index) {
if (vh->sc_state ==
MASTER) {
- *enaddr = IFP2ENADDR(vh->sc_ifp);
+ *enaddr = IF_LLADDR(vh->sc_ifp);
CARP_UNLOCK(cif);
return (1);
} else {
@@ -1164,7 +1172,7 @@
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
ia->ia_ifp == SC2IFP(vh) &&
vh->sc_state == MASTER) {
- *enaddr = IFP2ENADDR(vh->sc_ifp);
+ *enaddr = IF_LLADDR(vh->sc_ifp);
CARP_UNLOCK(cif);
return (1);
}
@@ -1221,14 +1229,14 @@
if (mtag == NULL) {
/* better a bit than nothing */
CARP_UNLOCK(cif);
- return (IFP2ENADDR(sc->sc_ifp));
+ return (IF_LLADDR(sc->sc_ifp));
}
bcopy(&ifp, (caddr_t)(mtag + 1),
sizeof(struct ifnet *));
m_tag_prepend(m, mtag);
CARP_UNLOCK(cif);
- return (IFP2ENADDR(sc->sc_ifp));
+ return (IF_LLADDR(sc->sc_ifp));
}
}
}
@@ -1253,7 +1261,7 @@
if ((SC2IFP(vh)->if_flags & IFF_UP) &&
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
vh->sc_state == MASTER &&
- !bcmp(dhost, IFP2ENADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
+ !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
CARP_UNLOCK(cif);
return (SC2IFP(vh));
}
@@ -1307,7 +1315,11 @@
{
struct timeval tv;
- if (sc->sc_carpdev)
+ if (sc->sc_carpdev == NULL) {
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ carp_set_state(sc, INIT);
+ return;
+ } else
CARP_SCLOCK_ASSERT(sc);
if (SC2IFP(sc)->if_flags & IFF_UP &&
@@ -1374,6 +1386,42 @@
}
}
+static void
+carp_multicast_cleanup(struct carp_softc *sc)
+{
+ struct ip_moptions *imo = &sc->sc_imo;
+ u_int16_t n = imo->imo_num_memberships;
+
+ /* Clean up our own multicast memberships */
+ while (n-- > 0) {
+ if (imo->imo_membership[n] != NULL) {
+ in_delmulti(imo->imo_membership[n]);
+ imo->imo_membership[n] = NULL;
+ }
+ }
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ imo->imo_num_memberships = 0;
+ imo->imo_multicast_ifp = NULL;
+}
+
+#ifdef INET6
+static void
+carp_multicast6_cleanup(struct carp_softc *sc)
+{
+ struct ip6_moptions *im6o = &sc->sc_im6o;
+
+ while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+ struct in6_multi_mship *imm =
+ LIST_FIRST(&im6o->im6o_memberships);
+
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
+ im6o->im6o_multicast_ifp = NULL;
+}
+#endif
+
static int
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
@@ -1827,7 +1875,8 @@
break;
case SIOCSVH:
- if ((error = suser(curthread)) != 0)
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error)
break;
if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
break;
@@ -1861,16 +1910,20 @@
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
if (vr != sc &&
- vr->sc_vhid == carpr.carpr_vhid)
- return EEXIST;
+ vr->sc_vhid == carpr.carpr_vhid) {
+ error = EEXIST;
+ break;
+ }
+ if (error == EEXIST)
+ break;
}
sc->sc_vhid = carpr.carpr_vhid;
- IFP2ENADDR(sc->sc_ifp)[0] = 0;
- IFP2ENADDR(sc->sc_ifp)[1] = 0;
- IFP2ENADDR(sc->sc_ifp)[2] = 0x5e;
- IFP2ENADDR(sc->sc_ifp)[3] = 0;
- IFP2ENADDR(sc->sc_ifp)[4] = 1;
- IFP2ENADDR(sc->sc_ifp)[5] = sc->sc_vhid;
+ IF_LLADDR(sc->sc_ifp)[0] = 0;
+ IF_LLADDR(sc->sc_ifp)[1] = 0;
+ IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
+ IF_LLADDR(sc->sc_ifp)[3] = 0;
+ IF_LLADDR(sc->sc_ifp)[4] = 1;
+ IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
error--;
}
if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
@@ -1902,7 +1955,8 @@
carpr.carpr_vhid = sc->sc_vhid;
carpr.carpr_advbase = sc->sc_advbase;
carpr.carpr_advskew = sc->sc_advskew;
- if (suser(curthread) == 0)
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error == 0)
bcopy(sc->sc_key, carpr.carpr_key,
sizeof(carpr.carpr_key));
error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
@@ -2134,28 +2188,28 @@
static int
carp_modevent(module_t mod, int type, void *data)
{
- int error = 0;
-
switch (type) {
case MOD_LOAD:
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL)
+ return (ENOMEM);
mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
LIST_INIT(&carpif_list);
if_clone_attach(&carp_cloner);
break;
case MOD_UNLOAD:
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
if_clone_detach(&carp_cloner);
- while (!LIST_EMPTY(&carpif_list))
- carp_clone_destroy(SC2IFP(LIST_FIRST(&carpif_list)));
mtx_destroy(&carp_mtx);
break;
default:
- error = EINVAL;
- break;
+ return (EINVAL);
}
- return error;
+ return (0);
}
static moduledata_t carp_mod = {
Index: ip_dummynet.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_dummynet.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_dummynet.c -L sys/netinet/ip_dummynet.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_dummynet.c
+++ sys/netinet/ip_dummynet.c
@@ -23,10 +23,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.4 2006/02/23 08:28:15 ume Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110 2007/10/07 20:44:22 silby Exp $");
+
#define DUMMYNET_DEBUG
#include "opt_inet6.h"
@@ -66,7 +67,9 @@
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -77,7 +80,6 @@
#include <netinet/ip_var.h>
#include <netinet/if_ether.h> /* for struct arpcom */
-#include <net/bridge.h>
#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
#include <netinet6/ip6_var.h>
@@ -91,7 +93,7 @@
static int dn_hash_size = 64 ; /* default hash size */
/* statistics on number of queue searches and search steps */
-static int searches, search_steps ;
+static long searches, search_steps ;
static int pipe_expire = 1 ; /* expire queue if empty */
static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
@@ -99,6 +101,15 @@
static int red_avg_pkt_size = 512; /* RED - default medium packet size */
static int red_max_pkt_size = 1500; /* RED - default max packet size */
+static struct timeval prev_t, t;
+static long tick_last; /* Last tick duration (usec). */
+static long tick_delta; /* Last vs standard tick diff (usec). */
+static long tick_delta_sum; /* Accumulated tick difference (usec).*/
+static long tick_adjustment; /* Tick adjustments done. */
+static long tick_lost; /* Lost(coalesced) ticks number. */
+/* Adjusted vs non-adjusted curr_time difference (ticks). */
+static long tick_diff;
+
/*
* Three heaps contain queues and pipes that the scheduler handles:
*
@@ -134,31 +145,42 @@
extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
#ifdef SYSCTL_NODE
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
- CTLFLAG_RW, 0, "Dummynet");
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
- CTLFLAG_RD, &curr_time, 0, "Current tick");
+ CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
+ CTLFLAG_RD, &curr_time, 0, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
- CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
+ CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
- CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
- CTLFLAG_RD, &searches, 0, "Number of queue searches");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
- CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
+ CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
+ CTLFLAG_RD, &searches, 0, "Number of queue searches");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
+ CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
+ CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
- CTLFLAG_RW, &dn_max_ratio, 0,
- "Max ratio between dynamic queues and buckets");
+ CTLFLAG_RW, &dn_max_ratio, 0,
+ "Max ratio between dynamic queues and buckets");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
+ CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
+ CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+ CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+ CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+ CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+ CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+ CTLFLAG_RD, &tick_diff, 0,
+ "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+ CTLFLAG_RD, &tick_lost, 0,
+ "Number of ticks coalesced by dummynet taskqueue.");
#endif
#ifdef DUMMYNET_DEBUG
@@ -172,21 +194,17 @@
#define DPRINTF(X)
#endif
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
+static void dummynet_task(void *, int);
+
static struct mtx dummynet_mtx;
-/*
- * NB: Recursion is needed to deal with re-entry via ICMP. That is,
- * a packet may be dispatched via ip_input from dummynet_io and
- * re-enter through ip_output. Yech.
- */
#define DUMMYNET_LOCK_INIT() \
- mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF | MTX_RECURSE)
+ mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx)
#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx)
-#define DUMMYNET_LOCK_ASSERT() do { \
- mtx_assert(&dummynet_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED)
static int config_pipe(struct dn_pipe *p);
static int ip_dn_ctl(struct sockopt *sopt);
@@ -486,8 +504,8 @@
* and put into delay line (p_queue)
*/
static void
-move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
- struct dn_pipe *p, int len)
+move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p,
+ int len)
{
struct dn_pkt_tag *dt = dn_tag_get(pkt);
@@ -695,64 +713,115 @@
}
/*
- * This is called once per tick, or HZ times per second. It is used to
- * increment the current tick counter and schedule expired events.
+ * This is called one tick after the previous run. It is used to
+ * schedule the next run.
*/
static void
dummynet(void * __unused unused)
{
- struct mbuf *head = NULL, *tail = NULL;
- struct dn_pipe *pipe;
- struct dn_heap *heaps[3];
- struct dn_heap *h;
- void *p; /* generic parameter to handler */
- int i;
- heaps[0] = &ready_heap ; /* fixed-rate queues */
- heaps[1] = &wfq_ready_heap ; /* wfq queues */
- heaps[2] = &extract_heap ; /* delay line */
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
- DUMMYNET_LOCK();
- curr_time++ ;
- for (i=0; i < 3 ; i++) {
- h = heaps[i];
- while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
- if (h->p[0].key > curr_time)
- printf("dummynet: warning, heap %d is %d ticks late\n",
- i, (int)(curr_time - h->p[0].key));
- p = h->p[0].object ; /* store a copy before heap_extract */
- heap_extract(h, NULL); /* need to extract before processing */
- if (i == 0)
- ready_event(p, &head, &tail);
- else if (i == 1) {
- struct dn_pipe *pipe = p;
- if (pipe->if_name[0] != '\0')
- printf("dummynet: bad ready_event_wfq for pipe %s\n",
- pipe->if_name);
- else
- ready_event_wfq(p, &head, &tail);
- } else
- transmit_event(p, &head, &tail);
- }
- }
- /* Sweep pipes trying to expire idle flow_queues. */
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH(pipe, &pipehash[i], next)
- if (pipe->idle_heap.elements > 0 &&
- DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V) ) {
- struct dn_flow_queue *q = pipe->idle_heap.p[0].object;
-
- heap_extract(&(pipe->idle_heap), NULL);
- q->S = q->F + 1; /* Mark timestamp as invalid. */
- pipe->sum -= q->fs->weight;
+/*
+ * The main dummynet processing function.
+ */
+static void
+dummynet_task(void *context, int pending)
+{
+ struct mbuf *head = NULL, *tail = NULL;
+ struct dn_pipe *pipe;
+ struct dn_heap *heaps[3];
+ struct dn_heap *h;
+ void *p; /* generic parameter to handler */
+ int i;
+
+ DUMMYNET_LOCK();
+
+ heaps[0] = &ready_heap; /* fixed-rate queues */
+ heaps[1] = &wfq_ready_heap; /* wfq queues */
+ heaps[2] = &extract_heap; /* delay line */
+
+ /* Update number of lost(coalesced) ticks. */
+ tick_lost += pending - 1;
+
+ getmicrouptime(&t);
+ /* Last tick duration (usec). */
+ tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 +
+ (t.tv_usec - prev_t.tv_usec);
+ /* Last tick vs standard tick difference (usec). */
+ tick_delta = (tick_last * hz - 1000000) / hz;
+ /* Accumulated tick difference (usec). */
+ tick_delta_sum += tick_delta;
+
+ prev_t = t;
+
+ /*
+ * Adjust curr_time if accumulated tick difference greater than
+ * 'standard' tick. Since curr_time should be monotonically increasing,
+ * we do positive adjustment as required and throttle curr_time in
+ * case of negative adjustment.
+ */
+ curr_time++;
+ if (tick_delta_sum - tick >= 0) {
+ int diff = tick_delta_sum / tick;
+
+ curr_time += diff;
+ tick_diff += diff;
+ tick_delta_sum %= tick;
+ tick_adjustment++;
+ } else if (tick_delta_sum + tick <= 0) {
+ curr_time--;
+ tick_diff--;
+ tick_delta_sum += tick;
+ tick_adjustment++;
+ }
+
+ for (i = 0; i < 3; i++) {
+ h = heaps[i];
+ while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
+ if (h->p[0].key > curr_time)
+ printf("dummynet: warning, "
+ "heap %d is %d ticks late\n",
+ i, (int)(curr_time - h->p[0].key));
+ /* store a copy before heap_extract */
+ p = h->p[0].object;
+ /* need to extract before processing */
+ heap_extract(h, NULL);
+ if (i == 0)
+ ready_event(p, &head, &tail);
+ else if (i == 1) {
+ struct dn_pipe *pipe = p;
+ if (pipe->if_name[0] != '\0')
+ printf("dummynet: bad ready_event_wfq "
+ "for pipe %s\n", pipe->if_name);
+ else
+ ready_event_wfq(p, &head, &tail);
+ } else
+ transmit_event(p, &head, &tail);
}
+ }
- DUMMYNET_UNLOCK();
+ /* Sweep pipes trying to expire idle flow_queues. */
+ for (i = 0; i < HASHSIZE; i++)
+ SLIST_FOREACH(pipe, &pipehash[i], next)
+ if (pipe->idle_heap.elements > 0 &&
+ DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
+ struct dn_flow_queue *q =
+ pipe->idle_heap.p[0].object;
+
+ heap_extract(&(pipe->idle_heap), NULL);
+ /* Mark timestamp as invalid. */
+ q->S = q->F + 1;
+ pipe->sum -= q->fs->weight;
+ }
- if (head != NULL)
- dummynet_send(head);
+ DUMMYNET_UNLOCK();
+
+ if (head != NULL)
+ dummynet_send(head);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
}
static void
@@ -774,11 +843,11 @@
ip = mtod(m, struct ip *);
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
- ip_input(m);
+ netisr_dispatch(NETISR_IP, m);
break;
#ifdef INET6
case DN_TO_IP6_IN:
- ip6_input(m);
+ netisr_dispatch(NETISR_IPV6, m);
break;
case DN_TO_IP6_OUT:
@@ -792,32 +861,6 @@
printf("dummynet: if_bridge not loaded\n");
break;
- case DN_TO_BDG_FWD :
- /*
- * The bridge requires/assumes the Ethernet header is
- * contiguous in the first mbuf header. Ensure this
- * is true.
- */
- if (BDG_LOADED) {
- if (m->m_len < ETHER_HDR_LEN &&
- (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/bridge: pullup fail, "
- "dropping pkt\n");
- break;
- }
- m = bdg_forward_ptr(m, pkt->ifp);
- } else {
- /*
- * somebody unloaded the bridge module.
- * Drop pkt
- */
- /* XXX rate limit */
- printf("dummynet: dropping bridged packet "
- "trapped in pipe\n");
- }
- if (m)
- m_freem(m);
- break;
case DN_TO_ETH_DEMUX:
/*
* The Ethernet code assumes the Ethernet header is
@@ -853,9 +896,9 @@
struct dn_flow_queue *q, *prev ;
int i, initial_elements = fs->rq_elements ;
- if (fs->last_expired == time_second)
+ if (fs->last_expired == time_uptime)
return 0 ;
- fs->last_expired = time_second ;
+ fs->last_expired = time_uptime ;
for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
if (q->head != NULL || q->S != q->F+1) {
@@ -1021,103 +1064,106 @@
static int
red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
{
- /*
- * RED algorithm
- *
- * RED calculates the average queue size (avg) using a low-pass filter
- * with an exponential weighted (w_q) moving average:
- * avg <- (1-w_q) * avg + w_q * q_size
- * where q_size is the queue length (measured in bytes or * packets).
- *
- * If q_size == 0, we compute the idle time for the link, and set
- * avg = (1 - w_q)^(idle/s)
- * where s is the time needed for transmitting a medium-sized packet.
- *
- * Now, if avg < min_th the packet is enqueued.
- * If avg > max_th the packet is dropped. Otherwise, the packet is
- * dropped with probability P function of avg.
- *
- */
+ /*
+ * RED algorithm
+ *
+ * RED calculates the average queue size (avg) using a low-pass filter
+ * with an exponential weighted (w_q) moving average:
+ * avg <- (1-w_q) * avg + w_q * q_size
+ * where q_size is the queue length (measured in bytes or * packets).
+ *
+ * If q_size == 0, we compute the idle time for the link, and set
+ * avg = (1 - w_q)^(idle/s)
+ * where s is the time needed for transmitting a medium-sized packet.
+ *
+ * Now, if avg < min_th the packet is enqueued.
+ * If avg > max_th the packet is dropped. Otherwise, the packet is
+ * dropped with probability P function of avg.
+ */
- int64_t p_b = 0;
- /* queue in bytes or packets ? */
- u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;
+ int64_t p_b = 0;
- DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time, q_size));
+ /* Queue in bytes or packets? */
+ u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ?
+ q->len_bytes : q->len;
- /* average queue size estimation */
- if (q_size != 0) {
- /*
- * queue is not empty, avg <- avg + (q_size - avg) * w_q
- */
- int diff = SCALE(q_size) - q->avg;
- int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);
+ DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size));
- q->avg += (int) v;
- } else {
- /*
- * queue is empty, find for how long the queue has been
- * empty and use a lookup table for computing
- * (1 - * w_q)^(idle_time/s) where s is the time to send a
- * (small) packet.
- * XXX check wraps...
- */
- if (q->avg) {
- u_int t = (curr_time - q->q_time) / fs->lookup_step;
+ /* Average queue size estimation. */
+ if (q_size != 0) {
+ /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+ int diff = SCALE(q_size) - q->avg;
+ int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
- q->avg = (t < fs->lookup_depth) ?
- SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ q->avg += (int)v;
+ } else {
+ /*
+ * Queue is empty, find for how long the queue has been
+ * empty and use a lookup table for computing
+ * (1 - * w_q)^(idle_time/s) where s is the time to send a
+ * (small) packet.
+ * XXX check wraps...
+ */
+ if (q->avg) {
+ u_int t = (curr_time - q->q_time) / fs->lookup_step;
+
+ q->avg = (t < fs->lookup_depth) ?
+ SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ }
}
- }
- DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
+ DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
- /* should i drop ? */
-
- if (q->avg < fs->min_th) {
- q->count = -1;
- return 0; /* accept packet ; */
- }
- if (q->avg >= fs->max_th) { /* average queue >= max threshold */
- if (fs->flags_fs & DN_IS_GENTLE_RED) {
- /*
- * According to Gentle-RED, if avg is greater than max_th the
- * packet is dropped with a probability
- * p_b = c_3 * avg - c_4
- * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
- */
- p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
- } else {
- q->count = -1;
- DPRINTF(("dummynet: - drop"));
- return 1 ;
+ /* Should i drop? */
+ if (q->avg < fs->min_th) {
+ q->count = -1;
+ return (0); /* accept packet */
}
- } else if (q->avg > fs->min_th) {
- /*
- * we compute p_b using the linear dropping function p_b = c_1 *
- * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
- * max_p * min_th / (max_th - min_th)
- */
- p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
- }
- if (fs->flags_fs & DN_QSIZE_IS_BYTES)
- p_b = (p_b * len) / fs->max_pkt_size;
- if (++q->count == 0)
- q->random = random() & 0xffff;
- else {
- /*
- * q->count counts packets arrived since last drop, so a greater
- * value of q->count means a greater packet drop probability.
- */
- if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
- q->count = 0;
- DPRINTF(("dummynet: - red drop"));
- /* after a drop we calculate a new random value */
- q->random = random() & 0xffff;
- return 1; /* drop */
+ if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->flags_fs & DN_IS_GENTLE_RED) {
+ /*
+ * According to Gentle-RED, if avg is greater than
+ * max_th the packet is dropped with a probability
+ * p_b = c_3 * avg - c_4
+ * where c_3 = (1 - max_p) / max_th
+ * c_4 = 1 - 2 * max_p
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+ fs->c_4;
+ } else {
+ q->count = -1;
+ DPRINTF(("dummynet: - drop"));
+ return (1);
+ }
+ } else if (q->avg > fs->min_th) {
+ /*
+ * We compute p_b using the linear dropping function
+ * p_b = c_1 * avg - c_2
+ * where c_1 = max_p / (max_th - min_th)
+ * c_2 = max_p * min_th / (max_th - min_th)
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
}
- }
- /* end of RED algorithm */
- return 0 ; /* accept */
+
+ if (fs->flags_fs & DN_QSIZE_IS_BYTES)
+ p_b = (p_b * len) / fs->max_pkt_size;
+ if (++q->count == 0)
+ q->random = random() & 0xffff;
+ else {
+ /*
+ * q->count counts packets arrived since last drop, so a greater
+ * value of q->count means a greater packet drop probability.
+ */
+ if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+ q->count = 0;
+ DPRINTF(("dummynet: - red drop"));
+ /* After a drop we calculate a new random value. */
+ q->random = random() & 0xffff;
+ return (1); /* drop */
+ }
+ }
+ /* End of RED algorithm. */
+
+ return (0); /* accept */
}
static __inline struct dn_flow_set *
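
For readers who prefer not to untangle the fixed-point SCALE()/SCALE_MUL()
arithmetic above, the same drop decision in plain floating point looks
roughly like this (a sketch only: it omits the idle-time decay of avg, the
byte-mode scaling of p_b by packet length, and the q->count spreading of
drops; random01() is a hypothetical uniform [0,1) source):

	extern double random01(void);	/* hypothetical [0,1) generator */

	static int
	red_should_drop(double *avg, double q_size, double w_q,
	    double min_th, double max_th, double max_p, int gentle)
	{
		double p_b;

		/* avg <- (1 - w_q) * avg + w_q * q_size */
		*avg += w_q * (q_size - *avg);

		if (*avg < min_th)
			return (0);			/* always accept */
		if (*avg >= max_th) {
			if (!gentle)
				return (1);		/* always drop */
			/* Gentle-RED: p_b = c_3 * avg - c_4 */
			p_b = (1.0 - max_p) / max_th * *avg -
			    (1.0 - 2.0 * max_p);
		} else {
			/* p_b = c_1 * avg - c_2 */
			p_b = max_p * (*avg - min_th) / (max_th - min_th);
		}
		return (random01() < p_b);	/* drop with prob. p_b */
	}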
@@ -1153,7 +1199,6 @@
* m the mbuf with the packet
* ifp the 'ifp' parameter from the caller.
* NULL in ip_input, destination interface in ip_output,
- * real_dst in bdg_forward
* rule matching rule, in case of multiple passes
*
*/
@@ -1175,6 +1220,10 @@
if (cmd->opcode == O_LOG)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_ALTQ)
+ cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
is_pipe = (cmd->opcode == O_PIPE);
DUMMYNET_LOCK();
@@ -1349,36 +1398,37 @@
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
- struct dn_flow_queue *q, *qn ;
- int i ;
+ struct dn_flow_queue *q, *qn;
+ int i;
- DUMMYNET_LOCK_ASSERT();
+ DUMMYNET_LOCK_ASSERT();
- for (i = 0 ; i <= fs->rq_size ; i++ ) {
- for (q = fs->rq[i] ; q ; q = qn ) {
- struct mbuf *m, *mnext;
-
- mnext = q->head;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- DN_FREE_PKT(m);
- }
- qn = q->next ;
- free(q, M_DUMMYNET);
+ for (i = 0; i <= fs->rq_size; i++) {
+ for (q = fs->rq[i]; q != NULL; q = qn) {
+ struct mbuf *m, *mnext;
+
+ mnext = q->head;
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ DN_FREE_PKT(m);
+ }
+ qn = q->next;
+ free(q, M_DUMMYNET);
+ }
+ fs->rq[i] = NULL;
+ }
+
+ fs->rq_elements = 0;
+ if (all) {
+ /* RED - free lookup table. */
+ if (fs->w_q_lookup != NULL)
+ free(fs->w_q_lookup, M_DUMMYNET);
+ if (fs->rq != NULL)
+ free(fs->rq, M_DUMMYNET);
+ /* If this fs is not part of a pipe, free it. */
+ if (fs->pipe == NULL || fs != &(fs->pipe->fs))
+ free(fs, M_DUMMYNET);
}
- fs->rq[i] = NULL ;
- }
- fs->rq_elements = 0 ;
- if (all) {
- /* RED - free lookup table */
- if (fs->w_q_lookup)
- free(fs->w_q_lookup, M_DUMMYNET);
- if (fs->rq)
- free(fs->rq, M_DUMMYNET);
- /* if this fs is not part of a pipe, free it */
- if (fs->pipe && fs != &(fs->pipe->fs) )
- free(fs, M_DUMMYNET);
- }
}
/*
@@ -1496,54 +1546,59 @@
* setup RED parameters
*/
static int
-config_red(struct dn_flow_set *p, struct dn_flow_set * x)
+config_red(struct dn_flow_set *p, struct dn_flow_set *x)
{
- int i;
+ int i;
- x->w_q = p->w_q;
- x->min_th = SCALE(p->min_th);
- x->max_th = SCALE(p->max_th);
- x->max_p = p->max_p;
-
- x->c_1 = p->max_p / (p->max_th - p->min_th);
- x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
- if (x->flags_fs & DN_IS_GENTLE_RED) {
- x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
- x->c_4 = (SCALE(1) - 2 * p->max_p);
- }
-
- /* if the lookup table already exist, free and create it again */
- if (x->w_q_lookup) {
- free(x->w_q_lookup, M_DUMMYNET);
- x->w_q_lookup = NULL ;
- }
- if (red_lookup_depth == 0) {
- printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n");
- free(x, M_DUMMYNET);
- return EINVAL;
- }
- x->lookup_depth = red_lookup_depth;
- x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
+ x->w_q = p->w_q;
+ x->min_th = SCALE(p->min_th);
+ x->max_th = SCALE(p->max_th);
+ x->max_p = p->max_p;
+
+ x->c_1 = p->max_p / (p->max_th - p->min_th);
+ x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
+
+ if (x->flags_fs & DN_IS_GENTLE_RED) {
+ x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
+ x->c_4 = SCALE(1) - 2 * p->max_p;
+ }
+
+ /* If the lookup table already exist, free and create it again. */
+ if (x->w_q_lookup) {
+ free(x->w_q_lookup, M_DUMMYNET);
+ x->w_q_lookup = NULL;
+ }
+ if (red_lookup_depth == 0) {
+ printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
+ "must be > 0\n");
+ free(x, M_DUMMYNET);
+ return (EINVAL);
+ }
+ x->lookup_depth = red_lookup_depth;
+ x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int),
M_DUMMYNET, M_NOWAIT);
- if (x->w_q_lookup == NULL) {
- printf("dummynet: sorry, cannot allocate red lookup table\n");
- free(x, M_DUMMYNET);
- return ENOSPC;
- }
-
- /* fill the lookup table with (1 - w_q)^x */
- x->lookup_step = p->lookup_step ;
- x->lookup_weight = p->lookup_weight ;
- x->w_q_lookup[0] = SCALE(1) - x->w_q;
- for (i = 1; i < x->lookup_depth; i++)
- x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
- if (red_avg_pkt_size < 1)
- red_avg_pkt_size = 512 ;
- x->avg_pkt_size = red_avg_pkt_size ;
- if (red_max_pkt_size < 1)
- red_max_pkt_size = 1500 ;
- x->max_pkt_size = red_max_pkt_size ;
- return 0 ;
+ if (x->w_q_lookup == NULL) {
+ printf("dummynet: sorry, cannot allocate red lookup table\n");
+ free(x, M_DUMMYNET);
+ return(ENOSPC);
+ }
+
+ /* Fill the lookup table with (1 - w_q)^x */
+ x->lookup_step = p->lookup_step;
+ x->lookup_weight = p->lookup_weight;
+ x->w_q_lookup[0] = SCALE(1) - x->w_q;
+
+ for (i = 1; i < x->lookup_depth; i++)
+ x->w_q_lookup[i] =
+ SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
+
+ if (red_avg_pkt_size < 1)
+ red_avg_pkt_size = 512;
+ x->avg_pkt_size = red_avg_pkt_size;
+ if (red_max_pkt_size < 1)
+ red_max_pkt_size = 1500;
+ x->max_pkt_size = red_max_pkt_size;
+ return (0);
}
static int
@@ -1574,137 +1629,146 @@
static void
set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
{
- x->flags_fs = src->flags_fs;
- x->qsize = src->qsize;
- x->plr = src->plr;
- x->flow_mask = src->flow_mask;
- if (x->flags_fs & DN_QSIZE_IS_BYTES) {
- if (x->qsize > 1024*1024)
- x->qsize = 1024*1024 ;
- } else {
- if (x->qsize == 0)
- x->qsize = 50 ;
- if (x->qsize > 100)
- x->qsize = 50 ;
- }
- /* configuring RED */
- if ( x->flags_fs & DN_IS_RED )
- config_red(src, x) ; /* XXX should check errors */
+ x->flags_fs = src->flags_fs;
+ x->qsize = src->qsize;
+ x->plr = src->plr;
+ x->flow_mask = src->flow_mask;
+ if (x->flags_fs & DN_QSIZE_IS_BYTES) {
+ if (x->qsize > 1024 * 1024)
+ x->qsize = 1024 * 1024;
+ } else {
+ if (x->qsize == 0)
+ x->qsize = 50;
+ if (x->qsize > 100)
+ x->qsize = 50;
+ }
+ /* Configuring RED. */
+ if (x->flags_fs & DN_IS_RED)
+ config_red(src, x); /* XXX should check errors */
}
/*
- * setup pipe or queue parameters.
+ * Setup pipe or queue parameters.
*/
-
static int
config_pipe(struct dn_pipe *p)
{
- struct dn_flow_set *pfs = &(p->fs);
- struct dn_flow_queue *q;
- int i, error;
-
- /*
- * The config program passes parameters as follows:
- * bw = bits/second (0 means no limits),
- * delay = ms, must be translated into ticks.
- * qsize = slots/bytes
- */
- p->delay = ( p->delay * hz ) / 1000 ;
- /* We need either a pipe number or a flow_set number */
- if (p->pipe_nr == 0 && pfs->fs_nr == 0)
- return EINVAL ;
- if (p->pipe_nr != 0 && pfs->fs_nr != 0)
- return EINVAL ;
- if (p->pipe_nr != 0) { /* this is a pipe */
- struct dn_pipe *pipe;
+ struct dn_flow_set *pfs = &(p->fs);
+ struct dn_flow_queue *q;
+ int i, error;
- DUMMYNET_LOCK();
- pipe = locate_pipe(p->pipe_nr); /* locate pipe */
-
- if (pipe == NULL) { /* new pipe */
- pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
- if (pipe == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory for new pipe\n");
- return (ENOMEM);
- }
- pipe->pipe_nr = p->pipe_nr;
- pipe->fs.pipe = pipe ;
- /* idle_heap is the only one from which we extract from the middle.
- */
- pipe->idle_heap.size = pipe->idle_heap.elements = 0 ;
- pipe->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos);
- } else
- /* Flush accumulated credit for all queues */
- for (i = 0; i <= pipe->fs.rq_size; i++)
- for (q = pipe->fs.rq[i]; q; q = q->next)
- q->numbytes = 0;
-
- pipe->bandwidth = p->bandwidth ;
- pipe->numbytes = 0; /* just in case... */
- bcopy(p->if_name, pipe->if_name, sizeof(p->if_name) );
- pipe->ifp = NULL ; /* reset interface ptr */
- pipe->delay = p->delay ;
- set_fs_parms(&(pipe->fs), pfs);
-
-
- if (pipe->fs.rq == NULL) { /* a new pipe */
- error = alloc_hash(&(pipe->fs), pfs);
- if (error) {
+ /*
+ * The config program passes parameters as follows:
+ * bw = bits/second (0 means no limits),
+ * delay = ms, must be translated into ticks.
+ * qsize = slots/bytes
+ */
+ p->delay = (p->delay * hz) / 1000;
+ /* We need either a pipe number or a flow_set number. */
+ if (p->pipe_nr == 0 && pfs->fs_nr == 0)
+ return (EINVAL);
+ if (p->pipe_nr != 0 && pfs->fs_nr != 0)
+ return (EINVAL);
+ if (p->pipe_nr != 0) { /* this is a pipe */
+ struct dn_pipe *pipe;
+
+ DUMMYNET_LOCK();
+ pipe = locate_pipe(p->pipe_nr); /* locate pipe */
+
+ if (pipe == NULL) { /* new pipe */
+ pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
+ M_NOWAIT | M_ZERO);
+ if (pipe == NULL) {
+ DUMMYNET_UNLOCK();
+ printf("dummynet: no memory for new pipe\n");
+ return (ENOMEM);
+ }
+ pipe->pipe_nr = p->pipe_nr;
+ pipe->fs.pipe = pipe;
+ /*
+ * idle_heap is the only one from which
+ * we extract from the middle.
+ */
+ pipe->idle_heap.size = pipe->idle_heap.elements = 0;
+ pipe->idle_heap.offset =
+ offsetof(struct dn_flow_queue, heap_pos);
+ } else
+ /* Flush accumulated credit for all queues. */
+ for (i = 0; i <= pipe->fs.rq_size; i++)
+ for (q = pipe->fs.rq[i]; q; q = q->next)
+ q->numbytes = 0;
+
+ pipe->bandwidth = p->bandwidth;
+ pipe->numbytes = 0; /* just in case... */
+ bcopy(p->if_name, pipe->if_name, sizeof(p->if_name));
+ pipe->ifp = NULL; /* reset interface ptr */
+ pipe->delay = p->delay;
+ set_fs_parms(&(pipe->fs), pfs);
+
+ if (pipe->fs.rq == NULL) { /* a new pipe */
+ error = alloc_hash(&(pipe->fs), pfs);
+ if (error) {
+ DUMMYNET_UNLOCK();
+ free(pipe, M_DUMMYNET);
+ return (error);
+ }
+ SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
+ pipe, next);
+ }
DUMMYNET_UNLOCK();
- free(pipe, M_DUMMYNET);
- return (error);
- }
- SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], pipe, next);
- }
- DUMMYNET_UNLOCK();
- } else { /* config queue */
- struct dn_flow_set *fs;
+ } else { /* config queue */
+ struct dn_flow_set *fs;
- DUMMYNET_LOCK();
- fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
+ DUMMYNET_LOCK();
+ fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
- if (fs == NULL) { /* new */
- if (pfs->parent_nr == 0) { /* need link to a pipe */
- DUMMYNET_UNLOCK();
- return EINVAL ;
- }
- fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
- M_NOWAIT|M_ZERO);
- if (fs == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory for new flow_set\n");
- return (ENOMEM);
- }
- fs->fs_nr = pfs->fs_nr;
- fs->parent_nr = pfs->parent_nr;
- fs->weight = pfs->weight;
- if (fs->weight == 0)
- fs->weight = 1;
- else if (fs->weight > 100)
- fs->weight = 100;
- } else {
- /* Change parent pipe not allowed; must delete and recreate */
- if (pfs->parent_nr != 0 && fs->parent_nr != pfs->parent_nr) {
- DUMMYNET_UNLOCK();
- return EINVAL ;
- }
- }
- set_fs_parms(fs, pfs);
+ if (fs == NULL) { /* new */
+ if (pfs->parent_nr == 0) { /* need link to a pipe */
+ DUMMYNET_UNLOCK();
+ return (EINVAL);
+ }
+ fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
+ M_NOWAIT | M_ZERO);
+ if (fs == NULL) {
+ DUMMYNET_UNLOCK();
+ printf(
+ "dummynet: no memory for new flow_set\n");
+ return (ENOMEM);
+ }
+ fs->fs_nr = pfs->fs_nr;
+ fs->parent_nr = pfs->parent_nr;
+ fs->weight = pfs->weight;
+ if (fs->weight == 0)
+ fs->weight = 1;
+ else if (fs->weight > 100)
+ fs->weight = 100;
+ } else {
+ /*
+ * Change parent pipe not allowed;
+ * must delete and recreate.
+ */
+ if (pfs->parent_nr != 0 &&
+ fs->parent_nr != pfs->parent_nr) {
+ DUMMYNET_UNLOCK();
+ return (EINVAL);
+ }
+ }
+
+ set_fs_parms(fs, pfs);
- if (fs->rq == NULL) { /* a new flow_set */
- error = alloc_hash(fs, pfs);
- if (error) {
+ if (fs->rq == NULL) { /* a new flow_set */
+ error = alloc_hash(fs, pfs);
+ if (error) {
+ DUMMYNET_UNLOCK();
+ free(fs, M_DUMMYNET);
+ return (error);
+ }
+ SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)],
+ fs, next);
+ }
DUMMYNET_UNLOCK();
- free(fs, M_DUMMYNET);
- return (error);
- }
- SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], fs, next);
}
- DUMMYNET_UNLOCK();
- }
- return 0 ;
+ return (0);
}
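
The set_fs_parms() copy above clamps qsize to at most 1MB when DN_QSIZE_IS_BYTES is set and to the 1..100 slot range (falling back to 50) otherwise, while config_pipe() converts the user-supplied delay from milliseconds to ticks. A small userland sketch of that arithmetic, assuming hz = 1000 purely for illustration:

#include <stdio.h>

static const int hz = 1000;		/* illustrative; the kernel tick rate varies */

static unsigned long
clamp_qsize(unsigned long qsize, int qsize_is_bytes)
{
	if (qsize_is_bytes) {
		if (qsize > 1024 * 1024)	/* byte-based queues capped at 1MB */
			qsize = 1024 * 1024;
	} else {
		if (qsize == 0 || qsize > 100)	/* slot-based queues fall back to 50 */
			qsize = 50;
	}
	return (qsize);
}

int
main(void)
{
	int delay_ms = 250;

	printf("delay = %d ticks\n", (delay_ms * hz) / 1000);
	printf("qsize = %lu slots\n", clamp_qsize(500, 0));
	printf("qsize = %lu bytes\n", clamp_qsize(4UL * 1024 * 1024, 1));
	return (0);
}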
/*
@@ -1749,7 +1813,7 @@
* drain all queues. Called in case of severe mbuf shortage.
*/
void
-dummynet_drain()
+dummynet_drain(void)
{
struct dn_flow_set *fs;
struct dn_pipe *pipe;
@@ -1786,6 +1850,7 @@
static int
delete_pipe(struct dn_pipe *p)
{
+
if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
return EINVAL ;
if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
@@ -2042,52 +2107,66 @@
static void
ip_dn_init(void)
{
- int i;
+ int i;
- if (bootverbose)
- printf("DUMMYNET with IPv6 initialized (040826)\n");
+ if (bootverbose)
+ printf("DUMMYNET with IPv6 initialized (040826)\n");
- DUMMYNET_LOCK_INIT();
+ DUMMYNET_LOCK_INIT();
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_INIT(&pipehash[i]);
- SLIST_INIT(&flowsethash[i]);
- }
- ready_heap.size = ready_heap.elements = 0 ;
- ready_heap.offset = 0 ;
+ for (i = 0; i < HASHSIZE; i++) {
+ SLIST_INIT(&pipehash[i]);
+ SLIST_INIT(&flowsethash[i]);
+ }
+ ready_heap.size = ready_heap.elements = 0;
+ ready_heap.offset = 0;
+
+ wfq_ready_heap.size = wfq_ready_heap.elements = 0;
+ wfq_ready_heap.offset = 0;
- wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
- wfq_ready_heap.offset = 0 ;
+ extract_heap.size = extract_heap.elements = 0;
+ extract_heap.offset = 0;
- extract_heap.size = extract_heap.elements = 0 ;
- extract_heap.offset = 0 ;
+ ip_dn_ctl_ptr = ip_dn_ctl;
+ ip_dn_io_ptr = dummynet_io;
+ ip_dn_ruledel_ptr = dn_rule_delete;
- ip_dn_ctl_ptr = ip_dn_ctl;
- ip_dn_io_ptr = dummynet_io;
- ip_dn_ruledel_ptr = dn_rule_delete;
+ TASK_INIT(&dn_task, 0, dummynet_task, NULL);
+ dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
+ taskqueue_thread_enqueue, &dn_tq);
+ taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
- callout_init(&dn_timeout, NET_CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_init(&dn_timeout, CALLOUT_MPSAFE);
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+ /* Initialize curr_time adjustment mechanics. */
+ getmicrouptime(&prev_t);
}
#ifdef KLD_MODULE
static void
ip_dn_destroy(void)
{
- ip_dn_ctl_ptr = NULL;
- ip_dn_io_ptr = NULL;
- ip_dn_ruledel_ptr = NULL;
+ ip_dn_ctl_ptr = NULL;
+ ip_dn_io_ptr = NULL;
+ ip_dn_ruledel_ptr = NULL;
- callout_stop(&dn_timeout);
- dummynet_flush();
+ DUMMYNET_LOCK();
+ callout_stop(&dn_timeout);
+ DUMMYNET_UNLOCK();
+ taskqueue_drain(dn_tq, &dn_task);
+ taskqueue_free(dn_tq);
- DUMMYNET_LOCK_DESTROY();
+ dummynet_flush();
+
+ DUMMYNET_LOCK_DESTROY();
}
#endif /* KLD_MODULE */
static int
dummynet_modevent(module_t mod, int type, void *data)
{
+
switch (type) {
case MOD_LOAD:
if (DUMMYNET_LOADED) {
Index: tcp_subr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_subr.c -L sys/netinet/tcp_subr.c -u -r1.2 -r1.3
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -27,28 +27,29 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.228.2.6 2006/03/01 21:13:29 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_subr.c,v 1.300.2.2 2007/12/02 10:32:49 bz Exp $");
+
#include "opt_compat.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
#endif
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -83,6 +84,7 @@
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -93,35 +95,28 @@
#include <netinet6/ip6protosw.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/xform.h>
#ifdef INET6
#include <netipsec/ipsec6.h>
#endif
#include <netipsec/key.h>
-#define IPSEC
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
#include <sys/md5.h>
+#include <security/mac/mac_framework.h>
+
int tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
- &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
+ &tcp_mssdflt, 0, "Default TCP Maximum Segment Size");
#ifdef INET6
int tcp_v6mssdflt = TCP6_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
- CTLFLAG_RW, &tcp_v6mssdflt , 0,
- "Default TCP Maximum Segment Size for IPv6");
+ CTLFLAG_RW, &tcp_v6mssdflt , 0,
+ "Default TCP Maximum Segment Size for IPv6");
#endif
/*
@@ -135,42 +130,30 @@
int tcp_minmss = TCP_MINMSS;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
&tcp_minmss , 0, "Minmum TCP Maximum Segment Size");
-/*
- * Number of TCP segments per second we accept from remote host
- * before we start to calculate average segment size. If average
- * segment size drops below the minimum TCP MSS we assume a DoS
- * attack and reset+drop the connection. Care has to be taken not to
- * set this value too small to not kill interactive type connections
- * (telnet, SSH) which send many small packets.
- */
-int tcp_minmssoverload = TCP_MINMSSOVERLOAD;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW,
- &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to"
- "be under the MINMSS Size");
-
-#if 0
-static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW,
- &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
-#endif
int tcp_do_rfc1323 = 1;
SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
- &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
+ &tcp_do_rfc1323, 0, "Enable rfc1323 (high performance TCP) extensions");
+
+static int tcp_log_debug = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
+ &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
static int tcp_tcbhashsize = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
- &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+ &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
static int do_tcpdrain = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
- "Enable tcp_drain routine for extra help when low on mbufs");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW,
+ &do_tcpdrain, 0,
+ "Enable tcp_drain routine for extra help when low on mbufs");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
&tcbinfo.ipi_count, 0, "Number of active PCBs");
static int icmp_may_rst = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
+ &icmp_may_rst, 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
static int tcp_isn_reseed_interval = 0;
@@ -213,7 +196,6 @@
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
-static void tcp_discardcb(struct tcpcb *);
static void tcp_isn_tick(void *);
/*
@@ -232,24 +214,46 @@
* separate because the tcpcb structure is exported to userland for sysctl
* parsing purposes, which do not know about callouts.
*/
-struct tcpcb_mem {
- struct tcpcb tcb;
- struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
- struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
+struct tcpcb_mem {
+ struct tcpcb tcb;
+ struct tcp_timer tt;
};
static uma_zone_t tcpcb_zone;
-static uma_zone_t tcptw_zone;
+MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
struct callout isn_callout;
+static struct mtx isn_mtx;
+
+#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
+#define ISN_LOCK() mtx_lock(&isn_mtx)
+#define ISN_UNLOCK() mtx_unlock(&isn_mtx)
/*
- * Tcp initialization
+ * TCP initialization.
*/
+static void
+tcp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
+ uma_zone_set_max(tcpcb_zone, maxsockets);
+ tcp_tw_zone_change();
+}
+
+static int
+tcp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "tcpinp");
+ return (0);
+}
+
void
-tcp_init()
+tcp_init(void)
{
- int hashsize = TCBHASHSIZE;
+ int hashsize = TCBHASHSIZE;
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;
tcp_keepidle = TCPTV_KEEP_IDLE;
@@ -257,23 +261,27 @@
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
tcp_msl = TCPTV_MSL;
tcp_rexmit_min = TCPTV_MIN;
+ if (tcp_rexmit_min < 1)
+ tcp_rexmit_min = 1;
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
+ tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
LIST_INIT(&tcb);
- tcbinfo.listhead = &tcb;
+ tcbinfo.ipi_listhead = &tcb;
TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
if (!powerof2(hashsize)) {
printf("WARNING: TCB hash size not a power of 2\n");
hashsize = 512; /* safe default */
}
tcp_tcbhashsize = hashsize;
- tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
- tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
- &tcbinfo.porthashmask);
+ tcbinfo.ipi_hashbase = hashinit(hashsize, M_PCB,
+ &tcbinfo.ipi_hashmask);
+ tcbinfo.ipi_porthashbase = hashinit(hashsize, M_PCB,
+ &tcbinfo.ipi_porthashmask);
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
@@ -291,27 +299,26 @@
tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcpcb_zone, maxsockets);
- tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(tcptw_zone, maxsockets / 5);
- tcp_timer_init();
+ tcp_tw_init();
syncache_init();
tcp_hc_init();
tcp_reass_init();
+ ISN_LOCK_INIT();
callout_init(&isn_callout, CALLOUT_MPSAFE);
tcp_isn_tick(NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
}
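
tcp_init() still rejects a net.inet.tcp.tcbhashsize tunable that is not a power of two and falls back to 512. The check is the usual bit trick; a tiny userland illustration (the macro body is written out here and assumed to match sys/param.h):

#include <stdio.h>

#define powerof2(x)	((((x) - 1) & (x)) == 0)

int
main(void)
{
	int hashsize = 600;	/* e.g. a mis-set tunable */

	if (!powerof2(hashsize)) {
		printf("WARNING: TCB hash size not a power of 2\n");
		hashsize = 512;	/* safe default */
	}
	printf("using hashsize %d\n", hashsize);
	return (0);
}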
void
-tcp_fini(xtp)
- void *xtp;
+tcp_fini(void *xtp)
{
- callout_stop(&isn_callout);
+ callout_stop(&isn_callout);
}
/*
@@ -320,10 +327,7 @@
* of the tcpcb each time to conserve mbufs.
*/
void
-tcpip_fillheaders(inp, ip_ptr, tcp_ptr)
- struct inpcb *inp;
- void *ip_ptr;
- void *tcp_ptr;
+tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
{
struct tcphdr *th = (struct tcphdr *)tcp_ptr;
@@ -378,8 +382,7 @@
* use for this function is in keepalives, which use tcp_respond.
*/
struct tcptemp *
-tcpip_maketemplate(inp)
- struct inpcb *inp;
+tcpip_maketemplate(struct inpcb *inp)
{
struct mbuf *m;
struct tcptemp *n;
@@ -409,15 +412,10 @@
* NOTE: If m != NULL, then ti must point to *inside* the mbuf.
*/
void
-tcp_respond(tp, ipgen, th, m, ack, seq, flags)
- struct tcpcb *tp;
- void *ipgen;
- register struct tcphdr *th;
- register struct mbuf *m;
- tcp_seq ack, seq;
- int flags;
+tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
+ tcp_seq ack, tcp_seq seq, int flags)
{
- register int tlen;
+ int tlen;
int win = 0;
struct ip *ip;
struct tcphdr *nth;
@@ -439,7 +437,6 @@
if (tp != NULL) {
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
} else
inp = NULL;
@@ -452,7 +449,7 @@
}
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
return;
tlen = 0;
@@ -582,8 +579,7 @@
* come from the zone allocator set up in tcp_init().
*/
struct tcpcb *
-tcp_newtcpcb(inp)
- struct inpcb *inp;
+tcp_newtcpcb(struct inpcb *inp)
{
struct tcpcb_mem *tm;
struct tcpcb *tp;
@@ -595,6 +591,7 @@
if (tm == NULL)
return (NULL);
tp = &tm->tcb;
+ tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =
#ifdef INET6
@@ -603,15 +600,16 @@
tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, NET_CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
if (tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
- tp->sack_enable = tcp_do_sack;
+ if (tcp_do_sack)
+ tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
tp->t_inpcb = inp; /* XXX */
/*
@@ -634,7 +632,7 @@
* which may match an IPv4-mapped IPv6 address.
*/
inp->inp_ip_ttl = ip_defttl;
- inp->inp_ppcb = (caddr_t)tp;
+ inp->inp_ppcb = tp;
return (tp); /* XXX */
}
@@ -644,9 +642,7 @@
* then send a RST to peer.
*/
struct tcpcb *
-tcp_drop(tp, errno)
- register struct tcpcb *tp;
- int errno;
+tcp_drop(struct tcpcb *tp, int errno)
{
struct socket *so = tp->t_inpcb->inp_socket;
@@ -665,9 +661,8 @@
return (tcp_close(tp));
}
-static void
-tcp_discardcb(tp)
- struct tcpcb *tp;
+void
+tcp_discardcb(struct tcpcb *tp)
{
struct tseg_qent *q;
struct inpcb *inp = tp->t_inpcb;
@@ -682,11 +677,11 @@
* Make sure that all of our timers are stopped before we
* delete the PCB.
*/
- callout_stop(tp->tt_rexmt);
- callout_stop(tp->tt_persist);
- callout_stop(tp->tt_keep);
- callout_stop(tp->tt_2msl);
- callout_stop(tp->tt_delack);
+ callout_stop(&tp->t_timers->tt_rexmt);
+ callout_stop(&tp->t_timers->tt_persist);
+ callout_stop(&tp->t_timers->tt_keep);
+ callout_stop(&tp->t_timers->tt_2msl);
+ callout_stop(&tp->t_timers->tt_delack);
/*
* If we got enough samples through the srtt filter,
@@ -708,6 +703,9 @@
* are satisfied. This gives us better new start value
* for the congestion avoidance for new connections.
* ssthresh is only set if packet loss occured on a session.
+ *
+ * XXXRW: 'so' may be NULL here, and/or socket buffer may be
+ * being torn down. Ideally this code would not use 'so'.
*/
ssthresh = tp->snd_ssthresh;
if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
@@ -755,43 +753,45 @@
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
uma_zfree(tcpcb_zone, tp);
- soisdisconnected(so);
}
/*
- * Close a TCP control block:
- * discard all space held by the tcp
- * discard internet protocol block
- * wake up any sleepers
+ * Attempt to close a TCP control block, marking it as dropped, and freeing
+ * the socket if we hold the only reference.
*/
struct tcpcb *
-tcp_close(tp)
- struct tcpcb *tp;
+tcp_close(struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
-#ifdef INET6
- struct socket *so = inp->inp_socket;
-#endif
+ struct socket *so;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- tcp_discardcb(tp);
-#ifdef INET6
- if (INP_CHECK_SOCKAF(so, AF_INET6))
- in6_pcbdetach(inp);
- else
-#endif
- in_pcbdetach(inp);
+ in_pcbdrop(inp);
tcpstat.tcps_closed++;
- return (NULL);
+ KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_close: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ return (NULL);
+ }
+ return (tp);
}
void
-tcp_drain()
+tcp_drain(void)
{
- if (do_tcpdrain)
- {
+
+ if (do_tcpdrain) {
struct inpcb *inpb;
struct tcpcb *tcpb;
struct tseg_qent *te;
@@ -805,7 +805,7 @@
* usefull.
*/
INP_INFO_RLOCK(&tcbinfo);
- LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
+ LIST_FOREACH(inpb, tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_vflag & INP_TIMEWAIT)
continue;
INP_LOCK(inpb);
@@ -835,15 +835,20 @@
* reporting soft errors (yet - a kqueue filter may be added).
*/
static struct inpcb *
-tcp_notify(inp, error)
- struct inpcb *inp;
- int error;
+tcp_notify(struct inpcb *inp, int error)
{
- struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+ struct tcpcb *tp;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
+
/*
* Ignore some errors if we are hooked up.
* If connection hasn't completed, has retransmitted several times,
@@ -857,8 +862,11 @@
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
tp->t_softerror) {
- tcp_drop(tp, error);
- return (struct inpcb *)0;
+ tp = tcp_drop(tp, error);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
} else {
tp->t_softerror = error;
return (inp);
@@ -873,7 +881,7 @@
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
+ int error, i, m, n, pcb_count;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
@@ -883,9 +891,10 @@
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == NULL) {
+ m = syncache_pcbcount();
n = tcbinfo.ipi_count;
req->oldidx = 2 * (sizeof xig)
- + (n + n/8) * sizeof(struct xtcpcb);
+ + ((m + n) + n/8) * sizeof(struct xtcpcb);
return (0);
}
@@ -900,26 +909,32 @@
n = tcbinfo.ipi_count;
INP_INFO_RUNLOCK(&tcbinfo);
+ m = syncache_pcbcount();
+
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + n * sizeof(struct xtcpcb));
+ + (n + m) * sizeof(struct xtcpcb));
if (error != 0)
return (error);
xig.xig_len = sizeof xig;
- xig.xig_count = n;
+ xig.xig_count = n + m;
xig.xig_gen = gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
+ error = syncache_pcblist(req, m, &pcb_count);
+ if (error)
+ return (error);
+
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == NULL)
return (ENOMEM);
INP_INFO_RLOCK(&tcbinfo);
- for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp != NULL && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ for (inp = LIST_FIRST(tcbinfo.ipi_listhead), i = 0; inp != NULL && i
+ < n; inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
/*
@@ -927,10 +942,13 @@
* TCP state changes, is not quite right, but for
* now, better than nothing.
*/
- if (inp->inp_vflag & INP_TIMEWAIT)
- error = cr_cansee(req->td->td_ucred,
- intotw(inp)->tw_cred);
- else
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ if (intotw(inp) != NULL)
+ error = cr_cansee(req->td->td_ucred,
+ intotw(inp)->tw_cred);
+ else
+ error = EINVAL; /* Skip this inp. */
+ } else
error = cr_canseesocket(req->td->td_ucred,
inp->inp_socket);
if (error == 0)
@@ -944,9 +962,10 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xtcpcb xt;
- caddr_t inp_ppcb;
+ void *inp_ppcb;
bzero(&xt, sizeof(xt));
xt.xt_len = sizeof xt;
@@ -967,8 +986,11 @@
xt.xt_socket.xso_protocol = IPPROTO_TCP;
}
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
- }
+ } else
+ INP_UNLOCK(inp);
+
}
if (!error) {
/*
@@ -981,7 +1003,7 @@
INP_INFO_RLOCK(&tcbinfo);
xig.xig_gen = tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = tcbinfo.ipi_count;
+ xig.xig_count = tcbinfo.ipi_count + pcb_count;
INP_INFO_RUNLOCK(&tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
@@ -990,7 +1012,7 @@
}
SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
- tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
static int
tcp_getcred(SYSCTL_HANDLER_ARGS)
@@ -1000,7 +1022,7 @@
struct inpcb *inp;
int error;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -1044,7 +1066,7 @@
struct inpcb *inp;
int error, mapped = 0;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -1102,10 +1124,7 @@
void
-tcp_ctlinput(cmd, sa, vip)
- int cmd;
- struct sockaddr *sa;
- void *vip;
+tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
struct ip *ip = vip;
struct tcphdr *th;
@@ -1156,7 +1175,9 @@
ip->ip_src, th->th_sport, 0, NULL);
if (inp != NULL) {
INP_LOCK(inp);
- if (inp->inp_socket != NULL) {
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
icmp_tcp_seq = htonl(th->th_seq);
tp = intotcpcb(inp);
if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
@@ -1194,7 +1215,7 @@
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
- if (mtu <= tcp_maxmtu(&inc))
+ if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@@ -1220,10 +1241,7 @@
#ifdef INET6
void
-tcp6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct tcphdr th;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
@@ -1351,15 +1369,14 @@
static MD5_CTX isn_ctx;
tcp_seq
-tcp_new_isn(tp)
- struct tcpcb *tp;
+tcp_new_isn(struct tcpcb *tp)
{
u_int32_t md5_buffer[4];
tcp_seq new_isn;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(tp->t_inpcb);
+ ISN_LOCK();
/* Seed if this is the first use, reseed if requested. */
if ((isn_last_reseed == 0) || ((tcp_isn_reseed_interval > 0) &&
(((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz)
@@ -1392,29 +1409,29 @@
isn_offset += ISN_STATIC_INCREMENT +
(arc4random() & ISN_RANDOM_INCREMENT);
new_isn += isn_offset;
+ ISN_UNLOCK();
return (new_isn);
}
/*
- * Increment the offset to the next ISN_BYTES_PER_SECOND / hz boundary
+ * Increment the offset to the next ISN_BYTES_PER_SECOND / 100 boundary
* to keep time flowing at a relatively constant rate. If the random
* increments have already pushed us past the projected offset, do nothing.
*/
static void
-tcp_isn_tick(xtp)
- void *xtp;
+tcp_isn_tick(void *xtp)
{
u_int32_t projected_offset;
- INP_INFO_WLOCK(&tcbinfo);
+ ISN_LOCK();
projected_offset = isn_offset_old + ISN_BYTES_PER_SECOND / 100;
- if (projected_offset > isn_offset)
+ if (SEQ_GT(projected_offset, isn_offset))
isn_offset = projected_offset;
isn_offset_old = isn_offset;
callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
- INP_INFO_WUNLOCK(&tcbinfo);
+ ISN_UNLOCK();
}
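
The replacement of the plain `>` comparison with SEQ_GT() in tcp_isn_tick() matters once the 32-bit offset wraps: modular sequence comparison keeps the projection monotonic across the wrap. A small userland demonstration (the macro is written out here and assumed to match netinet/tcp_seq.h; the 10485 increment is roughly ISN_BYTES_PER_SECOND / 100 under the traditional 1 MB/s constant):

#include <stdio.h>
#include <stdint.h>

#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

int
main(void)
{
	uint32_t isn_offset = 0xfffffff0u;		/* about to wrap */
	uint32_t projected = isn_offset + 10485;	/* wraps past 2^32 */

	printf("plain >  : %d\n", projected > isn_offset);	/* 0 after the wrap */
	printf("SEQ_GT() : %d\n", SEQ_GT(projected, isn_offset));	/* 1, as intended */
	return (0);
}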
/*
@@ -1423,20 +1440,26 @@
* is controlled by the icmp_may_rst sysctl.
*/
struct inpcb *
-tcp_drop_syn_sent(inp, errno)
- struct inpcb *inp;
- int errno;
+tcp_drop_syn_sent(struct inpcb *inp, int errno)
{
- struct tcpcb *tp = intotcpcb(inp);
+ struct tcpcb *tp;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- if (tp != NULL && tp->t_state == TCPS_SYN_SENT) {
- tcp_drop(tp, errno);
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_SYN_SENT)
+ return (inp);
+
+ tp = tcp_drop(tp, errno);
+ if (tp != NULL)
+ return (inp);
+ else
return (NULL);
- }
- return (inp);
}
/*
@@ -1446,11 +1469,9 @@
* This duplicates some code in the tcp_mss() function in tcp_input.c.
*/
struct inpcb *
-tcp_mtudisc(inp, errno)
- struct inpcb *inp;
- int errno;
+tcp_mtudisc(struct inpcb *inp, int errno)
{
- struct tcpcb *tp = intotcpcb(inp);
+ struct tcpcb *tp;
struct socket *so = inp->inp_socket;
u_int maxmtu;
u_int romtu;
@@ -1460,80 +1481,88 @@
#endif /* INET6 */
INP_LOCK_ASSERT(inp);
- if (tp != NULL) {
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
+
#ifdef INET6
- isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
#endif
- maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
- romtu =
+ maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
+ romtu =
#ifdef INET6
- isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
+ isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
- tcp_maxmtu(&inp->inp_inc);
- if (!maxmtu)
- maxmtu = romtu;
- else
- maxmtu = min(maxmtu, romtu);
- if (!maxmtu) {
- tp->t_maxopd = tp->t_maxseg =
+ tcp_maxmtu(&inp->inp_inc, NULL);
+ if (!maxmtu)
+ maxmtu = romtu;
+ else
+ maxmtu = min(maxmtu, romtu);
+ if (!maxmtu) {
+ tp->t_maxopd = tp->t_maxseg =
#ifdef INET6
- isipv6 ? tcp_v6mssdflt :
+ isipv6 ? tcp_v6mssdflt :
#endif /* INET6 */
- tcp_mssdflt;
- return (inp);
- }
- mss = maxmtu -
+ tcp_mssdflt;
+ return (inp);
+ }
+ mss = maxmtu -
#ifdef INET6
- (isipv6 ?
- sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
+ (isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
#endif /* INET6 */
- sizeof(struct tcpiphdr)
+ sizeof(struct tcpiphdr)
#ifdef INET6
- )
+ )
#endif /* INET6 */
- ;
+ ;
- /*
- * XXX - The above conditional probably violates the TCP
- * spec. The problem is that, since we don't know the
- * other end's MSS, we are supposed to use a conservative
- * default. But, if we do that, then MTU discovery will
- * never actually take place, because the conservative
- * default is much less than the MTUs typically seen
- * on the Internet today. For the moment, we'll sweep
- * this under the carpet.
- *
- * The conservative default might not actually be a problem
- * if the only case this occurs is when sending an initial
- * SYN with options and data to a host we've never talked
- * to before. Then, they will reply with an MSS value which
- * will get recorded and the new parameters should get
- * recomputed. For Further Study.
- */
- if (tp->t_maxopd <= mss)
- return (inp);
- tp->t_maxopd = mss;
+ /*
+ * XXX - The above conditional probably violates the TCP
+ * spec. The problem is that, since we don't know the
+ * other end's MSS, we are supposed to use a conservative
+ * default. But, if we do that, then MTU discovery will
+ * never actually take place, because the conservative
+ * default is much less than the MTUs typically seen
+ * on the Internet today. For the moment, we'll sweep
+ * this under the carpet.
+ *
+ * The conservative default might not actually be a problem
+ * if the only case this occurs is when sending an initial
+ * SYN with options and data to a host we've never talked
+ * to before. Then, they will reply with an MSS value which
+ * will get recorded and the new parameters should get
+ * recomputed. For Further Study.
+ */
+ if (tp->t_maxopd <= mss)
+ return (inp);
+ tp->t_maxopd = mss;
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
- mss -= TCPOLEN_TSTAMP_APPA;
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+ mss -= TCPOLEN_TSTAMP_APPA;
#if (MCLBYTES & (MCLBYTES - 1)) == 0
- if (mss > MCLBYTES)
- mss &= ~(MCLBYTES-1);
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
#else
- if (mss > MCLBYTES)
- mss = mss / MCLBYTES * MCLBYTES;
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
#endif
- if (so->so_snd.sb_hiwat < mss)
- mss = so->so_snd.sb_hiwat;
+ if (so->so_snd.sb_hiwat < mss)
+ mss = so->so_snd.sb_hiwat;
- tp->t_maxseg = mss;
+ tp->t_maxseg = mss;
- tcpstat.tcps_mturesent++;
- tp->t_rtttime = 0;
- tp->snd_nxt = tp->snd_una;
- tcp_output(tp);
- }
+ tcpstat.tcps_mturesent++;
+ tp->t_rtttime = 0;
+ tp->snd_nxt = tp->snd_una;
+ tcp_free_sackholes(tp);
+ tp->snd_recover = tp->snd_max;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ EXIT_FASTRECOVERY(tp);
+ tcp_output(tp);
return (inp);
}
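
Worked through for a typical IPv4 Ethernet path, the re-indented MSS computation above gives: maxmtu = 1500, so mss = 1500 - sizeof(struct tcpiphdr) = 1500 - 40 = 1460; if timestamps were negotiated, TCPOLEN_TSTAMP_APPA (12 bytes) is subtracted, leaving 1448; 1448 is below MCLBYTES (commonly 2048), so the cluster rounding leaves it unchanged and t_maxseg becomes 1448, further capped by the send buffer high-water mark if that is smaller.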
@@ -1544,8 +1573,7 @@
* to get the interface MTU.
*/
u_long
-tcp_maxmtu(inc)
- struct in_conninfo *inc;
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1568,6 +1596,13 @@
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO4 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro.ro_rt);
}
return (maxmtu);
@@ -1575,8 +1610,7 @@
#ifdef INET6
u_long
-tcp_maxmtu6(inc)
- struct in_conninfo *inc;
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1598,6 +1632,13 @@
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO6 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro6.ro_rt);
}
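
Both tcp_maxmtu() and tcp_maxmtu6() now take an optional flags pointer and set CSUM_TSO in it when the outgoing interface advertises TSO. A hedged, illustrative fragment of how a caller might consume the new out-parameter (the TF_TSO bookkeeping is an assumption for illustration, not quoted from this diff):

	int mtuflags = 0;
	u_long maxmtu;

	maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
	if (mtuflags & CSUM_TSO)
		tp->t_flags |= TF_TSO;	/* assumed flag; enable TSO on this connection */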
@@ -1608,8 +1649,7 @@
#ifdef IPSEC
/* compute ESP/AH header size for TCP, including outer IP header. */
size_t
-ipsec_hdrsiz_tcp(tp)
- struct tcpcb *tp;
+ipsec_hdrsiz_tcp(struct tcpcb *tp)
{
struct inpcb *inp;
struct mbuf *m;
@@ -1647,239 +1687,7 @@
m_free(m);
return (hdrsiz);
}
-#endif /*IPSEC*/
-
-/*
- * Move a TCP connection into TIME_WAIT state.
- * tcbinfo is locked.
- * inp is locked, and is unlocked before returning.
- */
-void
-tcp_twstart(tp)
- struct tcpcb *tp;
-{
- struct tcptw *tw;
- struct inpcb *inp;
- int tw_time, acknow;
- struct socket *so;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_reset(). */
- INP_LOCK_ASSERT(tp->t_inpcb);
-
- tw = uma_zalloc(tcptw_zone, M_NOWAIT);
- if (tw == NULL) {
- tw = tcp_timer_2msl_tw(1);
- if (tw == NULL) {
- tcp_close(tp);
- return;
- }
- }
- inp = tp->t_inpcb;
- tw->tw_inpcb = inp;
-
- /*
- * Recover last window size sent.
- */
- tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
-
- /*
- * Set t_recent if timestamps are used on the connection.
- */
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
- (TF_REQ_TSTMP|TF_RCVD_TSTMP))
- tw->t_recent = tp->ts_recent;
- else
- tw->t_recent = 0;
-
- tw->snd_nxt = tp->snd_nxt;
- tw->rcv_nxt = tp->rcv_nxt;
- tw->iss = tp->iss;
- tw->irs = tp->irs;
- tw->t_starttime = tp->t_starttime;
- tw->tw_time = 0;
-
-/* XXX
- * If this code will
- * be used for fin-wait-2 state also, then we may need
- * a ts_recent from the last segment.
- */
- tw_time = 2 * tcp_msl;
- acknow = tp->t_flags & TF_ACKNOW;
- tcp_discardcb(tp);
- so = inp->inp_socket;
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- tw->tw_cred = crhold(so->so_cred);
- tw->tw_so_options = so->so_options;
- sotryfree(so);
- inp->inp_socket = NULL;
- if (acknow)
- tcp_twrespond(tw, TH_ACK);
- inp->inp_ppcb = (caddr_t)tw;
- inp->inp_vflag |= INP_TIMEWAIT;
- tcp_timer_2msl_reset(tw, tw_time);
- INP_UNLOCK(inp);
-}
-
-/*
- * The appromixate rate of ISN increase of Microsoft TCP stacks;
- * the actual rate is slightly higher due to the addition of
- * random positive increments.
- *
- * Most other new OSes use semi-randomized ISN values, so we
- * do not need to worry about them.
- */
-#define MS_ISN_BYTES_PER_SECOND 250000
-
-/*
- * Determine if the ISN we will generate has advanced beyond the last
- * sequence number used by the previous connection. If so, indicate
- * that it is safe to recycle this tw socket by returning 1.
- *
- * XXXRW: This function should assert the inpcb lock as it does multiple
- * non-atomic reads from the tcptw, but is currently called without it from
- * in_pcb.c:in_pcblookup_local().
- */
-int
-tcp_twrecycleable(struct tcptw *tw)
-{
- tcp_seq new_iss = tw->iss;
- tcp_seq new_irs = tw->irs;
-
- new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
- new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
-
- if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
- return (1);
- else
- return (0);
-}
-
-struct tcptw *
-tcp_twclose(struct tcptw *tw, int reuse)
-{
- struct inpcb *inp;
-
- inp = tw->tw_inpcb;
- INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_stop(). */
- INP_LOCK_ASSERT(inp);
-
- tw->tw_inpcb = NULL;
- tcp_timer_2msl_stop(tw);
- inp->inp_ppcb = NULL;
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO)
- in6_pcbdetach(inp);
- else
-#endif
- in_pcbdetach(inp);
- tcpstat.tcps_closed++;
- crfree(tw->tw_cred);
- tw->tw_cred = NULL;
- if (reuse)
- return (tw);
- uma_zfree(tcptw_zone, tw);
- return (NULL);
-}
-
-int
-tcp_twrespond(struct tcptw *tw, int flags)
-{
- struct inpcb *inp = tw->tw_inpcb;
- struct tcphdr *th;
- struct mbuf *m;
- struct ip *ip = NULL;
- u_int8_t *optp;
- u_int hdrlen, optlen;
- int error;
-#ifdef INET6
- struct ip6_hdr *ip6 = NULL;
- int isipv6 = inp->inp_inc.inc_isipv6;
-#endif
-
- INP_LOCK_ASSERT(inp);
-
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
- if (m == NULL)
- return (ENOBUFS);
- m->m_data += max_linkhdr;
-
-#ifdef MAC
- mac_create_mbuf_from_inpcb(inp, m);
-#endif
-
-#ifdef INET6
- if (isipv6) {
- hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
- ip6 = mtod(m, struct ip6_hdr *);
- th = (struct tcphdr *)(ip6 + 1);
- tcpip_fillheaders(inp, ip6, th);
- } else
-#endif
- {
- hdrlen = sizeof(struct tcpiphdr);
- ip = mtod(m, struct ip *);
- th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(inp, ip, th);
- }
- optp = (u_int8_t *)(th + 1);
-
- /*
- * Send a timestamp and echo-reply if both our side and our peer
- * have sent timestamps in our SYN's and this is not a RST.
- */
- if (tw->t_recent && flags == TH_ACK) {
- u_int32_t *lp = (u_int32_t *)optp;
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(tw->t_recent);
- optp += TCPOLEN_TSTAMP_APPA;
- }
-
- optlen = optp - (u_int8_t *)(th + 1);
-
- m->m_len = hdrlen + optlen;
- m->m_pkthdr.len = m->m_len;
-
- KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
-
- th->th_seq = htonl(tw->snd_nxt);
- th->th_ack = htonl(tw->rcv_nxt);
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
- th->th_flags = flags;
- th->th_win = htons(tw->last_win);
-
-#ifdef INET6
- if (isipv6) {
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen);
- ip6->ip6_hlim = in6_selecthlim(inp, NULL);
- error = ip6_output(m, inp->in6p_outputopts, NULL,
- (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
- } else
-#endif
- {
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
- ip->ip_len = m->m_pkthdr.len;
- if (path_mtu_discovery)
- ip->ip_off |= IP_DF;
- error = ip_output(m, inp->inp_options, NULL,
- ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
- NULL, inp);
- }
- if (flags & TH_ACK)
- tcpstat.tcps_sndacks++;
- else
- tcpstat.tcps_sndctrl++;
- tcpstat.tcps_sndtotal++;
- return (error);
-}
+#endif /* IPSEC */
/*
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
@@ -2140,7 +1948,7 @@
/*
* Step 4: Update MD5 hash with shared secret.
*/
- MD5Update(&ctx, _KEYBUF(sav->key_auth), _KEYLEN(sav->key_auth));
+ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
MD5Final(buf, &ctx);
key_sa_recordxfer(sav, m);
@@ -2231,11 +2039,21 @@
}
if (inp != NULL) {
INP_LOCK(inp);
- if ((tw = intotw(inp)) &&
- (inp->inp_vflag & INP_TIMEWAIT) != 0) {
- (void) tcp_twclose(tw, 0);
- } else if ((tp = intotcpcb(inp)) &&
- ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * XXXRW: There currently exists a state where an
+ * inpcb is present, but its timewait state has been
+ * discarded. For now, don't allow dropping of this
+ * type of inpcb.
+ */
+ tw = intotw(inp);
+ if (tw != NULL)
+ tcp_twclose(tw, 0);
+ else
+ INP_UNLOCK(inp);
+ } else if (!(inp->inp_vflag & INP_DROPPED) &&
+ !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
+ tp = intotcpcb(inp);
tp = tcp_drop(tp, ECONNABORTED);
if (tp != NULL)
INP_UNLOCK(inp);
@@ -2250,3 +2068,97 @@
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
0, sysctl_drop, "", "Drop TCP connection");
+
+/*
+ * Generate a standardized TCP log line for use throughout the
+ * tcp subsystem. Memory allocation is done with M_NOWAIT to
+ * allow use in the interrupt context.
+ *
+ * NB: The caller MUST free(s, M_TCPLOG) the returned string.
+ * NB: The function may return NULL if memory allocation failed.
+ *
+ * Due to header inclusion and ordering limitations the struct ip
+ * and ip6_hdr pointers have to be passed as void pointers.
+ */
+char *
+tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
+ const void *ip6hdr)
+{
+ char *s, *sp;
+ size_t size;
+ struct ip *ip;
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+
+ ip6 = (const struct ip6_hdr *)ip6hdr;
+#endif /* INET6 */
+ ip = (struct ip *)ip4hdr;
+
+ /*
+ * The log line looks like this:
+ * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
+ */
+ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
+ sizeof(PRINT_TH_FLAGS) + 1 +
+#ifdef INET6
+ 2 * INET6_ADDRSTRLEN;
+#else
+ 2 * INET_ADDRSTRLEN;
+#endif /* INET6 */
+
+ /* Is logging enabled? */
+ if (tcp_log_debug == 0 && tcp_log_in_vain == 0)
+ return (NULL);
+
+ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
+ if (s == NULL)
+ return (NULL);
+
+ strcat(s, "TCP: [");
+ sp = s + strlen(s);
+
+ if (inc && inc->inc_isipv6 == 0) {
+ inet_ntoa_r(inc->inc_faddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ inet_ntoa_r(inc->inc_laddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+#ifdef INET6
+ } else if (inc) {
+ ip6_sprintf(sp, &inc->inc6_faddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &inc->inc6_laddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+ } else if (ip6 && th) {
+ ip6_sprintf(sp, &ip6->ip6_src);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &ip6->ip6_dst);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+#endif /* INET6 */
+ } else if (ip && th) {
+ inet_ntoa_r(ip->ip_src, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ inet_ntoa_r(ip->ip_dst, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+ } else {
+ free(s, M_TCPLOG);
+ return (NULL);
+ }
+ sp = s + strlen(s);
+ if (th)
+ sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
+ if (*(s + size - 1) != '\0')
+ panic("%s: string too long", __func__);
+ return (s);
+}
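
Per the NB comments above, callers must tolerate a NULL return and free the string with M_TCPLOG. A hedged fragment of typical use, not a complete compilable unit (the log() call and message text are illustrative):

	char *s;

	if ((s = tcp_log_addrs(&inp->inp_inc, th, NULL, NULL)) != NULL) {
		log(LOG_DEBUG, "%s; %s: dropping segment\n", __func__, s);
		free(s, M_TCPLOG);
	}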
--- /dev/null
+++ sys/netinet/sctputil.c
@@ -0,0 +1,6520 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctputil.c,v 1.37 2005/03/07 23:26:09 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctputil.c,v 1.65.2.4.2.1 2008/02/02 12:44:13 rwatson Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#ifdef INET6
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_crc32.h>
+#include <netinet/sctp_indata.h>/* for sctp_deliver_data() */
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_cc_functions.h>
+
+#define NUMBER_OF_MTU_SIZES 18
+
+
+#ifndef KTR_SCTP
+#define KTR_SCTP KTR_SUBSYS
+#endif
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sb.stcb = stcb;
+ sctp_clog.x.sb.so_sbcc = sb->sb_cc;
+ if (stcb)
+ sctp_clog.x.sb.stcb_sbcc = stcb->asoc.sb_cc;
+ else
+ sctp_clog.x.sb.stcb_sbcc = 0;
+ sctp_clog.x.sb.incr = incr;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SB,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.close.inp = (void *)inp;
+ sctp_clog.x.close.sctp_flags = inp->sctp_flags;
+ if (stcb) {
+ sctp_clog.x.close.stcb = (void *)stcb;
+ sctp_clog.x.close.state = (uint16_t) stcb->asoc.state;
+ } else {
+ sctp_clog.x.close.stcb = 0;
+ sctp_clog.x.close.state = 0;
+ }
+ sctp_clog.x.close.loc = loc;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CLOSE,
+ 0,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+rto_logging(struct sctp_nets *net, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rto.net = (void *)net;
+ sctp_clog.x.rto.rtt = net->prev_rtt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RTT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t tsn, uint16_t sseq, uint16_t stream, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.strlog.stcb = stcb;
+ sctp_clog.x.strlog.n_tsn = tsn;
+ sctp_clog.x.strlog.n_sseq = sseq;
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ sctp_clog.x.strlog.strm = stream;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_nagle_event(struct sctp_tcb *stcb, int action)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.nagle.stcb = (void *)stcb;
+ sctp_clog.x.nagle.total_flight = stcb->asoc.total_flight;
+ sctp_clog.x.nagle.total_in_queue = stcb->asoc.total_output_queue_size;
+ sctp_clog.x.nagle.count_in_queue = stcb->asoc.chunks_on_out_queue;
+ sctp_clog.x.nagle.count_in_flight = stcb->asoc.total_flight_count;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_NAGLE,
+ action,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_sack(uint32_t old_cumack, uint32_t cumack, uint32_t tsn, uint16_t gaps, uint16_t dups, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sack.cumack = cumack;
+ sctp_clog.x.sack.oldcumack = old_cumack;
+ sctp_clog.x.sack.tsn = tsn;
+ sctp_clog.x.sack.numGaps = gaps;
+ sctp_clog.x.sack.numDups = dups;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SACK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
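
Each of these helpers fills a type-specific member of what is evidently a union inside struct sctp_cwnd_log and then emits the four overlapping 32-bit words x.misc.log1..log4 through SCTP_CTR6, so every event lands in ktr(4) as a fixed-size record. A minimal userland illustration of the overlay idea (the layout below is invented for the example, not the real sctp_cwnd_log):

#include <stdio.h>
#include <stdint.h>

struct example_log {
	union {
		struct { uint32_t log1, log2, log3, log4; } misc;
		struct { uint32_t cumack, oldcumack, tsn; uint16_t gaps, dups; } sack;
	} x;
};

int
main(void)
{
	struct example_log clog;

	clog.x.sack.cumack = 0x1000;
	clog.x.sack.oldcumack = 0x0800;
	clog.x.sack.tsn = 0x1234;
	clog.x.sack.gaps = 2;
	clog.x.sack.dups = 0;
	/* The tracer only ever sees the four overlapping words. */
	printf("%x-%x-%x-%x\n", clog.x.misc.log1, clog.x.misc.log2,
	    clog.x.misc.log3, clog.x.misc.log4);
	return (0);
}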
+
+void
+sctp_log_map(uint32_t map, uint32_t cum, uint32_t high, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.map.base = map;
+ sctp_clog.x.map.cum = cum;
+ sctp_clog.x.map.high = high;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAP,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.fr.largest_tsn = biggest_tsn;
+ sctp_clog.x.fr.largest_new_tsn = biggest_new_tsn;
+ sctp_clog.x.fr.tsn = tsn;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_FR,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+
+void
+sctp_log_mb(struct mbuf *m, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mb.mp = m;
+ sctp_clog.x.mb.mbuf_flags = (uint8_t) (SCTP_BUF_GET_FLAGS(m));
+ sctp_clog.x.mb.size = (uint16_t) (SCTP_BUF_LEN(m));
+ sctp_clog.x.mb.data = SCTP_BUF_AT(m, 0);
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_clog.x.mb.ext = SCTP_BUF_EXTEND_BASE(m);
+ sctp_clog.x.mb.refcnt = (uint8_t) (SCTP_BUF_EXTEND_REFCNT(m));
+ } else {
+ sctp_clog.x.mb.ext = 0;
+ sctp_clog.x.mb.refcnt = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBUF,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ if (control == NULL) {
+ SCTP_PRINTF("Gak log of NULL?\n");
+ return;
+ }
+ sctp_clog.x.strlog.stcb = control->stcb;
+ sctp_clog.x.strlog.n_tsn = control->sinfo_tsn;
+ sctp_clog.x.strlog.n_sseq = control->sinfo_ssn;
+ sctp_clog.x.strlog.strm = control->sinfo_stream;
+ if (poschk != NULL) {
+ sctp_clog.x.strlog.e_tsn = poschk->sinfo_tsn;
+ sctp_clog.x.strlog.e_sseq = poschk->sinfo_ssn;
+ } else {
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net, int augment, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.cwnd.net = net;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+
+ if (net) {
+ sctp_clog.x.cwnd.cwnd_new_value = net->cwnd;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.pseudo_cumack = net->pseudo_cumack;
+ sctp_clog.x.cwnd.meets_pseudo_cumack = net->new_pseudo_cumack;
+ sctp_clog.x.cwnd.need_new_pseudo_cumack = net->find_pseudo_cumack;
+ }
+ if (SCTP_CWNDLOG_PRESEND == from) {
+ sctp_clog.x.cwnd.meets_pseudo_cumack = stcb->asoc.peers_rwnd;
+ }
+ sctp_clog.x.cwnd.cwnd_augment = augment;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ if (inp) {
+ sctp_clog.x.lock.sock = (void *)inp->sctp_socket;
+
+ } else {
+ sctp_clog.x.lock.sock = (void *)NULL;
+ }
+ sctp_clog.x.lock.inp = (void *)inp;
+ if (stcb) {
+ sctp_clog.x.lock.tcb_lock = mtx_owned(&stcb->tcb_mtx);
+ } else {
+ sctp_clog.x.lock.tcb_lock = SCTP_LOCK_UNKNOWN;
+ }
+ if (inp) {
+ sctp_clog.x.lock.inp_lock = mtx_owned(&inp->inp_mtx);
+ sctp_clog.x.lock.create_lock = mtx_owned(&inp->inp_create_mtx);
+ } else {
+ sctp_clog.x.lock.inp_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.create_lock = SCTP_LOCK_UNKNOWN;
+ }
+ sctp_clog.x.lock.info_lock = rw_wowned(&sctppcbinfo.ipi_ep_mtx);
+ if (inp->sctp_socket) {
+ sctp_clog.x.lock.sock_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.sockrcvbuf_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.socksndbuf_lock = mtx_owned(&(inp->sctp_socket->so_snd.sb_mtx));
+ } else {
+ sctp_clog.x.lock.sock_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.sockrcvbuf_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.socksndbuf_lock = SCTP_LOCK_UNKNOWN;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_LOCK_EVENT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *net, int error, int burst, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.cwnd.net = net;
+ sctp_clog.x.cwnd.cwnd_new_value = error;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.cwnd_augment = burst;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAXBURST,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_rwnd(uint8_t from, uint32_t peers_rwnd, uint32_t snd_size, uint32_t overhead)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = snd_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = 0;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_rwnd_set(uint8_t from, uint32_t peers_rwnd, uint32_t flight_size, uint32_t overhead, uint32_t a_rwndval)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = flight_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = a_rwndval;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mbcnt.total_queue_size = total_oq;
+ sctp_clog.x.mbcnt.size_change = book;
+ sctp_clog.x.mbcnt.total_queue_mb_size = total_mbcnt_q;
+ sctp_clog.x.mbcnt.mbcnt_change = mbcnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBCNT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_MISC_EVENT,
+ from,
+ a, b, c, d);
+}
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t cumtsn, uint32_t wake_cnt, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.wake.stcb = (void *)stcb;
+ sctp_clog.x.wake.wake_cnt = wake_cnt;
+ sctp_clog.x.wake.flight = stcb->asoc.total_flight_count;
+ sctp_clog.x.wake.send_q = stcb->asoc.send_queue_cnt;
+ sctp_clog.x.wake.sent_q = stcb->asoc.sent_queue_cnt;
+
+ if (stcb->asoc.stream_queue_cnt < 0xff)
+ sctp_clog.x.wake.stream_qcnt = (uint8_t) stcb->asoc.stream_queue_cnt;
+ else
+ sctp_clog.x.wake.stream_qcnt = 0xff;
+
+ if (stcb->asoc.chunks_on_out_queue < 0xff)
+ sctp_clog.x.wake.chunks_on_oque = (uint8_t) stcb->asoc.chunks_on_out_queue;
+ else
+ sctp_clog.x.wake.chunks_on_oque = 0xff;
+
+ sctp_clog.x.wake.sctpflags = 0;
+ /* set in the defered mode stuff */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE)
+ sctp_clog.x.wake.sctpflags |= 1;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT)
+ sctp_clog.x.wake.sctpflags |= 2;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT)
+ sctp_clog.x.wake.sctpflags |= 4;
+ /* what about the sb */
+ if (stcb->sctp_socket) {
+ struct socket *so = stcb->sctp_socket;
+
+ sctp_clog.x.wake.sbflags = (uint8_t) ((so->so_snd.sb_flags & 0x00ff));
+ } else {
+ sctp_clog.x.wake.sbflags = 0xff;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_WAKE,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, int sendlen)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.blk.onsb = asoc->total_output_queue_size;
+ sctp_clog.x.blk.send_sent_qcnt = (uint16_t) (asoc->send_queue_cnt + asoc->sent_queue_cnt);
+ sctp_clog.x.blk.peer_rwnd = asoc->peers_rwnd;
+ sctp_clog.x.blk.stream_qcnt = (uint16_t) asoc->stream_queue_cnt;
+ sctp_clog.x.blk.chunks_on_oque = (uint16_t) asoc->chunks_on_out_queue;
+ sctp_clog.x.blk.flight_size = (uint16_t) (asoc->total_flight / 1024);
+ sctp_clog.x.blk.sndlen = sendlen;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_BLOCK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+int
+sctp_fill_stat_log(void *optval, size_t *optsize)
+{
+ /* May need to fix this if ktrdump does not work */
+ return (0);
+}
+
+#ifdef SCTP_AUDITING_ENABLED
+uint8_t sctp_audit_data[SCTP_AUDIT_SIZE][2];
+static int sctp_audit_indx = 0;
+
+static
+void
+sctp_print_audit_report(void)
+{
+ int i;
+ int cnt;
+
+ cnt = 0;
+ for (i = sctp_audit_indx; i < SCTP_AUDIT_SIZE; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ for (i = 0; i < sctp_audit_indx; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ SCTP_PRINTF("\n");
+}
+
+void
+sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int resend_cnt, tot_out, rep, tot_book_cnt;
+ struct sctp_nets *lnet;
+ struct sctp_tmit_chunk *chk;
+
+ sctp_audit_data[sctp_audit_indx][0] = 0xAA;
+ sctp_audit_data[sctp_audit_indx][1] = 0x000000ff & from;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ if (inp == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x01;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ if (stcb == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x02;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ sctp_audit_data[sctp_audit_indx][0] = 0xA1;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 0;
+ tot_book_cnt = 0;
+ resend_cnt = tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend_cnt++;
+ } else if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ tot_out += chk->book_size;
+ tot_book_cnt++;
+ }
+ }
+ if (resend_cnt != stcb->asoc.sent_queue_retran_cnt) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA1;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ SCTP_PRINTF("resend_cnt:%d asoc-tot:%d\n",
+ resend_cnt, stcb->asoc.sent_queue_retran_cnt);
+ rep = 1;
+ stcb->asoc.sent_queue_retran_cnt = resend_cnt;
+ sctp_audit_data[sctp_audit_indx][0] = 0xA2;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA2;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt:%d asoc_tot:%d\n", tot_out,
+ (int)stcb->asoc.total_flight);
+ stcb->asoc.total_flight = tot_out;
+ }
+ if (tot_book_cnt != stcb->asoc.total_flight_count) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA5;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt_book:%d\n", tot_book);
+
+ stcb->asoc.total_flight_count = tot_book_cnt;
+ }
+ tot_out = 0;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ tot_out += lnet->flight_size;
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA3;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("real flight:%d net total was %d\n",
+ stcb->asoc.total_flight, tot_out);
+ /* now corrective action */
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+
+ tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->whoTo == lnet) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ tot_out += chk->book_size;
+ }
+ }
+ if (lnet->flight_size != tot_out) {
+ SCTP_PRINTF("net:%x flight was %d corrected to %d\n",
+ (uint32_t) lnet, lnet->flight_size,
+ tot_out);
+ lnet->flight_size = tot_out;
+ }
+ }
+ }
+ if (rep) {
+ sctp_print_audit_report();
+ }
+}
+
+void
+sctp_audit_log(uint8_t ev, uint8_t fd)
+{
+
+ sctp_audit_data[sctp_audit_indx][0] = ev;
+ sctp_audit_data[sctp_audit_indx][1] = fd;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+}
+
+#endif
+
+/*
+ * a list of sizes based on typical MTUs, used only if the next hop
+ * size is not returned.
+ */
+static int sctp_mtu_sizes[] = {
+ 68,
+ 296,
+ 508,
+ 512,
+ 544,
+ 576,
+ 1006,
+ 1492,
+ 1500,
+ 1536,
+ 2002,
+ 2048,
+ 4352,
+ 4464,
+ 8166,
+ 17914,
+ 32000,
+ 65535
+};
+
+void
+sctp_stop_timers_for_shutdown(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+
+ asoc = &stcb->asoc;
+
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+}
+
+int
+find_next_best_mtu(int totsz)
+{
+ int i, perfer;
+
+ /*
+ * if we are in here we must find the next best fit based on the
+ * size of the datagram that failed to be sent.
+ */
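+ /* e.g. a failed 1400-byte datagram falls below the 1492 entry, so 1006 is returned */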
+ perfer = 0;
+ for (i = 0; i < NUMBER_OF_MTU_SIZES; i++) {
+ if (totsz < sctp_mtu_sizes[i]) {
+ perfer = i - 1;
+ if (perfer < 0)
+ perfer = 0;
+ break;
+ }
+ }
+ return (sctp_mtu_sizes[perfer]);
+}
+
+void
+sctp_fill_random_store(struct sctp_pcb *m)
+{
+ /*
+ * Here we use MD5/SHA-1 to hash our good random numbers together with
+ * our counter. The result becomes our good random numbers and we then
+ * set up to hand these out. Note that we do no locking to protect
+ * this; that is ok, since if competing callers get in here we will just
+ * get more gobbledygook in the random store, which is what we want.
+ * There is a danger that two callers will use the same random numbers,
+ * but that's ok too since that is random as well :->
+ */
+ m->store_at = 0;
+ (void)sctp_hmac(SCTP_HMAC, (uint8_t *) m->random_numbers,
+ sizeof(m->random_numbers), (uint8_t *) & m->random_counter,
+ sizeof(m->random_counter), (uint8_t *) m->random_store);
+ m->random_counter++;
+}
+
+uint32_t
+sctp_select_initial_TSN(struct sctp_pcb *inp)
+{
+ /*
+ * A true implementation should use a random selection process to get
+ * the initial stream sequence number, using RFC 1750 as a good
+ * guideline.
+ */
+ uint32_t x, *xp;
+ uint8_t *p;
+ int store_at, new_store;
+
+ if (inp->initial_sequence_debug != 0) {
+ uint32_t ret;
+
+ ret = inp->initial_sequence_debug;
+ inp->initial_sequence_debug++;
+ return (ret);
+ }
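+ /* reserve sizeof(uint32_t) bytes of the random store with a lock-free compare-and-swap; retry on contention */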
+retry:
+ store_at = inp->store_at;
+ new_store = store_at + sizeof(uint32_t);
+ if (new_store >= (SCTP_SIGNATURE_SIZE - 3)) {
+ new_store = 0;
+ }
+ if (!atomic_cmpset_int(&inp->store_at, store_at, new_store)) {
+ goto retry;
+ }
+ if (new_store == 0) {
+ /* Refill the random store */
+ sctp_fill_random_store(inp);
+ }
+ p = &inp->random_store[store_at];
+ xp = (uint32_t *) p;
+ x = *xp;
+ return (x);
+}
+
+uint32_t
+sctp_select_a_tag(struct sctp_inpcb *inp, int save_in_twait)
+{
+ u_long x, not_done;
+ struct timeval now;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ not_done = 1;
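+ /* keep drawing candidate tags until we get a non-zero value that sctp_is_vtag_good() accepts */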
+ while (not_done) {
+ x = sctp_select_initial_TSN(&inp->sctp_ep);
+ if (x == 0) {
+ /* we never use 0 */
+ continue;
+ }
+ if (sctp_is_vtag_good(inp, x, &now, save_in_twait)) {
+ not_done = 0;
+ }
+ }
+ return (x);
+}
+
+int
+sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
+ int for_a_init, uint32_t override_tag, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+
+ /*
+ * Anything set to zero is taken care of by the allocation routine's
+ * bzero
+ */
+
+ /*
+ * Up front, select what scoping to apply on the addresses I tell my
+ * peer. Not sure what to do with these right now; we will need to come
+ * up with a way to set them. We may need to pass them through from the
+ * caller in the sctp_aloc_assoc() function.
+ */
+ int i;
+
+ asoc = &stcb->asoc;
+ /* init all variables to a known value. */
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
+ asoc->max_burst = m->sctp_ep.max_burst;
+ asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ asoc->cookie_life = m->sctp_ep.def_cookie_life;
+ asoc->sctp_cmt_on_off = (uint8_t) sctp_cmt_on_off;
+ /* JRS 5/21/07 - Init CMT PF variables */
+ asoc->sctp_cmt_pf = (uint8_t) sctp_cmt_pf;
+ asoc->sctp_frag_point = m->sctp_frag_point;
+#ifdef INET
+ asoc->default_tos = m->ip_inp.inp.inp_ip_tos;
+#else
+ asoc->default_tos = 0;
+#endif
+
+#ifdef INET6
+ asoc->default_flowlabel = ((struct in6pcb *)m)->in6p_flowinfo;
+#else
+ asoc->default_flowlabel = 0;
+#endif
+ asoc->sb_send_resv = 0;
+ if (override_tag) {
+ struct timeval now;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ if (sctp_is_in_timewait(override_tag)) {
+ /*
+ * It must be in the time-wait hash; we put it there
+ * when we allocate one. If not, the peer is playing
+ * games.
+ */
+ asoc->my_vtag = override_tag;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ panic("Huh is_in_timewait fails");
+ return (ENOMEM);
+ }
+
+ } else {
+ asoc->my_vtag = sctp_select_a_tag(m, 1);
+ }
+ /* Get the nonce tags */
+ asoc->my_vtag_nonce = sctp_select_a_tag(m, 0);
+ asoc->peer_vtag_nonce = sctp_select_a_tag(m, 0);
+ asoc->vrf_id = vrf_id;
+
+ if (sctp_is_feature_on(m, SCTP_PCB_FLAGS_DONOT_HEARTBEAT))
+ asoc->hb_is_disabled = 1;
+ else
+ asoc->hb_is_disabled = 0;
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ asoc->tsn_in_at = 0;
+ asoc->tsn_out_at = 0;
+ asoc->tsn_in_wrapped = 0;
+ asoc->tsn_out_wrapped = 0;
+ asoc->cumack_log_at = 0;
+ asoc->cumack_log_atsnt = 0;
+#endif
+#ifdef SCTP_FS_SPEC_LOG
+ asoc->fs_index = 0;
+#endif
+ asoc->refcnt = 0;
+ asoc->assoc_up_sent = 0;
+ asoc->assoc_id = asoc->my_vtag;
+ asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq =
+ sctp_select_initial_TSN(&m->sctp_ep);
+ /* we are optimistic here */
+ asoc->peer_supports_pktdrop = 1;
+
+ asoc->sent_queue_retran_cnt = 0;
+
+ /* for CMT */
+ asoc->last_net_data_came_from = NULL;
+
+ /* This will need to be adjusted */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ asoc->asconf_seq_in = asoc->last_acked_seq;
+
+ /* here we are different, we hold the next one we expect */
+ asoc->str_reset_seq_in = asoc->last_acked_seq + 1;
+
+ asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max;
+ asoc->initial_rto = m->sctp_ep.initial_rto;
+
+ asoc->max_init_times = m->sctp_ep.max_init_times;
+ asoc->max_send_times = m->sctp_ep.max_send_times;
+ asoc->def_net_failure = m->sctp_ep.def_net_failure;
+ asoc->free_chunk_cnt = 0;
+
+ asoc->iam_blocking = 0;
+ /* ECN Nonce initialization */
+ asoc->context = m->sctp_context;
+ asoc->def_send = m->def_send;
+ asoc->ecn_nonce_allowed = 0;
+ asoc->receiver_nonce_sum = 1;
+ asoc->nonce_sum_expect_base = 1;
+ asoc->nonce_sum_check = 1;
+ asoc->nonce_resync_tsn = 0;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_wait_tsn = 0;
+ asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ asoc->sack_freq = m->sctp_ep.sctp_sack_freq;
+ asoc->pr_sctp_cnt = 0;
+ asoc->total_output_queue_size = 0;
+
+ if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct in6pcb *inp6;
+
+ /* It's a V6 socket */
+ inp6 = (struct in6pcb *)m;
+ asoc->ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(inp6) == 0) {
+ asoc->ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ asoc->ipv4_addr_legal = 0;
+ }
+ } else {
+ /* It's a V4 socket, not V6 */
+ asoc->ipv4_addr_legal = 1;
+ asoc->ipv6_addr_legal = 0;
+ }
+
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND);
+ asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket);
+
+ asoc->smallest_mtu = m->sctp_frag_point;
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("smallest_mtu init'd with asoc to :%d\n",
+ asoc->smallest_mtu);
+#endif
+ asoc->minrto = m->sctp_ep.sctp_minrto;
+ asoc->maxrto = m->sctp_ep.sctp_maxrto;
+
+ asoc->locked_on_sending = NULL;
+ asoc->stream_locked_on = 0;
+ asoc->ecn_echo_cnt_onq = 0;
+ asoc->stream_locked = 0;
+
+ asoc->send_sack = 1;
+
+ LIST_INIT(&asoc->sctp_restricted_addrs);
+
+ TAILQ_INIT(&asoc->nets);
+ TAILQ_INIT(&asoc->pending_reply_queue);
+ TAILQ_INIT(&asoc->asconf_ack_sent);
+ /* Setup to fill the hb random cache at first HB */
+ asoc->hb_random_idx = 4;
+
+ asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time;
+
+ /*
+ * JRS - Pick the default congestion control module based on the
+ * sysctl.
+ */
+ switch (m->sctp_ep.sctp_default_cc_module) {
+ /* JRS - Standard TCP congestion control */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - High Speed TCP congestion control (Floyd) */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - By default, use RFC2581 */
+ default:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ }
+
+ /*
+ * Now the stream parameters, here we allocate space for all streams
+ * that we request by default.
+ */
+ asoc->streamoutcnt = asoc->pre_open_streams =
+ m->sctp_ep.pre_open_stream_count;
+ SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
+ asoc->streamoutcnt * sizeof(struct sctp_stream_out),
+ SCTP_M_STRMO);
+ if (asoc->strmout == NULL) {
+ /* big trouble no memory */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*
+ * inbound side must be set to 0xffff. Also NOTE: when we get
+ * the INIT-ACK back (for the INIT sender) we MUST reduce the
+ * count (streamoutcnt), but first check whether we sent to any
+ * of the upper streams that were dropped (if some were). Those
+ * that were dropped must be reported to the upper layer as
+ * failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ /* Now the mapping array */
+ asoc->mapping_array_size = SCTP_INITIAL_MAPPING_ARRAY;
+ SCTP_MALLOC(asoc->mapping_array, uint8_t *, asoc->mapping_array_size,
+ SCTP_M_MAP);
+ if (asoc->mapping_array == NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ /* Now the init of the other outqueues */
+ TAILQ_INIT(&asoc->free_chunks);
+ TAILQ_INIT(&asoc->out_wheel);
+ TAILQ_INIT(&asoc->control_send_queue);
+ TAILQ_INIT(&asoc->send_queue);
+ TAILQ_INIT(&asoc->sent_queue);
+ TAILQ_INIT(&asoc->reasmqueue);
+ TAILQ_INIT(&asoc->resetHead);
+ asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome;
+ TAILQ_INIT(&asoc->asconf_queue);
+ /* authentication fields */
+ asoc->authinfo.random = NULL;
+ asoc->authinfo.assoc_key = NULL;
+ asoc->authinfo.assoc_keyid = 0;
+ asoc->authinfo.recv_key = NULL;
+ asoc->authinfo.recv_keyid = 0;
+ LIST_INIT(&asoc->shared_keys);
+ asoc->marked_retrans = 0;
+ asoc->timoinit = 0;
+ asoc->timodata = 0;
+ asoc->timosack = 0;
+ asoc->timoshutdown = 0;
+ asoc->timoheartbeat = 0;
+ asoc->timocookie = 0;
+ asoc->timoshutdownack = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
+ asoc->discontinuity_time = asoc->start_time;
+ /*
+ * sa_ignore MEMLEAK {memory is put in the assoc mapping array and
+ * freed later when the association is freed.
+ */
+ return (0);
+}
+
+int
+sctp_expand_mapping_array(struct sctp_association *asoc, uint32_t needed)
+{
+ /* mapping array needs to grow */
+ uint8_t *new_array;
+ uint32_t new_size;
+
+ new_size = asoc->mapping_array_size + ((needed + 7) / 8 + SCTP_MAPPING_ARRAY_INCR);
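+ /* round the requested number of map slots (bits) up to bytes and grow by at least SCTP_MAPPING_ARRAY_INCR */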
+ SCTP_MALLOC(new_array, uint8_t *, new_size, SCTP_M_MAP);
+ if (new_array == NULL) {
+ /* can't get more, forget it */
+ SCTP_PRINTF("No memory for expansion of SCTP mapping array %d\n",
+ new_size);
+ return (-1);
+ }
+ memset(new_array, 0, new_size);
+ memcpy(new_array, asoc->mapping_array, asoc->mapping_array_size);
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = new_array;
+ asoc->mapping_array_size = new_size;
+ return (0);
+}
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+static void
+sctp_iterator_work(struct sctp_iterator *it)
+{
+ int iteration_count = 0;
+ int inp_skip = 0;
+
+ SCTP_ITERATOR_LOCK();
+ if (it->inp) {
+ SCTP_INP_DECR_REF(it->inp);
+ }
+ if (it->inp == NULL) {
+ /* iterator is complete */
+done_with_iterator:
+ SCTP_ITERATOR_UNLOCK();
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ return;
+ }
+select_a_new_ep:
+ SCTP_INP_WLOCK(it->inp);
+ while (((it->pcb_flags) &&
+ ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
+ ((it->pcb_features) &&
+ ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
+ /* endpoint flags or features don't match, so keep looking */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ SCTP_INP_WUNLOCK(it->inp);
+ goto done_with_iterator;
+ }
+ SCTP_INP_WUNLOCK(it->inp);
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ SCTP_INP_WLOCK(it->inp);
+ }
+
+ SCTP_INP_WUNLOCK(it->inp);
+ SCTP_INP_RLOCK(it->inp);
+
+ /* now go through each assoc which is in the desired state */
+ if (it->done_current_ep == 0) {
+ if (it->function_inp != NULL)
+ inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
+ it->done_current_ep = 1;
+ }
+ if (it->stcb == NULL) {
+ /* run the per instance function */
+ it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
+ }
+ if ((inp_skip) || it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+ goto no_stcb;
+ }
+ while (it->stcb) {
+ SCTP_TCB_LOCK(it->stcb);
+ if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
+ /* not in the right state... keep looking */
+ SCTP_TCB_UNLOCK(it->stcb);
+ goto next_assoc;
+ }
+ /* see if we have limited out the iterator loop */
+ iteration_count++;
+ if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
+ /* Pause to let others grab the lock */
+ atomic_add_int(&it->stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(it->stcb);
+
+ SCTP_INP_INCR_REF(it->inp);
+ SCTP_INP_RUNLOCK(it->inp);
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_ITERATOR_LOCK();
+ SCTP_INP_RLOCK(it->inp);
+
+ SCTP_INP_DECR_REF(it->inp);
+ SCTP_TCB_LOCK(it->stcb);
+ atomic_add_int(&it->stcb->asoc.refcnt, -1);
+ iteration_count = 0;
+ }
+ /* run function on this one */
+ (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
+
+ /*
+ * we lie here, it really needs to have its own type but
+ * first I must verify that this won't affect things :-0
+ */
+ if (it->no_chunk_output == 0)
+ sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+
+ SCTP_TCB_UNLOCK(it->stcb);
+next_assoc:
+ it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ /* Run last function */
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+no_stcb:
+ /* done with all assocs on this endpoint, move on to next endpoint */
+ it->done_current_ep = 0;
+ SCTP_INP_WLOCK(it->inp);
+ SCTP_INP_WUNLOCK(it->inp);
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ goto select_a_new_ep;
+}
+
+void
+sctp_iterator_worker(void)
+{
+ struct sctp_iterator *it = NULL;
+
+ /* This function is called with the WQ lock in place */
+
+ sctppcbinfo.iterator_running = 1;
+again:
+ it = TAILQ_FIRST(&sctppcbinfo.iteratorhead);
+ while (it) {
+ /* now lets work on this one */
+ TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ sctp_iterator_work(it);
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /* sa_ignore FREED_MEMORY */
+ it = TAILQ_FIRST(&sctppcbinfo.iteratorhead);
+ }
+ if (TAILQ_FIRST(&sctppcbinfo.iteratorhead)) {
+ goto again;
+ }
+ sctppcbinfo.iterator_running = 0;
+ return;
+}
+
+#endif
+
+
+static void
+sctp_handle_addr_wq(void)
+{
+ /* deal with the ADDR wq from the rtsock calls */
+ struct sctp_laddr *wi;
+ struct sctp_asconf_iterator *asc;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator), SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ /* Try later, no memory */
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ return;
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 0;
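+ /* under the iterator work-queue lock, move every queued address work item onto our private list */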
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ wi = LIST_FIRST(&sctppcbinfo.addr_wq);
+ while (wi != NULL) {
+ LIST_REMOVE(wi, sctp_nxt_addr);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ asc->cnt++;
+ wi = LIST_FIRST(&sctppcbinfo.addr_wq);
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ if (asc->cnt == 0) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ } else {
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ NULL, /* No ep end for boundall */
+ SCTP_PCB_FLAGS_BOUNDALL,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, NULL, 0);
+ }
+}
+
+int retcode = 0;
+int cur_oerr = 0;
+
+void
+sctp_timeout_handler(void *t)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctp_timer *tmr;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ int did_output;
+ struct sctp_iterator *it = NULL;
+
+ tmr = (struct sctp_timer *)t;
+ inp = (struct sctp_inpcb *)tmr->ep;
+ stcb = (struct sctp_tcb *)tmr->tcb;
+ net = (struct sctp_nets *)tmr->net;
+ did_output = 1;
+
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF0, (uint8_t) tmr->type);
+ sctp_auditing(3, inp, stcb, net);
+#endif
+
+ /* sanity checks... */
+ if (tmr->self != (void *)tmr) {
+ /*
+ * SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n",
+ * tmr);
+ */
+ return;
+ }
+ tmr->stopped_from = 0xa001;
+ if (!SCTP_IS_TIMER_TYPE_VALID(tmr->type)) {
+ /*
+ * SCTP_PRINTF("SCTP timer fired with invalid type: 0x%x\n",
+ * tmr->type);
+ */
+ return;
+ }
+ tmr->stopped_from = 0xa002;
+ if ((tmr->type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL)) {
+ return;
+ }
+ /* if this is an iterator timeout, get the struct and clear inp */
+ tmr->stopped_from = 0xa003;
+ if (tmr->type == SCTP_TIMER_TYPE_ITERATOR) {
+ it = (struct sctp_iterator *)inp;
+ inp = NULL;
+ }
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ if ((inp->sctp_socket == 0) &&
+ ((tmr->type != SCTP_TIMER_TYPE_INPKILL) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWN) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNACK) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNGUARD) &&
+ (tmr->type != SCTP_TIMER_TYPE_ASOCKILL))
+ ) {
+ SCTP_INP_DECR_REF(inp);
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa004;
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state == 0) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa005;
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ return;
+ }
+ tmr->stopped_from = 0xa006;
+
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+ ((stcb->asoc.state == 0) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
+ SCTP_TCB_UNLOCK(stcb);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+ }
+ }
+ /* record in stopped_from which timeout occurred */
+ tmr->stopped_from = tmr->type;
+
+ /* mark as being serviced now */
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+ * Callout has been rescheduled.
+ */
+ goto get_out;
+ }
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ /*
+ * Not active, so no action.
+ */
+ goto get_out;
+ }
+ SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
+
+ /* call the handler for the appropriate timer type */
+ switch (tmr->type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_SENDQ_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ sctp_handle_addr_wq();
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ SCTP_STAT_INCR(sctps_timoiterator);
+ sctp_iterator_timer(it);
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timodata);
+ stcb->asoc.timodata++;
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ cur_oerr = stcb->asoc.overall_error_count;
+ retcode = sctp_t3rxt_timer(inp, stcb, net);
+ if (retcode) {
+ /* no need to unlock on tcb its gone */
+
+ goto out_decr;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)
+ ) {
+ struct sctp_tmit_chunk *chk;
+
+ /*
+ * safeguard: if there are chunks on the sent queue
+ * somewhere but no timers running, something is
+ * wrong... so we start a timer on the first chunk
+ * on the sent queue, on whatever net it was sent to.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb,
+ chk->whoTo);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoinit);
+ stcb->asoc.timoinit++;
+ if (sctp_t1init_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ /* We do output but not here */
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ } {
+ int abort_flag;
+
+ SCTP_STAT_INCR(sctps_timosack);
+ stcb->asoc.timosack++;
+ if (stcb->asoc.cumulative_tsn != stcb->asoc.highest_tsn_inside_map)
+ sctp_sack_check(stcb, 0, 0, &abort_flag);
+ sctp_send_sack(stcb);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdown_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdown);
+ stcb->asoc.timoshutdown++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ {
+ struct sctp_nets *lnet;
+ int cnt_of_unconf = 0;
+
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoheartbeat);
+ stcb->asoc.timoheartbeat++;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf == 0) {
+ if (sctp_heartbeat_timer(inp, stcb, lnet,
+ cnt_of_unconf)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, lnet);
+#endif
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, lnet);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_HB_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_cookie_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timocookie);
+ stcb->asoc.timocookie++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ /*
+ * We consider T3 and Cookie timer pretty much the same with
+ * respect to where from in chunk_output.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ {
+ struct timeval tv;
+ int i, secret;
+
+ if (inp == NULL) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timosecret);
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.time_of_secret_change = tv.tv_sec;
+ inp->sctp_ep.last_secret_number =
+ inp->sctp_ep.current_secret_number;
+ inp->sctp_ep.current_secret_number++;
+ if (inp->sctp_ep.current_secret_number >=
+ SCTP_HOW_MANY_SECRETS) {
+ inp->sctp_ep.current_secret_number = 0;
+ }
+ secret = (int)inp->sctp_ep.current_secret_number;
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ inp->sctp_ep.secret_key[secret][i] =
+ sctp_select_initial_TSN(&inp->sctp_ep);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, stcb, net);
+ }
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timopathmtu);
+ sctp_pathmtu_timer(inp, stcb, net);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdownack_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownack);
+ stcb->asoc.timoshutdownack++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_ACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownguard);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_SHUTDOWN_GUARD_EXPIRES, NULL, SCTP_SO_NOT_LOCKED);
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+
+ case SCTP_TIMER_TYPE_STRRESET:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_strreset_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timostrmrst);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ /* Need to do FR of things for net */
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoearlyfr);
+ sctp_early_fr_timer(inp, stcb, net);
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_asconf_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoasconf);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_ASCONF_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ sctp_delete_prim_timer(inp, stcb, net);
+ SCTP_STAT_INCR(sctps_timodelprim);
+ break;
+
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoautoclose);
+ sctp_autoclose_timer(inp, stcb, net);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoassockill);
+ /* Can we free it yet? */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ /*
+ * free asoc always unlocks (or destroys) the lock, so prevent a
+ * duplicate unlock or an unlock of a freed mtx :-0
+ */
+ stcb = NULL;
+ goto out_no_decr;
+ case SCTP_TIMER_TYPE_INPKILL:
+ SCTP_STAT_INCR(sctps_timoinpkill);
+ if (inp == NULL) {
+ break;
+ }
+ /*
+ * special case, take away our increment since WE are the
+ * killer
+ */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ goto out_no_decr;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
+ tmr->type);
+ break;
+ };
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF1, (uint8_t) tmr->type);
+ if (inp)
+ sctp_auditing(5, inp, stcb, net);
+#endif
+ if ((did_output) && stcb) {
+ /*
+ * Now we need to clean up the control chunk chain if an
+ * ECNE is on it. It must be marked as UNSENT again so the next
+ * call will continue to send it until such time that we get
+ * a CWR, to remove it. It is, however, less likely that we
+ * will find an ECN echo on the chain though.
+ */
+ sctp_fix_ecn_echo(&stcb->asoc);
+ }
+get_out:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+out_decr:
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+out_no_decr:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
+ tmr->type);
+ if (inp) {
+ }
+}
+
+void
+sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int to_ticks;
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL))
+ return;
+
+ to_ticks = 0;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ to_ticks = SCTP_ZERO_COPY_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ to_ticks = SCTP_ZERO_COPY_SENDQ_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ /* Only 1 tick away :-) */
+ tmr = &sctppcbinfo.addr_wq_timer;
+ to_ticks = SCTP_ADDRESS_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ {
+ struct sctp_iterator *it;
+
+ it = (struct sctp_iterator *)inp;
+ tmr = &it->tmr;
+ to_ticks = SCTP_ITERATOR_TICKS;
+ }
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ /* Here we use the RTO timer */
+ {
+ int rto_val;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ rto_val = stcb->asoc.initial_rto;
+ } else {
+ rto_val = net->RTO;
+ }
+ to_ticks = MSEC_TO_TICKS(rto_val);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ /*
+ * Here we use the INIT timer default, usually about 1
+ * minute.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ /*
+ * Here we use the delayed-ACK timer value from the inp,
+ * usually about 200 ms.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.delayed_ack);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ /* Here we use the RTO of the destination. */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ /*
+ * the net is used here so that we can add in the RTO, even
+ * though we use a different timer. We also add the HB timer
+ * value PLUS a random jitter.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ } else {
+ uint32_t rndval;
+ uint8_t this_random;
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf) {
+ net = lnet = NULL;
+ (void)sctp_heartbeat_timer(inp, stcb, lnet, cnt_of_unconf);
+ }
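+ /* refill the jitter cache from a fresh 32-bit random value once all of its bytes have been consumed */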
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&inp->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ stcb->asoc.hb_random_idx = 0;
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ /*
+ * this_random will be 0 - 255 ms; the RTO is in ms.
+ */
+ if ((stcb->asoc.hb_is_disabled) &&
+ (cnt_of_unconf == 0)) {
+ return;
+ }
+ if (net) {
+ int delay;
+
+ delay = stcb->asoc.heart_beat_delay;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ ((lnet->dest_state & SCTP_ADDR_OUT_OF_SCOPE) == 0) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ delay = 0;
+ }
+ }
+ if (net->RTO == 0) {
+ /* Never been checked */
+ to_ticks = this_random + stcb->asoc.initial_rto + delay;
+ } else {
+ /* set rto_val to the ms */
+ to_ticks = delay + net->RTO + this_random;
+ }
+ } else {
+ if (cnt_of_unconf) {
+ to_ticks = this_random + stcb->asoc.initial_rto;
+ } else {
+ to_ticks = stcb->asoc.heart_beat_delay + this_random + stcb->asoc.initial_rto;
+ }
+ }
+ /*
+ * Now we must convert the to_ticks that are now in
+ * ms to ticks.
+ */
+ to_ticks = MSEC_TO_TICKS(to_ticks);
+ tmr = &stcb->asoc.hb_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ /*
+ * Here we can use the RTO timer from the network since one
+ * RTT was complete. If a retransmission happened then we will be
+ * using the initial RTO value.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /*
+ * nothing needed but the endpoint here; usually about 60
+ * minutes.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ to_ticks = MSEC_TO_TICKS(SCTP_ASOC_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+ * The inp is set up to die. We re-use the signature_change
+ * timer since that has stopped and we are in the GONE
+ * state.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ /*
+ * Here we use the value found in the EP for PMTU, usually
+ * about 10 minutes.
+ */
+ if ((stcb == NULL) || (inp == NULL)) {
+ return;
+ }
+ if (net == NULL) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_PMTU];
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ /* Here we use the RTO of the destination */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ /*
+ * Here we use the endpoint's shutdown guard timer, usually
+ * about 3 minutes.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_EARLYFR:
+ {
+ unsigned int msec;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->flight_size > net->cwnd) {
+ /* no need to start */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstart);
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ msec = stcb->asoc.initial_rto >> 2;
+ } else {
+ msec = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (msec < sctp_early_fr_msec) {
+ msec = sctp_early_fr_msec;
+ if (msec < SCTP_MINFR_MSEC_FLOOR) {
+ msec = SCTP_MINFR_MSEC_FLOOR;
+ }
+ }
+ to_ticks = MSEC_TO_TICKS(msec);
+ tmr = &net->fr_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (net != NULL)) {
+ return;
+ }
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ if (stcb->asoc.sctp_autoclose_ticks == 0) {
+ /*
+ * Really an error since stcb is NOT set to
+ * autoclose
+ */
+ return;
+ }
+ to_ticks = stcb->asoc.sctp_autoclose_ticks;
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ return;
+ break;
+ };
+ if ((to_ticks <= 0) || (tmr == NULL)) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
+ __FUNCTION__, t_type, to_ticks, tmr);
+ return;
+ }
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+ * we do NOT allow you to have it already running; if it is,
+ * we leave the current one up unchanged
+ */
+ return;
+ }
+ /* At this point we can proceed */
+ if (t_type == SCTP_TIMER_TYPE_SEND) {
+ stcb->asoc.num_send_timers_up++;
+ }
+ tmr->stopped_from = 0;
+ tmr->type = t_type;
+ tmr->ep = (void *)inp;
+ tmr->tcb = (void *)stcb;
+ tmr->net = (void *)net;
+ tmr->self = (void *)tmr;
+ tmr->ticks = sctp_get_tick_count();
+ (void)SCTP_OS_TIMER_START(&tmr->timer, to_ticks, sctp_timeout_handler, tmr);
+ return;
+}
+
+void
+sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t from)
+{
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) &&
+ (inp == NULL))
+ return;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ tmr = &sctppcbinfo.addr_wq_timer;
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->fr_timer;
+ SCTP_STAT_INCR(sctps_earlyfrstop);
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ {
+ struct sctp_iterator *it;
+
+ it = (struct sctp_iterator *)inp;
+ tmr = &it->tmr;
+ }
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.hb_timer;
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /* nothing needed but the endpoint here */
+ tmr = &inp->sctp_ep.signature_change;
+ /*
+ * We re-use the newcookie timer for the INP kill timer. We
+ * must ensure that we do not kill it by accident.
+ */
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ /*
+ * Stop the asoc kill timer.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+ * The inp is set up to die. We re-use the signature_change
+ * timer since that has stopped and we are in the GONE
+ * state.
+ */
+ tmr = &inp->sctp_ep.signature_change;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ break;
+ };
+ if (tmr == NULL) {
+ return;
+ }
+ if ((tmr->type != t_type) && tmr->type) {
+ /*
+ * Ok, we have a timer that is under joint use; the cookie timer,
+ * for example, shares with the SEND timer. We therefore are NOT
+ * running the timer that the caller wants stopped, so just
+ * return.
+ */
+ return;
+ }
+ if ((t_type == SCTP_TIMER_TYPE_SEND) && (stcb != NULL)) {
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ }
+ tmr->self = NULL;
+ tmr->stopped_from = from;
+ (void)SCTP_OS_TIMER_STOP(&tmr->timer);
+ return;
+}
+
+#ifdef SCTP_USE_ADLER32
+static uint32_t
+update_adler32(uint32_t adler, uint8_t * buf, int32_t len)
+{
+ uint32_t s1 = adler & 0xffff;
+ uint32_t s2 = (adler >> 16) & 0xffff;
+ int n;
+
+ for (n = 0; n < len; n++, buf++) {
+ /* s1 = (s1 + buf[n]) % BASE */
+ /* first we add */
+ s1 = (s1 + *buf);
+ /*
+ * now if we need to, we do a mod by subtracting. It seems a
+ * bit faster since I really will only ever do one subtract
+ * at the MOST, since buf[n] is a max of 255.
+ */
+ if (s1 >= SCTP_ADLER32_BASE) {
+ s1 -= SCTP_ADLER32_BASE;
+ }
+ /* s2 = (s2 + s1) % BASE */
+ /* first we add */
+ s2 = (s2 + s1);
+ /*
+ * again, it is more efficient (it seems) to subtract since
+ * the most s2 will ever be is (BASE-1 + BASE-1) in the
+ * worst case. This would then be (2 * BASE) - 2, which still
+ * only needs one subtract. On Intel it is much better to do
+ * it this way and avoid the divide. Have not -pg'd on
+ * sparc.
+ */
+ if (s2 >= SCTP_ADLER32_BASE) {
+ s2 -= SCTP_ADLER32_BASE;
+ }
+ }
+ /* Return the adler32 of the bytes buf[0..len-1] */
+ return ((s2 << 16) + s1);
+}
+
+#endif
+
+
+uint32_t
+sctp_calculate_len(struct mbuf *m)
+{
+ uint32_t tlen = 0;
+ struct mbuf *at;
+
+ at = m;
+ while (at) {
+ tlen += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ return (tlen);
+}
+
+#if defined(SCTP_WITH_NO_CSUM)
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ if (pktlen == NULL)
+ return (0);
+ *pktlen = sctp_calculate_len(m);
+ return (0);
+}
+
+#elif defined(SCTP_USE_INCHKSUM)
+
+#include <machine/in_cksum.h>
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ int32_t tlen = 0;
+ struct mbuf *at;
+ uint32_t the_sum, retsum;
+
+ at = m;
+ while (at) {
+ tlen += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ the_sum = (uint32_t) (in_cksum_skip(m, tlen, offset));
+ if (pktlen != NULL)
+ *pktlen = (tlen - offset);
+ retsum = htons(the_sum);
+ return (the_sum);
+}
+
+#else
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ int32_t tlen = 0;
+
+#ifdef SCTP_USE_ADLER32
+ uint32_t base = 1L;
+
+#else
+ uint32_t base = 0xffffffff;
+
+#endif
+ struct mbuf *at;
+
+ at = m;
+ /* find the correct mbuf and offset into mbuf */
+ while ((at != NULL) && (offset > (uint32_t) SCTP_BUF_LEN(at))) {
+ offset -= SCTP_BUF_LEN(at); /* update remaining offset
+ * left */
+ at = SCTP_BUF_NEXT(at);
+ }
+ while (at != NULL) {
+ if ((SCTP_BUF_LEN(at) - offset) > 0) {
+#ifdef SCTP_USE_ADLER32
+ base = update_adler32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+#else
+ if ((SCTP_BUF_LEN(at) - offset) < 4) {
+ /* Use old method if less than 4 bytes */
+ base = old_update_crc32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+ } else {
+ base = update_crc32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+ }
+#endif
+ tlen += SCTP_BUF_LEN(at) - offset;
+ /* we only offset once into the first mbuf */
+ }
+ if (offset) {
+ if (offset < (uint32_t) SCTP_BUF_LEN(at))
+ offset = 0;
+ else
+ offset -= SCTP_BUF_LEN(at);
+ }
+ at = SCTP_BUF_NEXT(at);
+ }
+ if (pktlen != NULL) {
+ *pktlen = tlen;
+ }
+#ifdef SCTP_USE_ADLER32
+ /* Adler32 */
+ base = htonl(base);
+#else
+ /* CRC-32c */
+ base = sctp_csum_finalize(base);
+#endif
+ return (base);
+}
+
+
+#endif
+
+void
+sctp_mtu_size_reset(struct sctp_inpcb *inp,
+ struct sctp_association *asoc, uint32_t mtu)
+{
+ /*
+ * Reset the P-MTU size on this association; this involves changing
+ * the asoc MTU and going through ANY chunk+overhead larger than mtu to
+ * allow the DF flag to be cleared.
+ */
+ struct sctp_tmit_chunk *chk;
+ unsigned int eff_mtu, ovh;
+
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset(%p, asoc:%p mtu:%d\n",
+ inp, asoc, mtu);
+#endif
+ asoc->smallest_mtu = mtu;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ eff_mtu = mtu - ovh;
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+}
+
+
+/*
+ * given an association and the starting time of the current RTT period,
+ * return the RTO in msecs; net should point to the current network
+ */
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct timeval *told,
+ int safe)
+{
+ /*-
+ * given an association and the starting time of the current RTT
+ * period (in value1/value2) return RTO in number of msecs.
+ */
+ int calc_time = 0;
+ int o_calctime;
+ uint32_t new_rto = 0;
+ int first_measure = 0;
+ struct timeval now, then, *old;
+
+ /* Copy it out for sparc64 */
+ if (safe == sctp_align_unsafe_makecopy) {
+ old = &then;
+ memcpy(&then, told, sizeof(struct timeval));
+ } else if (safe == sctp_align_safe_nocopy) {
+ old = told;
+ } else {
+ /* error */
+ SCTP_PRINTF("Huh, bad rto calc call\n");
+ return (0);
+ }
+ /************************/
+ /* 1. calculate new RTT */
+ /************************/
+ /* get the current time */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* compute the RTT value */
+ if ((u_long)now.tv_sec > (u_long)old->tv_sec) {
+ calc_time = ((u_long)now.tv_sec - (u_long)old->tv_sec) * 1000;
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time += (((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000);
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* Borrow 1,000ms from current calculation */
+ calc_time -= 1000;
+ /* Add in the slop over */
+ calc_time += ((int)now.tv_usec / 1000);
+ /* Add in the pre-second ms's */
+ calc_time += (((int)1000000 - (int)old->tv_usec) / 1000);
+ }
+ } else if ((u_long)now.tv_sec == (u_long)old->tv_sec) {
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time = ((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000;
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ } else if ((u_long)now.tv_usec == (u_long)old->tv_usec) {
+ /*
+ * We have to have 1 usec :-D this must be the
+ * loopback.
+ */
+ calc_time = 1;
+ } else {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ }
+ } else {
+ /* Clock wrapped? */
+ goto calc_rto;
+ }
+ /***************************/
+ /* 2. update RTTVAR & SRTT */
+ /***************************/
+ o_calctime = calc_time;
+ /* this is Van Jacobson's integer version */
+ if (net->RTO_measured) {
+ calc_time -= (net->lastsa >> SCTP_RTT_SHIFT); /* take away 1/8th when
+ * shift=3 */
+ if (sctp_logging_level & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_RTTVAR);
+ }
+ net->prev_rtt = o_calctime;
+ net->lastsa += calc_time; /* add 7/8th into sa when
+ * shift=3 */
+ if (calc_time < 0) {
+ calc_time = -calc_time;
+ }
+ calc_time -= (net->lastsv >> SCTP_RTT_VAR_SHIFT); /* take away 1/4 when
+ * VAR shift=2 */
+ net->lastsv += calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ } else {
+ /* First RTO measurement */
+ net->RTO_measured = 1;
+ net->lastsa = calc_time << SCTP_RTT_SHIFT; /* Multiply by 8 when
+ * shift=3 */
+ net->lastsv = calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ first_measure = 1;
+ net->prev_rtt = o_calctime;
+ if (sctp_logging_level & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_INITIAL_RTT);
+ }
+ }
+calc_rto:
+ new_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
+ if ((new_rto > SCTP_SAT_NETWORK_MIN) &&
+ (stcb->asoc.sat_network_lockout == 0)) {
+ stcb->asoc.sat_network = 1;
+ } else if ((!first_measure) && stcb->asoc.sat_network) {
+ stcb->asoc.sat_network = 0;
+ stcb->asoc.sat_network_lockout = 1;
+ }
+ /* bound it, per C6/C7 in Section 5.3.1 */
+ if (new_rto < stcb->asoc.minrto) {
+ new_rto = stcb->asoc.minrto;
+ }
+ if (new_rto > stcb->asoc.maxrto) {
+ new_rto = stcb->asoc.maxrto;
+ }
+ /* we are now returning the RTO */
+ return (new_rto);
+}
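+
+/*
+ * For reference, with SCTP_RTT_SHIFT = 3 and SCTP_RTT_VAR_SHIFT = 2 the
+ * scaled-integer update above is (approximately) the classic Van Jacobson
+ * smoothing that SCTP's RTO calculation is based on: 'lastsa' holds SRTT
+ * scaled by 8 and 'lastsv' holds roughly 4 * RTTVAR, so the updates amount
+ * to
+ *
+ *     SRTT   += (R' - SRTT) / 8
+ *     RTTVAR += (|R' - SRTT| - RTTVAR) / 4
+ *     RTO     = SRTT + 4 * RTTVAR
+ *
+ * with the result clamped to [asoc.minrto, asoc.maxrto] (rules C6/C7).
+ */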
+
+/*
+ * Return a pointer to a contiguous piece of data from the given mbuf chain
+ * starting at 'off' for 'len' bytes. If the desired piece spans more than
+ * one mbuf, a copy is made into 'in_ptr'; the caller must ensure that the
+ * buffer size is >= 'len'. Returns NULL if there aren't 'len' bytes in the chain.
+ */
+caddr_t
+sctp_m_getptr(struct mbuf *m, int off, int len, uint8_t * in_ptr)
+{
+ uint32_t count;
+ uint8_t *ptr;
+
+ ptr = in_ptr;
+ if ((off < 0) || (len <= 0))
+ return (NULL);
+
+ /* find the desired start location */
+ while ((m != NULL) && (off > 0)) {
+ if (off < SCTP_BUF_LEN(m))
+ break;
+ off -= SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (m == NULL)
+ return (NULL);
+
+ /* is the current mbuf large enough (eg. contiguous)? */
+ if ((SCTP_BUF_LEN(m) - off) >= len) {
+ return (mtod(m, caddr_t)+off);
+ } else {
+ /* else, it spans more than one mbuf, so save a temp copy... */
+ while ((m != NULL) && (len > 0)) {
+ count = min(SCTP_BUF_LEN(m) - off, len);
+ bcopy(mtod(m, caddr_t)+off, ptr, count);
+ len -= count;
+ ptr += count;
+ off = 0;
+ m = SCTP_BUF_NEXT(m);
+ }
+ if ((m == NULL) && (len > 0))
+ return (NULL);
+ else
+ return ((caddr_t)in_ptr);
+ }
+}
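+
+/*
+ * Usage sketch: a caller that wants a chunk header which may straddle
+ * mbufs supplies scratch storage and checks for NULL, as the OOTB
+ * handling later in this file does:
+ *
+ *     struct sctp_chunkhdr *ch, chunk_buf;
+ *
+ *     ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ *         sizeof(*ch), (uint8_t *)&chunk_buf);
+ *     if (ch == NULL)
+ *         return;        NULL means fewer than 'len' bytes remain
+ *
+ * When the data is contiguous the returned pointer aliases the mbuf
+ * contents; otherwise it points into 'chunk_buf'.
+ */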
+
+
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *m,
+ int offset,
+ struct sctp_paramhdr *pull,
+ int pull_limit)
+{
+ /* This just provides a typed signature to Peter's Pull routine */
+ return ((struct sctp_paramhdr *)sctp_m_getptr(m, offset, pull_limit,
+ (uint8_t *) pull));
+}
+
+
+int
+sctp_add_pad_tombuf(struct mbuf *m, int padlen)
+{
+ /*
+ * add padlen bytes of 0 filled padding to the end of the mbuf. If
+ * padlen is > 3 this routine will fail.
+ */
+ uint8_t *dp;
+ int i;
+
+ if (padlen > 3) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ return (ENOBUFS);
+ }
+ if (padlen <= M_TRAILINGSPACE(m)) {
+ /*
+ * The easy way. We hope the majority of the time we hit
+ * here :)
+ */
+ dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ SCTP_BUF_LEN(m) += padlen;
+ } else {
+ /* Hard way we must grow the mbuf */
+ struct mbuf *tmp;
+
+ tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
+ if (tmp == NULL) {
+ /* Out of space GAK! we are in big trouble. */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (ENOSPC);
+ }
+ /* setup and insert in middle */
+ SCTP_BUF_LEN(tmp) = padlen;
+ SCTP_BUF_NEXT(tmp) = NULL;
+ SCTP_BUF_NEXT(m) = tmp;
+ dp = mtod(tmp, uint8_t *);
+ }
+ /* zero out the pad */
+ for (i = 0; i < padlen; i++) {
+ *dp = 0;
+ dp++;
+ }
+ return (0);
+}
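+
+/*
+ * Note: SCTP chunks are padded out to 4-byte boundaries, so a caller
+ * would typically derive the pad length as, e.g.,
+ *
+ *     padlen = SCTP_SIZE32(len) - len;
+ *
+ * which is always in the range 0..3 and therefore satisfies the
+ * (padlen > 3) check above. (Illustrative only; 'len' is whatever chunk
+ * length the caller is finishing off.)
+ */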
+
+int
+sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
+{
+ /* find the last mbuf in chain and pad it */
+ struct mbuf *m_at;
+
+ m_at = m;
+ if (last_mbuf) {
+ return (sctp_add_pad_tombuf(last_mbuf, padval));
+ } else {
+ while (m_at) {
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ return (sctp_add_pad_tombuf(m_at, padval));
+ }
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+}
+
+int sctp_asoc_change_wake = 0;
+
+static void
+sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_assoc_change *sac;
+ struct sctp_queued_to_read *control;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /*
+ * First, if we are going down, dump everything we can to the
+ * socket rcv queue.
+ */
+
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+ * when an ABORT comes in.
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ ((event == SCTP_COMM_LOST) || (event == SCTP_CANT_STR_ASSOC))) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
+ stcb->sctp_socket->so_error = ECONNREFUSED;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ stcb->sctp_socket->so_error = ECONNRESET;
+ }
+ /* Wake ANY sleepers */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sorwakeup(stcb->sctp_socket);
+ sowwakeup(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ sctp_asoc_change_wake++;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+
+ sac = mtod(m_notify, struct sctp_assoc_change *);
+ sac->sac_type = SCTP_ASSOC_CHANGE;
+ sac->sac_flags = 0;
+ sac->sac_length = sizeof(struct sctp_assoc_change);
+ sac->sac_state = event;
+ sac->sac_error = error;
+ /* XXX verify these stream counts */
+ sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
+ sac->sac_inbound_streams = stcb->asoc.streamincnt;
+ sac->sac_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+ if (event == SCTP_COMM_LOST) {
+ /* Wake up any sleeper */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+}
+
+static void
+sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
+ struct sockaddr *sa, uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_paddr_change *spc;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ spc = mtod(m_notify, struct sctp_paddr_change *);
+ spc->spc_type = SCTP_PEER_ADDR_CHANGE;
+ spc->spc_flags = 0;
+ spc->spc_length = sizeof(struct sctp_paddr_change);
+ if (sa->sa_family == AF_INET) {
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in6));
+
+ sin6 = (struct sockaddr_in6 *)&spc->spc_aaddr;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ if (sin6->sin6_scope_id == 0) {
+ /* recover scope_id for user */
+ (void)sa6_recoverscope(sin6);
+ } else {
+ /* clear embedded scope_id for user */
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ }
+ }
+ spc->spc_state = state;
+ spc->spc_error = error;
+ spc->spc_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_paddr_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+static void
+sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_tmit_chunk *chk, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)))
+ /* event not enabled */
+ return;
+
+ length = sizeof(struct sctp_send_failed) + chk->send_size;
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
+ ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
+ ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
+ ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
+ ssf->ssf_info.sinfo_context = chk->rec.data.context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_NEXT(m_notify) = chk->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+
+ /* Steal off the mbuf */
+ chk->data = NULL;
+ /*
+ * For this case, we check the actual socket buffer, since the assoc
+ * is going away we don't want to overfill the socket buffer for a
+ * non-reader
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+}
+
+
+static void
+sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_stream_queue_pending *sp, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)))
+ /* event not enabled */
+ return;
+
+ length = sizeof(struct sctp_send_failed) + sp->length;
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = sp->stream;
+ ssf->ssf_info.sinfo_ssn = sp->strseq;
+ ssf->ssf_info.sinfo_flags = sp->sinfo_flags;
+ ssf->ssf_info.sinfo_ppid = sp->ppid;
+ ssf->ssf_info.sinfo_context = sp->context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_NEXT(m_notify) = sp->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+
+ /* Steal off the mbuf */
+ sp->data = NULL;
+ /*
+ * For this case, we check the actual socket buffer, since the assoc
+ * is going away we don't want to overfill the socket buffer for a
+ * non-reader
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+}
+
+
+
+static void
+sctp_notify_adaptation_layer(struct sctp_tcb *stcb,
+ uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_adaptation_event *sai;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ sai = mtod(m_notify, struct sctp_adaptation_event *);
+ sai->sai_type = SCTP_ADAPTATION_INDICATION;
+ sai->sai_flags = 0;
+ sai->sai_length = sizeof(struct sctp_adaptation_event);
+ sai->sai_adaptation_ind = stcb->asoc.peers_adaptation;
+ sai->sai_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_adaptation_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+/* This always must be called with the read-queue LOCKED in the INP */
+void
+sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
+ int nolock, uint32_t val)
+{
+ struct mbuf *m_notify;
+ struct sctp_pdapi_event *pdapi;
+ struct sctp_queued_to_read *control;
+ struct sockbuf *sb;
+
+ if ((stcb == NULL) || (stcb->sctp_socket == NULL) ||
+ sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ pdapi = mtod(m_notify, struct sctp_pdapi_event *);
+ pdapi->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
+ pdapi->pdapi_flags = 0;
+ pdapi->pdapi_length = sizeof(struct sctp_pdapi_event);
+ pdapi->pdapi_indication = error;
+ pdapi->pdapi_stream = (val >> 16);
+ pdapi->pdapi_seq = (val & 0x0000ffff);
+ pdapi->pdapi_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_pdapi_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->held_length = 0;
+ control->length = 0;
+ if (nolock == 0) {
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ }
+ sb = &stcb->sctp_socket->so_rcv;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m_notify));
+ }
+ sctp_sballoc(stcb, sb, m_notify);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m_notify));
+ control->end_added = 1;
+ if (stcb->asoc.control_pdapi)
+ TAILQ_INSERT_AFTER(&stcb->sctp_ep->read_queue, stcb->asoc.control_pdapi, control, next);
+ else {
+ /* we really should not see this case */
+ TAILQ_INSERT_TAIL(&stcb->sctp_ep->read_queue, control, next);
+ }
+ if (nolock == 0) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ }
+ if (stcb->sctp_ep && stcb->sctp_socket) {
+ /* This should always be the case */
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+ }
+}
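+
+/*
+ * The 'val' argument above packs the stream id into the upper 16 bits
+ * and the stream sequence number into the lower 16 bits, so a caller
+ * would typically build it as, e.g.,
+ *
+ *     val = (stream_no << 16) | (seq & 0x0000ffff);
+ *
+ * (illustrative; 'stream_no' and 'seq' are whatever the caller is
+ * reporting), matching the pdapi_stream/pdapi_seq split done above.
+ */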
+
+static void
+sctp_notify_shutdown_event(struct sctp_tcb *stcb)
+{
+ struct mbuf *m_notify;
+ struct sctp_shutdown_event *sse;
+ struct sctp_queued_to_read *control;
+
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+ * when a SHUTDOWN completes
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* mark socket closed for read/write and wakeup! */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ socantsendmore(stcb->sctp_socket);
+ socantrcvmore(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ sse = mtod(m_notify, struct sctp_shutdown_event *);
+ sse->sse_type = SCTP_SHUTDOWN_EVENT;
+ sse->sse_flags = 0;
+ sse->sse_length = sizeof(struct sctp_shutdown_event);
+ sse->sse_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_shutdown_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_notify_stream_reset(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list, int flag)
+{
+ struct mbuf *m_notify;
+ struct sctp_queued_to_read *control;
+ struct sctp_stream_reset_event *strreset;
+ int len;
+
+ if (stcb == NULL) {
+ return;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ len = sizeof(struct sctp_stream_reset_event) + (number_entries * sizeof(uint16_t));
+ if (len > M_TRAILINGSPACE(m_notify)) {
+ /* never enough room */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+ strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
+ if (number_entries == 0) {
+ strreset->strreset_flags = flag | SCTP_STRRESET_ALL_STREAMS;
+ } else {
+ strreset->strreset_flags = flag | SCTP_STRRESET_STREAM_LIST;
+ }
+ strreset->strreset_length = len;
+ strreset->strreset_assoc_id = sctp_get_associd(stcb);
+ if (number_entries) {
+ int i;
+
+ for (i = 0; i < number_entries; i++) {
+ strreset->strreset_list[i] = ntohs(list[i]);
+ }
+ }
+ SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ /* no space */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+void
+sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ if (stcb == NULL) {
+ /* unlikely but */
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* No notifications up when we are in a no socket state */
+ return;
+ }
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* Can't send up to a closed socket any notifications */
+ return;
+ }
+ if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+ (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+ if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
+ (notification == SCTP_NOTIFY_INTERFACE_UP) ||
+ (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
+ /* Don't report these in front states */
+ return;
+ }
+ }
+ switch (notification) {
+ case SCTP_NOTIFY_ASSOC_UP:
+ if (stcb->asoc.assoc_up_sent == 0) {
+ sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, so_locked);
+ stcb->asoc.assoc_up_sent = 1;
+ }
+ if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
+ sctp_notify_adaptation_layer(stcb, error);
+ }
+ break;
+ case SCTP_NOTIFY_ASSOC_DOWN:
+ sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, so_locked);
+ break;
+ case SCTP_NOTIFY_INTERFACE_DOWN:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_UP:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_CONFIRMED:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_SPECIAL_SP_FAIL:
+ sctp_notify_send_failed2(stcb, error,
+ (struct sctp_stream_queue_pending *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_DG_FAIL:
+ sctp_notify_send_failed(stcb, error,
+ (struct sctp_tmit_chunk *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION:
+ {
+ uint32_t val;
+
+ val = *((uint32_t *) data);
+
+ sctp_notify_partial_delivery_indication(stcb, error, 0, val);
+ }
+ break;
+ case SCTP_NOTIFY_STRDATA_ERR:
+ break;
+ case SCTP_NOTIFY_ASSOC_ABORTED:
+ if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, NULL, so_locked);
+ } else {
+ sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, NULL, so_locked);
+ }
+ break;
+ case SCTP_NOTIFY_PEER_OPENED_STREAM:
+ break;
+ case SCTP_NOTIFY_STREAM_OPENED_OK:
+ break;
+ case SCTP_NOTIFY_ASSOC_RESTART:
+ sctp_notify_assoc_change(SCTP_RESTART, stcb, error, data, so_locked);
+ break;
+ case SCTP_NOTIFY_HB_RESP:
+ break;
+ case SCTP_NOTIFY_STR_RESET_SEND:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_OUTBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_RECV:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_INBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_FAILED_OUT:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_OUTBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+
+ case SCTP_NOTIFY_STR_RESET_FAILED_IN:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_INBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+
+ case SCTP_NOTIFY_ASCONF_ADD_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_DELETE_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SUCCESS:
+ break;
+ case SCTP_NOTIFY_ASCONF_FAILED:
+ break;
+ case SCTP_NOTIFY_PEER_SHUTDOWN:
+ sctp_notify_shutdown_event(stcb);
+ break;
+ case SCTP_NOTIFY_AUTH_NEW_KEY:
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, error,
+ (uint16_t) (uintptr_t) data);
+ break;
+#if 0
+ case SCTP_NOTIFY_AUTH_KEY_CONFLICT:
+ sctp_notify_authentication(stcb, SCTP_AUTH_KEY_CONFLICT,
+ error, (uint16_t) (uintptr_t) data);
+ break;
+#endif /* not yet? remove? */
+
+
+ default:
+ SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
+ __FUNCTION__, notification, notification);
+ break;
+ } /* end switch */
+}
+
+void
+sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+ int i;
+
+ asoc = &stcb->asoc;
+
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+ /* now go through all the gunk, freeing chunks */
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ asoc->sent_queue_cnt--;
+ if (chk->data) {
+ /*
+ * trim off the sctp chunk header (it should
+ * be there)
+ */
+ if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
+ m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ sctp_mbuf_crush(chk->data);
+ chk->send_size -= sizeof(struct sctp_data_chunk);
+ }
+ }
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_SENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt--;
+ if (chk->data) {
+ /*
+ * trim off the sctp chunk header (it should
+ * be there)
+ */
+ if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
+ m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ sctp_mbuf_crush(chk->data);
+ chk->send_size -= sizeof(struct sctp_data_chunk);
+ }
+ }
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, SCTP_NOTIFY_DATAGRAM_UNSENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ /* For each stream */
+ outs = &stcb->asoc.strmout[i];
+ /* clean up any sends there */
+ stcb->asoc.locked_on_sending = NULL;
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ stcb->asoc.stream_queue_cnt--;
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ sctp_free_spbufspace(stcb, asoc, sp);
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, so_locked);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ if (sp->net)
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ /* Free the chunk */
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
+void
+sctp_abort_notification(struct sctp_tcb *stcb, int error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+ /* Tell them we lost the asoc */
+ sctp_report_all_outbound(stcb, 1, so_locked);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_ABORTED, stcb, error, NULL, so_locked);
+}
+
+void
+sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *m, int iphlen, struct sctphdr *sh, struct mbuf *op_err,
+ uint32_t vrf_id)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ vtag = 0;
+ if (stcb != NULL) {
+ /* We have a TCB to abort, send notification too */
+ vtag = stcb->asoc.peer_vtag;
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* get the assoc vrf id and table id */
+ vrf_id = stcb->asoc.vrf_id;
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ sctp_send_abort(m, iphlen, sh, vtag, op_err, vrf_id);
+ if (stcb != NULL) {
+ /* Ok, now let's free it */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ }
+}
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void
+sctp_print_out_track_log(struct sctp_tcb *stcb)
+{
+#ifdef NOSIY_PRINTS
+ int i;
+
+ SCTP_PRINTF("Last ep reason:%x\n", stcb->sctp_ep->last_abort_code);
+ SCTP_PRINTF("IN bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_in_at == 0) && (stcb->asoc.tsn_in_wrapped == 0)) {
+ SCTP_PRINTF("None rcvd\n");
+ goto none_in;
+ }
+ if (stcb->asoc.tsn_in_wrapped) {
+ for (i = stcb->asoc.tsn_in_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_in_at) {
+ for (i = 0; i < stcb->asoc.tsn_in_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+none_in:
+ SCTP_PRINTF("OUT bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_out_at == 0) &&
+ (stcb->asoc.tsn_out_wrapped == 0)) {
+ SCTP_PRINTF("None sent\n");
+ }
+ if (stcb->asoc.tsn_out_wrapped) {
+ for (i = stcb->asoc.tsn_out_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_out_at) {
+ for (i = 0; i < stcb->asoc.tsn_out_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+#endif
+}
+
+#endif
+
+void
+sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ int error, struct mbuf *op_err,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+#endif
+ if (stcb == NULL) {
+ /* Got to have a TCB */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ return;
+ } else {
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ vtag = stcb->asoc.peer_vtag;
+ /* notify the ulp */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0)
+ sctp_abort_notification(stcb, error, so_locked);
+ /* notify the peer */
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("aborting an association");
+#endif
+ sctp_send_abort_tcb(stcb, op_err, so_locked);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ /* now free the asoc */
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+}
+
+void
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_inpcb *inp, struct mbuf *op_err, uint32_t vrf_id)
+{
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
+ /* Generate a TO address for future reference */
+ if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_COOKIE_ECHO:
+ /* We hit here only if the assoc is being freed */
+ return;
+ case SCTP_PACKET_DROPPED:
+ /* we don't respond to pkt-dropped */
+ return;
+ case SCTP_ABORT_ASSOCIATION:
+ /* we don't respond with an ABORT to an ABORT */
+ return;
+ case SCTP_SHUTDOWN_COMPLETE:
+ /*
+ * we ignore it since we are not waiting for it and
+ * peer is gone
+ */
+ return;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id);
+}
+
+/*
+ * Check the inbound datagram to make sure there is not an ABORT chunk
+ * inside it; if there is, return 1, else return 0.
+ */
+int
+sctp_is_there_an_abort_here(struct mbuf *m, int iphlen, uint32_t * vtagfill)
+{
+ struct sctp_chunkhdr *ch;
+ struct sctp_init_chunk *init_chk, chunk_buf;
+ int offset;
+ unsigned int chk_length;
+
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch),
+ (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* packet is probably corrupt */
+ break;
+ }
+ /* we seem to be ok, is it an abort? */
+ if (ch->chunk_type == SCTP_ABORT_ASSOCIATION) {
+ /* yep, tell them */
+ return (1);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /* need to update the Vtag */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ offset, sizeof(*init_chk), (uint8_t *) & chunk_buf);
+ if (init_chk != NULL) {
+ *vtagfill = ntohl(init_chk->init.initiate_tag);
+ }
+ }
+ /* Nope, move to the next chunk */
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ return (0);
+}
+
+/*
+ * Currently (2/02), ifa_addr embeds the scope_id in the address and does
+ * not have sin6_scope_id set (i.e. it's 0), so this function compares
+ * link-local scopes.
+ */
+uint32_t
+sctp_is_same_scope(struct sockaddr_in6 *addr1, struct sockaddr_in6 *addr2)
+{
+ struct sockaddr_in6 a, b;
+
+ /* save copies */
+ a = *addr1;
+ b = *addr2;
+
+ if (a.sin6_scope_id == 0)
+ if (sa6_recoverscope(&a)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (b.sin6_scope_id == 0)
+ if (sa6_recoverscope(&b)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (a.sin6_scope_id != b.sin6_scope_id)
+ return (0);
+
+ return (1);
+}
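+
+/*
+ * Example of why this matters: two peers may both use the same link-local
+ * address (say fe80::1) on different links; only the recovered scope/zone
+ * id distinguishes them, so addresses whose scope ids differ (or cannot
+ * be recovered) are treated as not matching.
+ */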
+
+/*
+ * returns a sockaddr_in6 with embedded scope recovered and removed
+ */
+struct sockaddr_in6 *
+sctp_recover_scope(struct sockaddr_in6 *addr, struct sockaddr_in6 *store)
+{
+ /* check and strip embedded scope junk */
+ if (addr->sin6_family == AF_INET6) {
+ if (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr)) {
+ if (addr->sin6_scope_id == 0) {
+ *store = *addr;
+ if (!sa6_recoverscope(store)) {
+ /* use the recovered scope */
+ addr = store;
+ }
+ } else {
+ /* else, return the original "to" addr */
+ in6_clearscope(&addr->sin6_addr);
+ }
+ }
+ }
+ return (addr);
+}
+
+/*
+ * Are the two addresses the same? Currently this is a "scopeless" check
+ * (scope ids are not compared). Returns 1 if same, 0 if not.
+ */
+int
+sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2)
+{
+
+ /* must be valid */
+ if (sa1 == NULL || sa2 == NULL)
+ return (0);
+
+ /* must be the same family */
+ if (sa1->sa_family != sa2->sa_family)
+ return (0);
+
+ if (sa1->sa_family == AF_INET6) {
+ /* IPv6 addresses */
+ struct sockaddr_in6 *sin6_1, *sin6_2;
+
+ sin6_1 = (struct sockaddr_in6 *)sa1;
+ sin6_2 = (struct sockaddr_in6 *)sa2;
+ return (SCTP6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr,
+ &sin6_2->sin6_addr));
+ } else if (sa1->sa_family == AF_INET) {
+ /* IPv4 addresses */
+ struct sockaddr_in *sin_1, *sin_2;
+
+ sin_1 = (struct sockaddr_in *)sa1;
+ sin_2 = (struct sockaddr_in *)sa2;
+ return (sin_1->sin_addr.s_addr == sin_2->sin_addr.s_addr);
+ } else {
+ /* we don't do these... */
+ return (0);
+ }
+}
+
+void
+sctp_print_address(struct sockaddr *sa)
+{
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ ip6buf[0] = 0;
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ SCTP_PRINTF("IPv6 address: %s:port:%d scope:%u\n",
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ ntohs(sin6->sin6_port),
+ sin6->sin6_scope_id);
+ } else if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+ unsigned char *p;
+
+ sin = (struct sockaddr_in *)sa;
+ p = (unsigned char *)&sin->sin_addr;
+ SCTP_PRINTF("IPv4 address: %u.%u.%u.%u:%d\n",
+ p[0], p[1], p[2], p[3], ntohs(sin->sin_port));
+ } else {
+ SCTP_PRINTF("?\n");
+ }
+}
+
+void
+sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
+{
+ if (iph->ip_v == IPVERSION) {
+ struct sockaddr_in lsa, fsa;
+
+ bzero(&lsa, sizeof(lsa));
+ lsa.sin_len = sizeof(lsa);
+ lsa.sin_family = AF_INET;
+ lsa.sin_addr = iph->ip_src;
+ lsa.sin_port = sh->src_port;
+ bzero(&fsa, sizeof(fsa));
+ fsa.sin_len = sizeof(fsa);
+ fsa.sin_family = AF_INET;
+ fsa.sin_addr = iph->ip_dst;
+ fsa.sin_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 lsa6, fsa6;
+
+ ip6 = (struct ip6_hdr *)iph;
+ bzero(&lsa6, sizeof(lsa6));
+ lsa6.sin6_len = sizeof(lsa6);
+ lsa6.sin6_family = AF_INET6;
+ lsa6.sin6_addr = ip6->ip6_src;
+ lsa6.sin6_port = sh->src_port;
+ bzero(&fsa6, sizeof(fsa6));
+ fsa6.sin6_len = sizeof(fsa6);
+ fsa6.sin6_family = AF_INET6;
+ fsa6.sin6_addr = ip6->ip6_dst;
+ fsa6.sin6_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa6);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa6);
+ }
+}
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb,
+ int waitflags)
+{
+ /*
+ * go through our old INP and pull off any control structures that
+ * belong to stcb and move them to the new inp.
+ */
+ struct socket *old_so, *new_so;
+ struct sctp_queued_to_read *control, *nctl;
+ struct sctp_readhead tmp_queue;
+ struct mbuf *m;
+ int error = 0;
+
+ old_so = old_inp->sctp_socket;
+ new_so = new_inp->sctp_socket;
+ TAILQ_INIT(&tmp_queue);
+ error = sblock(&old_so->so_rcv, waitflags);
+ if (error) {
+ /*
+ * Gak, can't get sblock, we have a problem. Data will be
+ * left stranded, and we don't dare look at it since the
+ * other thread may be reading something. Oh well, it's a
+ * screwed up app that does a peeloff OR an accept while
+ * reading from the main socket... actually it's only the
+ * peeloff() case, since I think read will fail on a
+ * listening socket.
+ */
+ return;
+ }
+ /* lock the socket buffers */
+ SCTP_INP_READ_LOCK(old_inp);
+ control = TAILQ_FIRST(&old_inp->read_queue);
+ /* Pull off everything for our target stcb */
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ if (control->stcb == stcb) {
+ /* remove it we want it */
+ TAILQ_REMOVE(&old_inp->read_queue, control, next);
+ TAILQ_INSERT_TAIL(&tmp_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &old_so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(old_inp);
+ /* Remove the sb-lock on the old socket */
+
+ sbunlock(&old_so->so_rcv);
+ /* Now we move them over to the new socket buffer */
+ control = TAILQ_FIRST(&tmp_queue);
+ SCTP_INP_READ_LOCK(new_inp);
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ TAILQ_INSERT_TAIL(&new_inp->read_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, &new_so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(new_inp);
+}
+
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+ * Here we must place the control on the end of the socket read
+ * queue AND increment sb_cc so that select will work properly on
+ * read.
+ */
+ struct mbuf *m, *prev = NULL;
+
+ if (inp == NULL) {
+ /* Gak, TSNH!! */
+#ifdef INVARIANTS
+ panic("Gak, inp NULL on add_to_readq");
+#endif
+ return;
+ }
+ SCTP_INP_READ_LOCK(inp);
+ if (!(control->spec_flags & M_NOTIFICATION)) {
+ atomic_add_int(&inp->total_recvs, 1);
+ if (!control->do_not_ref_stcb) {
+ atomic_add_int(&stcb->total_recvs, 1);
+ }
+ }
+ m = control->data;
+ control->held_length = 0;
+ control->length = 0;
+ while (m) {
+ if (SCTP_BUF_LEN(m) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ control->data = sctp_m_free(m);
+ m = control->data;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(m);
+ m = SCTP_BUF_NEXT(prev);
+ }
+ if (m == NULL) {
+ control->tail_mbuf = prev;
+ }
+ continue;
+ }
+ prev = m;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, sb, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m));
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (prev != NULL) {
+ control->tail_mbuf = prev;
+ } else {
+ /* Everything got collapsed out?? */
+ SCTP_INP_READ_UNLOCK(inp);
+ return;
+ }
+ if (end) {
+ control->end_added = 1;
+ }
+ TAILQ_INSERT_TAIL(&inp->read_queue, control, next);
+ SCTP_INP_READ_UNLOCK(inp);
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ }
+}
+
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int ctls_cumack,
+ struct sockbuf *sb)
+{
+ /*
+ * A partial delivery API event is underway. OR we are appending on
+ * the reassembly queue.
+ *
+ * If PDAPI this means we need to add m to the end of the data.
+ * Increase the length in the control AND increment the sb_cc.
+ * Otherwise sb is NULL and all we need to do is put it at the end
+ * of the mbuf chain.
+ */
+ int len = 0;
+ struct mbuf *mm, *tail = NULL, *prev = NULL;
+
+ if (inp) {
+ SCTP_INP_READ_LOCK(inp);
+ }
+ if (control == NULL) {
+get_out:
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ return (-1);
+ }
+ if (control->end_added) {
+ /* huh this one is complete? */
+ goto get_out;
+ }
+ mm = m;
+ if (mm == NULL) {
+ goto get_out;
+ }
+ while (mm) {
+ if (SCTP_BUF_LEN(mm) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ m = sctp_m_free(mm);
+ mm = m;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(mm);
+ mm = SCTP_BUF_NEXT(prev);
+ }
+ continue;
+ }
+ prev = mm;
+ len += SCTP_BUF_LEN(mm);
+ if (sb) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(mm));
+ }
+ sctp_sballoc(stcb, sb, mm);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ }
+ mm = SCTP_BUF_NEXT(mm);
+ }
+ if (prev) {
+ tail = prev;
+ } else {
+ /* Really there should always be a prev */
+ if (m == NULL) {
+ /* Huh nothing left? */
+#ifdef INVARIANTS
+ panic("Nothing left to add?");
+#else
+ goto get_out;
+#endif
+ }
+ tail = m;
+ }
+ if (control->tail_mbuf) {
+ /* append */
+ SCTP_BUF_NEXT(control->tail_mbuf) = m;
+ control->tail_mbuf = tail;
+ } else {
+ /* nothing there */
+#ifdef INVARIANTS
+ if (control->data != NULL) {
+ panic("This should NOT happen");
+ }
+#endif
+ control->data = m;
+ control->tail_mbuf = tail;
+ }
+ atomic_add_int(&control->length, len);
+ if (end) {
+ /* message is complete */
+ if (stcb && (control == stcb->asoc.control_pdapi)) {
+ stcb->asoc.control_pdapi = NULL;
+ }
+ control->held_length = 0;
+ control->end_added = 1;
+ }
+ if (stcb == NULL) {
+ control->do_not_ref_stcb = 1;
+ }
+ /*
+ * When we are appending in partial delivery, the cum-ack is used
+ * for the actual pd-api highest tsn on this mbuf. The true cum-ack
+ * is populated in the outbound sinfo structure from the true cumack
+ * if the association exists...
+ */
+ control->sinfo_tsn = control->sinfo_cumtsn = ctls_cumack;
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ }
+ return (0);
+}
+
+
+
+/*************HOLD THIS COMMENT FOR PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+/*************HOLD THIS COMMENT FOR END OF PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+struct mbuf *
+sctp_generate_invmanparam(int err)
+{
+ /* Return an mbuf with an invalid mandatory parameter */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(err);
+ }
+ return (m);
+}
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *tp1, int chk_cnt)
+{
+ if (tp1->data == NULL) {
+ return;
+ }
+ asoc->chunks_on_out_queue -= chk_cnt;
+ if (sctp_logging_level & SCTP_MBCNT_LOGGING_ENABLE) {
+ sctp_log_mbcnt(SCTP_LOG_MBCNT_DECREASE,
+ asoc->total_output_queue_size,
+ tp1->book_size,
+ 0,
+ tp1->mbcnt);
+ }
+ if (asoc->total_output_queue_size >= tp1->book_size) {
+ atomic_add_int(&asoc->total_output_queue_size, -tp1->book_size);
+ } else {
+ asoc->total_output_queue_size = 0;
+ }
+
+ if (stcb->sctp_socket && (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)))) {
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) {
+ stcb->sctp_socket->so_snd.sb_cc -= tp1->book_size;
+ } else {
+ stcb->sctp_socket->so_snd.sb_cc = 0;
+
+ }
+ }
+}
+
+#endif
+
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
+ int reason, struct sctpchunk_listhead *queue, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ int ret_sz = 0;
+ int notdone;
+ uint8_t foundeom = 0;
+
+ do {
+ ret_sz += tp1->book_size;
+ tp1->sent = SCTP_FORWARD_TSN_SKIP;
+ if (tp1->data) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, SCTP_SO_NOT_LOCKED);
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /*
+ * assoc was freed while we were
+ * unlocked
+ */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (ret_sz);
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ stcb->asoc.sent_queue_cnt_removeable--;
+ }
+ if (queue == &stcb->asoc.send_queue) {
+ TAILQ_REMOVE(&stcb->asoc.send_queue, tp1, sctp_next);
+ /* on to the sent queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, tp1,
+ sctp_next);
+ stcb->asoc.sent_queue_cnt++;
+ }
+ if ((tp1->rec.data.rcv_flags & SCTP_DATA_NOT_FRAG) ==
+ SCTP_DATA_NOT_FRAG) {
+ /* not frag'ed, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ /* end of frag, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else {
+ /*
+ * It's a begin or middle piece; we must mark all of
+ * it
+ */
+ notdone = 1;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ } while (tp1 && notdone);
+ if ((foundeom == 0) && (queue == &stcb->asoc.sent_queue)) {
+ /*
+ * The multi-part message was scattered across the send and
+ * sent queue.
+ */
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ /*
+		 * recurse through the send_queue too, starting at the
+ * beginning.
+ */
+ if (tp1) {
+ ret_sz += sctp_release_pr_sctp_chunk(stcb, tp1, reason,
+ &stcb->asoc.send_queue, so_locked);
+ } else {
+ SCTP_PRINTF("hmm, nothing on the send queue and no EOM?\n");
+ }
+ }
+ return (ret_sz);
+}
+
+/*
+ * Checks whether the given address, addr, is one currently known to the
+ * kernel.  Note: cannot distinguish the same address on multiple interfaces,
+ * and does not handle multiple addresses with different zone/scope ids.
+ * Note: ifa_ifwithaddr() compares the entire sockaddr struct.
+ */
+struct sctp_ifa *
+sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int holds_lock)
+{
+ struct sctp_laddr *laddr;
+
+ if (holds_lock == 0) {
+ SCTP_INP_RLOCK(inp);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL)
+ continue;
+ if (addr->sa_family != laddr->ifa->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ laddr->ifa->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr,
+ &laddr->ifa->address.sin6.sin6_addr)) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ }
+ }
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
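+/*
+ * Hash a sockaddr for the address hash table lookups below: fold the IPv4
+ * address with its upper 16 bits, or sum the four 32-bit words of an IPv6
+ * address and fold the result.  Unknown address families hash to 0.
+ */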
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ return (sin->sin_addr.s_addr ^ (sin->sin_addr.s_addr >> 16));
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+ uint32_t hash_of_addr;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ hash_of_addr = (sin6->sin6_addr.s6_addr32[0] +
+ sin6->sin6_addr.s6_addr32[1] +
+ sin6->sin6_addr.s6_addr32[2] +
+ sin6->sin6_addr.s6_addr32[3]);
+ hash_of_addr = (hash_of_addr ^ (hash_of_addr >> 16));
+ return (hash_of_addr);
+ }
+ return (0);
+}
+
+struct sctp_ifa *
+sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
+{
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_vrf *vrf;
+ struct sctp_ifalist *hash_head;
+ uint32_t hash_of_addr;
+
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RLOCK();
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(addr);
+
+ hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ if (hash_head == NULL) {
+ SCTP_PRINTF("hash_of_addr:%x mask:%x table:%x - ",
+ hash_of_addr, (uint32_t) vrf->vrf_addr_hashmark,
+ (uint32_t) (hash_of_addr & vrf->vrf_addr_hashmark));
+ sctp_print_address(addr);
+ SCTP_PRINTF("No such bucket for address\n");
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
+ if (sctp_ifap == NULL) {
+ panic("Huh LIST_FOREACH corrupt");
+ }
+ if (addr->sa_family != sctp_ifap->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ sctp_ifap->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr,
+ &sctp_ifap->address.sin6.sin6_addr)) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ }
+ }
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
+static void
+sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t * freed_so_far, int hold_rlock,
+ uint32_t rwnd_req)
+{
+ /* User pulled some data, do we need a rwnd update? */
+ int r_unlocked = 0;
+ uint32_t dif, rwnd;
+ struct socket *so = NULL;
+
+ if (stcb == NULL)
+ return;
+
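+	/*
+	 * Hold a reference on the association so it cannot be freed while
+	 * we decide whether a window-update SACK is warranted.
+	 */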
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ if (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED |
+ SCTP_STATE_SHUTDOWN_RECEIVED |
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+		/* Pre-check: if we are freeing or shutting down, no update needed */
+ goto no_lock;
+ }
+ SCTP_INP_INCR_REF(stcb->sctp_ep);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ so = stcb->sctp_socket;
+ if (so == NULL) {
+ goto out;
+ }
+ atomic_add_int(&stcb->freed_by_sorcv_sincelast, *freed_so_far);
+	/* Have we freed enough to warrant a look? */
+	*freed_so_far = 0;
+	/* Yes, it's worth a look and the lock overhead */
+
+ /* Figure out what the rwnd would be */
+ rwnd = sctp_calc_rwnd(stcb, &stcb->asoc);
+ if (rwnd >= stcb->asoc.my_last_reported_rwnd) {
+ dif = rwnd - stcb->asoc.my_last_reported_rwnd;
+ } else {
+ dif = 0;
+ }
+ if (dif >= rwnd_req) {
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ r_unlocked = 1;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+			 * One last check before we possibly let the caller
+			 * in.  There is a race where the free has not yet
+			 * reached the gate; in that case we skip the update.
+ */
+ goto out;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* No reports here */
+ SCTP_TCB_UNLOCK(stcb);
+ goto out;
+ }
+ SCTP_STAT_INCR(sctps_wu_sacks_sent);
+ sctp_send_sack(stcb);
+ sctp_chunk_output(stcb->sctp_ep, stcb,
+ SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
+ /* make sure no timer is running */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Update how much we have pending */
+ stcb->freed_by_sorcv_sincelast = dif;
+ }
+out:
+ if (so && r_unlocked && hold_rlock) {
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ }
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+no_lock:
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ return;
+}
+
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo)
+{
+ /*
+	 * MSG flags we will look at:
+	 * MSG_DONTWAIT - non-blocking IO.
+	 * MSG_PEEK - look, don't touch (only valid without an mbuf copy,
+	 *            i.e. mp == NULL, so uio is the copy method to userland).
+	 * MSG_WAITALL - ??
+	 * On the way out we may set any combination of:
+	 * MSG_NOTIFICATION, MSG_EOR.
+ */
+ struct sctp_inpcb *inp = NULL;
+ int my_len = 0;
+ int cp_len = 0, error = 0;
+ struct sctp_queued_to_read *control = NULL, *ctl = NULL, *nxt = NULL;
+ struct mbuf *m = NULL, *embuf = NULL;
+ struct sctp_tcb *stcb = NULL;
+ int wakeup_read_socket = 0;
+ int freecnt_applied = 0;
+ int out_flags = 0, in_flags = 0;
+ int block_allowed = 1;
+ uint32_t freed_so_far = 0;
+ uint32_t copied_so_far = 0;
+ int in_eeor_mode = 0;
+ int no_rcv_needed = 0;
+ uint32_t rwnd_req = 0;
+ int hold_sblock = 0;
+ int hold_rlock = 0;
+ int slen = 0;
+ uint32_t held_length = 0;
+ int sockbuf_lock = 0;
+
+ if (uio == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if (msg_flags) {
+ in_flags = *msg_flags;
+ if (in_flags & MSG_PEEK)
+ SCTP_STAT_INCR(sctps_read_peeks);
+ } else {
+ in_flags = 0;
+ }
+ slen = uio->uio_resid;
+
+ /* Pull in and set up our int flags */
+ if (in_flags & MSG_OOB) {
+		/* Out-of-band data is NOT supported */
+ return (EOPNOTSUPP);
+ }
+ if ((in_flags & MSG_PEEK) && (mp != NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((in_flags & (MSG_DONTWAIT
+ | MSG_NBIO
+ )) ||
+ SCTP_SO_IS_NBIO(so)) {
+ block_allowed = 0;
+ }
+ /* setup the endpoint */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+ }
+ rwnd_req = (SCTP_SB_LIMIT_RCV(so) >> SCTP_RWND_HIWAT_SHIFT);
+ /* Must be at least a MTU's worth */
+ if (rwnd_req < SCTP_MIN_RWND)
+ rwnd_req = SCTP_MIN_RWND;
+ in_eeor_mode = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTER,
+ rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTERPL,
+ rwnd_req, block_allowed, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
+ sockbuf_lock = 1;
+ if (error) {
+ goto release_unlocked;
+ }
+restart:
+
+
+restart_nosblocks:
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto out;
+ } else {
+ if (so->so_rcv.sb_cc == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ /* indicate EOF */
+ error = 0;
+ goto out;
+ }
+ }
+ }
+ if ((so->so_rcv.sb_cc <= held_length) && block_allowed) {
+ /* we need to wait for data */
+ if ((so->so_rcv.sb_cc == 0) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+				 * For the active open side, clear flags for
+				 * re-use; a passive open is blocked by
+				 * connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive side
+ * always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you are
+ * active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on the
+ * active open side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto out;
+ }
+ held_length = 0;
+ goto restart_nosblocks;
+ } else if (so->so_rcv.sb_cc == 0) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+					 * For the active open side, clear
+					 * flags for re-use; a passive open
+					 * is blocked by connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive
+ * side always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you
+ * are active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on
+ * the active open
+ * side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EWOULDBLOCK);
+ error = EWOULDBLOCK;
+ }
+ goto out;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ /* we possibly have data we can read */
+ /* sa_ignore FREED_MEMORY */
+ control = TAILQ_FIRST(&inp->read_queue);
+ if (control == NULL) {
+ /*
+		 * This could happen because the appender did the
+		 * increment but has not yet done the tailq insert onto
+		 * the read_queue.
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ control = TAILQ_FIRST(&inp->read_queue);
+ if ((control == NULL) && (so->so_rcv.sb_cc != 0)) {
+#ifdef INVARIANTS
+ panic("Huh, its non zero and nothing on control?");
+#endif
+ so->so_rcv.sb_cc = 0;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ goto restart;
+ }
+ if ((control->length == 0) &&
+ (control->do_not_ref_stcb)) {
+ /*
+		 * Clean-up code for freeing an assoc that left behind a
+		 * pdapi... maybe a peer in EEOR mode that just closed after
+		 * sending and never indicated an EOR.
+ */
+ if (hold_rlock == 0) {
+ hold_rlock = 1;
+ SCTP_INP_READ_LOCK(inp);
+ }
+ control->held_length = 0;
+ if (control->data) {
+ /* Hmm there is data here .. fix */
+ struct mbuf *m_tmp;
+ int cnt = 0;
+
+ m_tmp = control->data;
+ while (m_tmp) {
+ cnt += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ control->tail_mbuf = m_tmp;
+ control->end_added = 1;
+ }
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ control->length = cnt;
+ } else {
+ /* remove it */
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+			/* Add back any hidden data */
+ sctp_free_remote_addr(control->whoFrom);
+ sctp_free_a_readq(stcb, control);
+ }
+ if (hold_rlock) {
+ hold_rlock = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ goto restart;
+ }
+ if (control->length == 0) {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
+ (filling_sinfo)) {
+			/* find a more suitable one than this */
+ ctl = TAILQ_NEXT(control, next);
+ while (ctl) {
+ if ((ctl->stcb != control->stcb) && (ctl->length) &&
+ (ctl->some_taken ||
+ (ctl->spec_flags & M_NOTIFICATION) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
+ ) {
+ /*-
+					 * If we have a different TCB next, and there is data
+					 * present, and either we have already taken some (pdapi)
+					 * OR we can ref the tcb and no delivery has started on
+					 * this stream, we take it.  Note we allow a notification
+					 * on a different assoc to be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ } else if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) &&
+ (ctl->length) &&
+ ((ctl->some_taken) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ ((ctl->spec_flags & M_NOTIFICATION) == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
+ ) {
+ /*-
+					 * If we have the same tcb, there is data present, and we
+					 * have the stream interleave feature on, then if we have
+					 * taken some (pdapi) or we can refer to that tcb AND we
+					 * have not started a delivery for this stream, we can take
+					 * it.  Note we do NOT allow a notification on the same
+					 * assoc to be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ }
+ ctl = TAILQ_NEXT(ctl, next);
+ }
+ }
+ /*
+		 * If we reach here, no suitable replacement is available
+		 * <or> fragment interleave is NOT on.  So stuff the sb_cc
+		 * into our held count, and it's time to sleep again.
+ */
+ held_length = so->so_rcv.sb_cc;
+ control->held_length = so->so_rcv.sb_cc;
+ goto restart;
+ }
+ /* Clear the held length since there is something to read */
+ control->held_length = 0;
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+found_one:
+ /*
+	 * If we reach here, control has some data for us to read off.
+ * Note that stcb COULD be NULL.
+ */
+ control->some_taken++;
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ stcb = control->stcb;
+ if (stcb) {
+ if ((control->do_not_ref_stcb == 0) &&
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ if (freecnt_applied == 0)
+ stcb = NULL;
+ } else if (control->do_not_ref_stcb == 0) {
+ /* you can't free it on me please */
+ /*
+ * The lock on the socket buffer protects us so the
+ * free code will stop. But since we used the
+ * socketbuf lock and the sender uses the tcb_lock
+ * to increment, we need to use the atomic add to
+ * the refcnt
+ */
+ if (freecnt_applied) {
+#ifdef INVARIANTS
+ panic("refcnt already incremented");
+#else
+ printf("refcnt already incremented?\n");
+#endif
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ freecnt_applied = 1;
+ }
+ /*
+ * Setup to remember how much we have not yet told
+ * the peer our rwnd has opened up. Note we grab the
+ * value from the tcb from last time. Note too that
+ * sack sending clears this when a sack is sent,
+ * which is fine. Once we hit the rwnd_req, we then
+ * will go to the sctp_user_rcvd() that will not
+ * lock until it KNOWs it MUST send a WUP-SACK.
+ */
+ freed_so_far = stcb->freed_by_sorcv_sincelast;
+ stcb->freed_by_sorcv_sincelast = 0;
+ }
+ }
+ if (stcb &&
+ ((control->spec_flags & M_NOTIFICATION) == 0) &&
+ control->do_not_ref_stcb == 0) {
+ stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
+ }
+	/* First let's copy off the sinfo and sockaddr info */
+ if ((sinfo) && filling_sinfo) {
+ memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo));
+ nxt = TAILQ_NEXT(control, next);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ if ((nxt) &&
+ (nxt->length)) {
+ s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+ if (nxt->sinfo_flags & SCTP_UNORDERED) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+ }
+ if (nxt->spec_flags & M_NOTIFICATION) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+ }
+ s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
+ s_extra->sreinfo_next_length = nxt->length;
+ s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
+ s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+ if (nxt->tail_mbuf != NULL) {
+ if (nxt->end_added) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+ }
+ }
+ } else {
+ /*
+				 * we explicitly zero this, since the memcpy
+				 * picked up other fields beyond the older
+				 * sinfo_ ones that are on the control structure
+				 * :-D
+ */
+ nxt = NULL;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->sreinfo_next_aid = 0;
+ s_extra->sreinfo_next_length = 0;
+ s_extra->sreinfo_next_ppid = 0;
+ s_extra->sreinfo_next_stream = 0;
+ }
+ }
+ /*
+ * update off the real current cum-ack, if we have an stcb.
+ */
+ if ((control->do_not_ref_stcb == 0) && stcb)
+ sinfo->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * mask off the high bits, we keep the actual chunk bits in
+ * there.
+ */
+ sinfo->sinfo_flags &= 0x00ff;
+ if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) {
+ sinfo->sinfo_flags |= SCTP_UNORDERED;
+ }
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ {
+ int index, newindex;
+ struct sctp_pcbtsn_rlog *entry;
+
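+		/* Reserve the next read-log slot lock-free with a compare-and-swap loop. */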
+ do {
+ index = inp->readlog_index;
+ newindex = index + 1;
+ if (newindex >= SCTP_READ_LOG_SIZE) {
+ newindex = 0;
+ }
+ } while (atomic_cmpset_int(&inp->readlog_index, index, newindex) == 0);
+ entry = &inp->readlog[index];
+ entry->vtag = control->sinfo_assoc_id;
+ entry->strm = control->sinfo_stream;
+ entry->seq = control->sinfo_ssn;
+ entry->sz = control->length;
+ entry->flgs = control->sinfo_flags;
+ }
+#endif
+ if (fromlen && from) {
+ struct sockaddr *to;
+
+#ifdef INET
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin.sin_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in *)from)->sin_port = control->port_from;
+#else
+ /* No AF_INET use AF_INET6 */
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin6.sin6_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+#endif
+
+ to = from;
+#if defined(INET) && defined(INET6)
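+		/*
+		 * If the endpoint needs v4-mapped addresses, present the
+		 * AF_INET source as a v4-mapped IPv6 address instead.
+		 */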
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) &&
+ (to->sa_family == AF_INET) &&
+ ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 sin6;
+
+ sin = (struct sockaddr_in *)to;
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr.s6_addr16[2] = 0xffff;
+ bcopy(&sin->sin_addr,
+ &sin6.sin6_addr.s6_addr16[3],
+ sizeof(sin6.sin6_addr.s6_addr16[3]));
+ sin6.sin6_port = sin->sin_port;
+ memcpy(from, (caddr_t)&sin6, sizeof(sin6));
+ }
+#endif
+#if defined(INET6)
+ {
+ struct sockaddr_in6 lsa6, *to6;
+
+ to6 = (struct sockaddr_in6 *)to;
+ sctp_recover_scope_mac(to6, (&lsa6));
+ }
+#endif
+ }
+ /* now copy out what data we can */
+ if (mp == NULL) {
+ /* copy out each mbuf in the chain up to length */
+get_more_data:
+ m = control->data;
+ while (m) {
+ /* Move out all we can */
+ cp_len = (int)uio->uio_resid;
+ my_len = (int)SCTP_BUF_LEN(m);
+ if (cp_len > my_len) {
+ /* not enough in this buf */
+ cp_len = my_len;
+ }
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (cp_len > 0)
+ error = uiomove(mtod(m, char *), cp_len, uio);
+ /* re-read */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto release;
+ }
+ if ((control->do_not_ref_stcb == 0) && stcb &&
+ stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ no_rcv_needed = 1;
+ }
+ if (error) {
+ /* error we are out of here */
+ goto release;
+ }
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (cp_len >= SCTP_BUF_LEN(m)) &&
+ ((control->end_added == 0) ||
+ (control->end_added &&
+ (TAILQ_NEXT(control, next) == NULL)))
+ ) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ if (cp_len == SCTP_BUF_LEN(m)) {
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (control->end_added)) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ /* we ate up the mbuf */
+ if (in_flags & MSG_PEEK) {
+ /* just looking */
+ m = SCTP_BUF_NEXT(m);
+ copied_so_far += cp_len;
+ } else {
+ /* dispose of the mbuf */
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ embuf = m;
+ copied_so_far += cp_len;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ atomic_subtract_int(&control->length, cp_len);
+ control->data = sctp_m_free(m);
+ m = control->data;
+ /*
+					 * been through it all; we must hold the
+					 * sb lock, so it is ok to null the tail
+ */
+ if (control->data == NULL) {
+#ifdef INVARIANTS
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /*
+ * If the end is not
+ * added, OR the
+ * next is NOT null
+ * we MUST have the
+ * lock.
+ */
+ if (mtx_owned(&inp->inp_rdata_mtx) == 0) {
+ panic("Hmm we don't own the lock?");
+ }
+ }
+#endif
+ control->tail_mbuf = NULL;
+#ifdef INVARIANTS
+ if ((control->end_added) && ((out_flags & MSG_EOR) == 0)) {
+ panic("end_added, nothing left and no MSG_EOR");
+ }
+#endif
+ }
+ }
+ } else {
+ /* Do we need to trim the mbuf? */
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ if ((in_flags & MSG_PEEK) == 0) {
+ SCTP_BUF_RESV_UF(m, cp_len);
+ SCTP_BUF_LEN(m) -= cp_len;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, cp_len);
+ }
+ atomic_subtract_int(&so->so_rcv.sb_cc, cp_len);
+ if ((control->do_not_ref_stcb == 0) &&
+ stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_cc, cp_len);
+ }
+ copied_so_far += cp_len;
+ embuf = m;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb,
+ SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_subtract_int(&control->length, cp_len);
+ } else {
+ copied_so_far += cp_len;
+ }
+ }
+ if ((out_flags & MSG_EOR) || (uio->uio_resid == 0)) {
+ break;
+ }
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ (control->do_not_ref_stcb == 0) &&
+ (freed_so_far >= rwnd_req)) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+ } /* end while(m) */
+ /*
+ * At this point we have looked at it all and we either have
+		 * a MSG_EOR, or have read all the user wants... <OR>
+ * control->length == 0.
+ */
+ if ((out_flags & MSG_EOR) && ((in_flags & MSG_PEEK) == 0)) {
+ /* we are done with this control */
+ if (control->length == 0) {
+ if (control->data) {
+#ifdef INVARIANTS
+ panic("control->data not null at read eor?");
+#else
+					SCTP_PRINTF("Strange, data left in the control buffer .. invariants would panic?\n");
+ sctp_m_freem(control->data);
+ control->data = NULL;
+#endif
+ }
+ done_with_control:
+ if (TAILQ_NEXT(control, next) == NULL) {
+ /*
+					 * If we don't have a next we need a
+					 * lock; if there is a next, the
+					 * interrupt is filling ahead of us
+					 * and we don't need a lock to
+					 * remove this one (which is the
+					 * head of the queue).
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+				/* Add back any hidden data */
+ if (control->held_length) {
+ held_length = 0;
+ control->held_length = 0;
+ wakeup_read_socket = 1;
+ }
+ if (control->aux_data) {
+ sctp_m_free(control->aux_data);
+ control->aux_data = NULL;
+ }
+ no_rcv_needed = control->do_not_ref_stcb;
+ sctp_free_remote_addr(control->whoFrom);
+ control->data = NULL;
+ sctp_free_a_readq(stcb, control);
+ control = NULL;
+ if ((freed_so_far >= rwnd_req) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+
+ } else {
+ /*
+ * The user did not read all of this
+ * message, turn off the returned MSG_EOR
+ * since we are leaving more behind on the
+ * control to read.
+ */
+#ifdef INVARIANTS
+ if (control->end_added &&
+ (control->data == NULL) &&
+ (control->tail_mbuf == NULL)) {
+ panic("Gak, control->length is corrupt?");
+ }
+#endif
+ no_rcv_needed = control->do_not_ref_stcb;
+ out_flags &= ~MSG_EOR;
+ }
+ }
+ if (out_flags & MSG_EOR) {
+ goto release;
+ }
+ if ((uio->uio_resid == 0) ||
+ ((in_eeor_mode) && (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))
+ ) {
+ goto release;
+ }
+ /*
+		 * If we hit here, the receiver wants more and this message is
+		 * NOT done (pd-api).  So, two questions: can we block?  If not,
+		 * we are done.  Did the user NOT set MSG_WAITALL?
+ */
+ if (block_allowed == 0) {
+ goto release;
+ }
+ /*
+		 * We need to wait for more data.  A few things: - We don't
+		 * sbunlock() so nobody else starts reading. - We must be
+		 * sure to account for the case where what is added is NOT
+		 * for our control when we wake up.
+ */
+
+ /*
+ * Do we need to tell the transport a rwnd update might be
+ * needed before we go to sleep?
+ */
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ ((freed_so_far >= rwnd_req) &&
+ (control->do_not_ref_stcb == 0) &&
+ (no_rcv_needed == 0))) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+wait_some_more:
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ goto release;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
+ goto release;
+
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((copied_so_far) && (control->length == 0) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE))
+ ) {
+ goto release;
+ }
+ if (so->so_rcv.sb_cc <= control->held_length) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto release;
+ }
+ control->held_length = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (control->length == 0) {
+ /* still nothing here */
+ if (control->end_added == 1) {
+			/* he aborted, or is done, i.e. did a shutdown */
+ out_flags |= MSG_EOR;
+ if (control->pdapi_aborted) {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+
+ out_flags |= MSG_TRUNC;
+ } else {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ goto done_with_control;
+ }
+ if (so->so_rcv.sb_cc > held_length) {
+ control->held_length = so->so_rcv.sb_cc;
+ held_length = 0;
+ }
+ goto wait_some_more;
+ } else if (control->data == NULL) {
+ /*
+ * we must re-sync since data is probably being
+ * added
+ */
+ SCTP_INP_READ_LOCK(inp);
+ if ((control->length > 0) && (control->data == NULL)) {
+ /*
+			 * big trouble... we have the lock and it's
+			 * corrupt?
+ */
+#ifdef INVARIANTS
+ panic("Impossible data==NULL length !=0");
+#endif
+ out_flags |= MSG_EOR;
+ out_flags |= MSG_TRUNC;
+ control->length = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ goto done_with_control;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ /* We will fall around to get more data */
+ }
+ goto get_more_data;
+ } else {
+ /*-
+ * Give caller back the mbuf chain,
+ * store in uio_resid the length
+ */
+ wakeup_read_socket = 0;
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /* Need to get rlock */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ if (control->end_added) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ uio->uio_resid = control->length;
+ *mp = control->data;
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ freed_so_far += SCTP_BUF_LEN(m);
+ freed_so_far += MSIZE;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control->data = control->tail_mbuf = NULL;
+ control->length = 0;
+ if (out_flags & MSG_EOR) {
+ /* Done with this control */
+ goto done_with_control;
+ }
+ }
+release:
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ sbunlock(&so->so_rcv);
+ sockbuf_lock = 0;
+
+release_unlocked:
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if ((stcb) && (in_flags & MSG_PEEK) == 0) {
+ if ((freed_so_far >= rwnd_req) &&
+ (control && (control->do_not_ref_stcb == 0)) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+ if (msg_flags)
+ *msg_flags = out_flags;
+out:
+ if (((out_flags & MSG_EOR) == 0) &&
+ ((in_flags & MSG_PEEK) == 0) &&
+ (sinfo) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ }
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (sockbuf_lock) {
+ sbunlock(&so->so_rcv);
+ }
+ if (freecnt_applied) {
+ /*
+ * The lock on the socket buffer protects us so the free
+ * code will stop. But since we used the socketbuf lock and
+ * the sender uses the tcb_lock to increment, we need to use
+ * the atomic add to the refcnt.
+ */
+ if (stcb == NULL) {
+ panic("stcb for refcnt has gone NULL?");
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ freecnt_applied = 0;
+ /* Save the value back for next time */
+ stcb->freed_by_sorcv_sincelast = freed_so_far;
+ }
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ if (stcb) {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ stcb->asoc.my_rwnd,
+ so->so_rcv.sb_cc);
+ } else {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ 0,
+ so->so_rcv.sb_cc);
+ }
+ }
+ if (wakeup_read_socket) {
+ sctp_sorwakeup(inp, so);
+ }
+ return (error);
+}
+
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *
+sctp_m_free(struct mbuf *m)
+{
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IFREE);
+ }
+ }
+ return (m_free(m));
+}
+
+void
+sctp_m_freem(struct mbuf *mb)
+{
+ while (mb != NULL)
+ mb = sctp_m_free(mb);
+}
+
+#endif
+
+int
+sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id)
+{
+ /*
+	 * Given a local address, request a peer-set-primary for all
+	 * associations that hold the address.
+ */
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *wi;
+
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ /*
+ * Now that we have the ifa we must awaken the iterator with this
+ * message.
+ */
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+	/* Now incr the count and init the wi structure */
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = ifa;
+ wi->action = SCTP_SET_PRIM_ADDR;
+ atomic_add_int(&ifa->refcount, 1);
+
+ /* Now add it to the work queue */
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process the
+ * newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ return (0);
+}
+
+
+
+
+int
+sctp_soreceive(struct socket *so,
+ struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pickup the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (psa) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, flagsp,
+ (struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /* copy back the sinfo in a CMSG format */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_nchunk(inp,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ else
+ *controlp = NULL;
+ }
+ if (psa) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *psa = sodupsockaddr(from, M_NOWAIT);
+ } else {
+ *psa = NULL;
+ }
+ }
+ return (error);
+}
+
+
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pickup the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (name) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio,
+ (struct mbuf **)NULL,
+ from, fromlen, flag,
+ (struct sctp_sndrcvinfo *)&sinfo,
+ filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /*
+		 * copy back the sinfo in CMSG format; note that the caller
+		 * has responsibility for freeing the memory.
+ */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_cchunk(inp,
+ controllen,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ }
+ if (name) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *name = sodupsockaddr(from, M_WAIT);
+ } else {
+ *name = NULL;
+ }
+ }
+ return (error);
+}
+
+
+
+
+
+
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error)
+{
+ int added = 0;
+ int i;
+ struct sctp_inpcb *inp;
+ struct sockaddr *sa;
+ size_t incr = 0;
+
+ sa = addr;
+ inp = stcb->sctp_ep;
+ *error = 0;
+ for (i = 0; i < totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ incr = sizeof(struct sockaddr_in);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+				/* assoc is gone, no unlock needed */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ } else if (sa->sa_family == AF_INET6) {
+ incr = sizeof(struct sockaddr_in6);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+				/* assoc is gone, no unlock needed */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+out_now:
+ return (added);
+}
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error,
+ int limit, int *bad_addr)
+{
+ struct sockaddr *sa;
+ struct sctp_tcb *stcb = NULL;
+ size_t incr, at, i;
+
+ at = incr = 0;
+ sa = addr;
+ *error = *num_v6 = *num_v4 = 0;
+ /* account and validate addresses */
+ for (i = 0; i < (size_t)*totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ (*num_v4) += 1;
+ incr = sizeof(struct sockaddr_in);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /* Must be non-mapped for connectx */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ (*num_v6) += 1;
+ incr = sizeof(struct sockaddr_in6);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else {
+ *totaddr = i;
+ /* we are done */
+ break;
+ }
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
+ if (stcb != NULL) {
+			/* Already have or am bringing up an association */
+ return (stcb);
+ } else {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((at + incr) > (size_t)limit) {
+ *totaddr = i;
+ break;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+ return ((struct sctp_tcb *)NULL);
+}
+
+/*
+ * sctp_bindx(ADD) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p)
+{
+ struct sockaddr *addr_touse;
+ struct sockaddr_in sin;
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6)
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+				/* can't bind v4-mapped addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+			/* can't bind v4 addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ if (p == NULL) {
+ /* Can't get proc for Net/Open BSD */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ *error = sctp_inpcb_bind(so, addr_touse, NULL, p);
+ return;
+ }
+ /*
+ * No locks required here since bind and mgmt_ep_sa all do their own
+ * locking. If we do something for the FIX: below we may need to
+ * lock in that case.
+ */
+ if (assoc_id == 0) {
+ /* add the address */
+ struct sctp_inpcb *lep;
+ struct sockaddr_in *lsin = (struct sockaddr_in *)addr_touse;
+
+ /* validate the incoming port */
+ if ((lsin->sin_port != 0) &&
+ (lsin->sin_port != inp->sctp_lport)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ } else {
+ /* user specified 0 port, set it to existing port */
+ lsin->sin_port = inp->sctp_lport;
+ }
+
+ lep = sctp_pcb_findep(addr_touse, 1, 0, vrf_id);
+ if (lep != NULL) {
+ /*
+ * We must decrement the refcount since we have the
+ * ep already and are binding. No remove going on
+ * here.
+ */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (lep == inp) {
+ /* already bound to it.. ok */
+ return;
+ } else if (lep == NULL) {
+ ((struct sockaddr_in *)addr_touse)->sin_port = 0;
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_ADD_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ *error = EADDRINUSE;
+ }
+ if (*error)
+ return;
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
+
+/*
+ * sctp_bindx(DELETE) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error)
+{
+ struct sockaddr *addr_touse;
+ struct sockaddr_in sin;
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6)
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+				/* can't bind v4-mapped addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+			/* can't bind v4 addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ /*
+ * No lock required mgmt_ep_sa does its own locking. If the FIX:
+ * below is ever changed we may need to lock before calling
+ * association level binding.
+ */
+ if (assoc_id == 0) {
+ /* delete the address */
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_DEL_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
+
+/*
+ * returns the valid local address count for an assoc, taking into account
+ * all scoping rules
+ */
+int
+sctp_local_addr_count(struct sctp_tcb *stcb)
+{
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int count = 0;
+
+ /* Turn on all the appropriate scopes */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ ipv4_addr_legal = ipv6_addr_legal = 0;
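+	/*
+	 * A PF_INET6 endpoint may also use IPv4 addresses unless it is
+	 * v6-only; otherwise only IPv4 addresses are legal.
+	 */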
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ /* no vrf, no addresses */
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * bound all case: go through all ifns on the vrf
+ */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* skip unspecified addrs */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /*
+ * bad link
+ * local
+ * address
+ */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ }
+ }
+ } else {
+ /*
+ * subset bound case
+ */
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (count);
+}
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+void
+sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f)
+{
+ uint32_t saveindex, newindex;
+
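+	/*
+	 * Claim a slot in the global trace ring with a compare-and-swap
+	 * loop; the index wraps at SCTP_MAX_LOGGING_SIZE.
+	 */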
+ do {
+ saveindex = sctp_log.index;
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ newindex = 1;
+ } else {
+ newindex = saveindex + 1;
+ }
+ } while (atomic_cmpset_int(&sctp_log.index, saveindex, newindex) == 0);
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ saveindex = 0;
+ }
+ sctp_log.entry[saveindex].timestamp = SCTP_GET_CYCLECOUNT;
+ sctp_log.entry[saveindex].subsys = subsys;
+ sctp_log.entry[saveindex].params[0] = a;
+ sctp_log.entry[saveindex].params[1] = b;
+ sctp_log.entry[saveindex].params[2] = c;
+ sctp_log.entry[saveindex].params[3] = d;
+ sctp_log.entry[saveindex].params[4] = e;
+ sctp_log.entry[saveindex].params[5] = f;
+}
+
+#endif
Index: ip_icmp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_icmp.c -L sys/netinet/ip_icmp.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_icmp.c
+++ sys/netinet/ip_icmp.c
@@ -27,15 +27,16 @@
* SUCH DAMAGE.
*
* @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
- * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.101.2.2 2006/02/16 17:50:57 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_icmp.c,v 1.118 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipsec.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
@@ -54,24 +55,21 @@
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/icmp_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
-#define IPSEC
#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* ICMP routines: error generation, receive packet processing, and
* routines to turnaround packets back to the originator, and
@@ -92,19 +90,19 @@
static int drop_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
- &drop_redirect, 0, "");
+ &drop_redirect, 0, "Ignore ICMP redirects");
static int log_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
- &log_redirect, 0, "");
+ &log_redirect, 0, "Log ICMP redirects to the console");
static int icmplim = 200;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
- &icmplim, 0, "");
+ &icmplim, 0, "Maximum number of ICMP responses per second");
static int icmplim_output = 1;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
- &icmplim_output, 0, "");
+ &icmplim_output, 0, "Enable rate limiting of ICMP responses");
static char reply_src[IFNAMSIZ];
SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
@@ -143,11 +141,7 @@
* in response to bad packet ip.
*/
void
-icmp_error(n, type, code, dest, mtu)
- struct mbuf *n;
- int type, code;
- n_long dest;
- int mtu;
+icmp_error(struct mbuf *n, int type, int code, n_long dest, int mtu)
{
register struct ip *oip = mtod(n, struct ip *), *nip;
register unsigned oiphlen = oip->ip_hl << 2;
@@ -291,9 +285,7 @@
* Process a received ICMP message.
*/
void
-icmp_input(m, off)
- struct mbuf *m;
- int off;
+icmp_input(struct mbuf *m, int off)
{
struct icmp *icp;
struct in_ifaddr *ia;
@@ -620,8 +612,7 @@
* Reflect the ip packet back to the source
*/
static void
-icmp_reflect(m)
- struct mbuf *m;
+icmp_reflect(struct mbuf *m)
{
struct ip *ip = mtod(m, struct ip *);
struct ifaddr *ifa;
@@ -725,7 +716,7 @@
*/
cp = (u_char *) (ip + 1);
if ((opts = ip_srcroute(m)) == 0 &&
- (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+ (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
opts->m_len = sizeof(struct in_addr);
mtod(opts, struct in_addr *)->s_addr = 0;
}
@@ -800,9 +791,7 @@
* after supplying a checksum.
*/
static void
-icmp_send(m, opts)
- register struct mbuf *m;
- struct mbuf *opts;
+icmp_send(struct mbuf *m, struct mbuf *opts)
{
register struct ip *ip = mtod(m, struct ip *);
register int hlen;
@@ -829,7 +818,7 @@
}
n_time
-iptime()
+iptime(void)
{
struct timeval atv;
u_long t;
@@ -845,9 +834,7 @@
* is returned; otherwise, a smaller value is returned.
*/
int
-ip_next_mtu(mtu, dir)
- int mtu;
- int dir;
+ip_next_mtu(int mtu, int dir)
{
static int mtutab[] = {
65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
@@ -903,7 +890,8 @@
{ "icmp ping response" },
{ "icmp tstamp response" },
{ "closed port RST response" },
- { "open port RST response" }
+ { "open port RST response" },
+ { "icmp6 unreach response" }
};
/*
--- /dev/null
+++ sys/netinet/sctp_output.h
@@ -0,0 +1,215 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.h,v 1.14 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_output.h,v 1.13 2007/10/01 03:22:28 rrs Exp $");
+
+#ifndef __sctp_output_h__
+#define __sctp_output_h__
+
+#include <netinet/sctp_header.h>
+
+#if defined(_KERNEL)
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp,
+ struct sctp_scoping *scope,
+ struct mbuf *m_at,
+ int cnt_inits_to);
+
+
+int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update);
+int
+ sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa);
+
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro, struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id);
+
+int
+ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro);
+int
+ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro);
+
+void
+sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, int, struct sctphdr *, struct sctp_init_chunk *,
+ uint32_t, int);
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *, int, int *,
+ struct sctp_chunkhdr *);
+void sctp_queue_op_err(struct sctp_tcb *, struct mbuf *);
+
+int
+sctp_send_cookie_echo(struct mbuf *, int, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void sctp_send_cookie_ack(struct sctp_tcb *);
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sctp_nets *);
+
+
+void sctp_send_shutdown(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *);
+
+void
+sctp_send_shutdown_complete2(struct mbuf *, int, struct sctphdr *,
+ uint32_t);
+
+void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
+
+void sctp_send_asconf_ack(struct sctp_tcb *);
+
+int sctp_get_frag_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_cookies(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_asconf(struct sctp_tcb *);
+
+void sctp_fix_ecn_echo(struct sctp_association *);
+
+int
+sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *, int);
+
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holdslock);
+
+void
+sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+void
+sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void send_forward_tsn(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_send_sack(struct sctp_tcb *);
+
+int sctp_send_hb(struct sctp_tcb *, int, struct sctp_nets *);
+
+void sctp_send_ecn_echo(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
+ int, int);
+
+
+
+void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent);
+
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result);
+
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next);
+
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list,
+ uint8_t send_out_req, uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req);
+
+
+void
+sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
+ struct mbuf *, uint32_t);
+
+void sctp_send_operr_to(struct mbuf *, int, struct mbuf *, uint32_t, uint32_t);
+
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/ip_ipsec.h
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/ip_ipsec.h,v 1.2 2007/08/05 16:16:15 bz Exp $
+ */
+
+#ifndef _NETINET_IP_IPSEC_H_
+#define _NETINET_IP_IPSEC_H_
+
+int ip_ipsec_filtertunnel(struct mbuf *);
+int ip_ipsec_fwd(struct mbuf *);
+int ip_ipsec_input(struct mbuf *);
+int ip_ipsec_mtu(struct mbuf *);
+int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
+ struct route **, struct route *, struct sockaddr_in **,
+ struct in_ifaddr **, struct ifnet **);
+#endif
--- /dev/null
+++ sys/netinet/sctp_auth.c
@@ -0,0 +1,2477 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_auth.c,v 1.18.4.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_auth.h>
+
+#ifdef SCTP_DEBUG
+#define SCTP_AUTH_DEBUG (sctp_debug_on & SCTP_DEBUG_AUTH1)
+#define SCTP_AUTH_DEBUG2 (sctp_debug_on & SCTP_DEBUG_AUTH2)
+#endif /* SCTP_DEBUG */
+
+
+void
+sctp_clear_chunklist(sctp_auth_chklist_t * chklist)
+{
+ bzero(chklist, sizeof(*chklist));
+ /* chklist->num_chunks = 0; */
+}
+
+sctp_auth_chklist_t *
+sctp_alloc_chunklist(void)
+{
+ sctp_auth_chklist_t *chklist;
+
+ SCTP_MALLOC(chklist, sctp_auth_chklist_t *, sizeof(*chklist),
+ SCTP_M_AUTH_CL);
+ if (chklist == NULL) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "sctp_alloc_chunklist: failed to get memory!\n");
+ } else {
+ sctp_clear_chunklist(chklist);
+ }
+ return (chklist);
+}
+
+void
+sctp_free_chunklist(sctp_auth_chklist_t * list)
+{
+ if (list != NULL)
+ SCTP_FREE(list, SCTP_M_AUTH_CL);
+}
+
+sctp_auth_chklist_t *
+sctp_copy_chunklist(sctp_auth_chklist_t * list)
+{
+ sctp_auth_chklist_t *new_list;
+
+ if (list == NULL)
+ return (NULL);
+
+ /* get a new list */
+ new_list = sctp_alloc_chunklist();
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ bcopy(list, new_list, sizeof(*new_list));
+
+ return (new_list);
+}
+
+
+/*
+ * add a chunk to the required chunks list
+ */
+int
+sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_INITIATION) ||
+ (chunk == SCTP_INITIATION_ACK) ||
+ (chunk == SCTP_SHUTDOWN_COMPLETE) ||
+ (chunk == SCTP_AUTHENTICATION)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 0) {
+ list->chunks[chunk] = 1;
+ list->num_chunks++;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: added chunk %u (0x%02x) to Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+/*
+ * delete a chunk from the required chunks list
+ */
+int
+sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_ASCONF) ||
+ (chunk == SCTP_ASCONF_ACK)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 1) {
+ list->chunks[chunk] = 0;
+ list->num_chunks--;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: deleted chunk %u (0x%02x) from Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+size_t
+sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (0);
+ else
+ return (list->num_chunks);
+}
+
+/*
+ * set the default list of chunks requiring AUTH
+ */
+void
+sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
+{
+ (void)sctp_auth_add_chunk(SCTP_ASCONF, list);
+ (void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
+}
+
+/*
+ * return the current number and list of required chunks; the caller must
+ * guarantee that ptr has space for up to 256 bytes
+ */
+int
+sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, count = 0;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ count++;
+ }
+ }
+ return (count);
+}
+
+int
+sctp_pack_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, size = 0;
+
+ if (list == NULL)
+ return (0);
+
+ if (list->num_chunks <= 32) {
+ /* just list them, one byte each */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ size++;
+ }
+ }
+ } else {
+ int index, offset;
+
+ /* pack into a 32 byte bitfield */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ index = i / 8;
+ offset = i % 8;
+ ptr[index] |= (1 << offset);
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
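The one-byte-per-type encoding above is only used while 32 or fewer chunk types are listed; beyond that the list collapses into a fixed 32-byte bitfield. A minimal, self-contained sketch of that bitfield layout (illustrative only, not part of this change; the function name and the 256-entry presence table are invented for the example):

#include <stdint.h>
#include <string.h>

/* Pack a 256-entry chunk-presence table into the 32-byte bitfield form:
 * bit (i % 8) of byte (i / 8) is set when chunk type i requires AUTH. */
static size_t
example_pack_chunk_bitmap(const uint8_t chunks[256], uint8_t out[32])
{
        int i;

        memset(out, 0, 32);
        for (i = 0; i < 256; i++) {
                if (chunks[i] != 0)
                        out[i / 8] |= (uint8_t)(1 << (i % 8));
        }
        return (32);
}

sctp_unpack_auth_chunks() below reverses the same layout when a peer lists more than 32 chunk types.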
+
+int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list)
+{
+ int i;
+ int size;
+
+ if (list == NULL)
+ return (0);
+
+ if (num_chunks <= 32) {
+ /* just pull them, one byte each */
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(*ptr++, list);
+ }
+ size = num_chunks;
+ } else {
+ int index, offset;
+
+ /* unpack from a 32 byte bitfield */
+ for (index = 0; index < 32; index++) {
+ for (offset = 0; offset < 8; offset++) {
+ if (ptr[index] & (1 << offset)) {
+ (void)sctp_auth_add_chunk((index * 8) + offset, list);
+ }
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
+
+
+/*
+ * allocate structure space for a key of length keylen
+ */
+sctp_key_t *
+sctp_alloc_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_key_t *, sizeof(*new_key) + keylen,
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+void
+sctp_free_key(sctp_key_t * key)
+{
+ if (key != NULL)
+ SCTP_FREE(key, SCTP_M_AUTH_KY);
+}
+
+void
+sctp_print_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+void
+sctp_show_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+static uint32_t
+sctp_get_keylen(sctp_key_t * key)
+{
+ if (key != NULL)
+ return (key->keylen);
+ else
+ return (0);
+}
+
+/*
+ * generate a new random key of length 'keylen'
+ */
+sctp_key_t *
+sctp_generate_random_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ /* validate keylen */
+ if (keylen > SCTP_AUTH_RANDOM_SIZE_MAX)
+ keylen = SCTP_AUTH_RANDOM_SIZE_MAX;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ SCTP_READ_RANDOM(new_key->key, keylen);
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+sctp_key_t *
+sctp_set_key(uint8_t * key, uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bcopy(key, new_key->key, keylen);
+ return (new_key);
+}
+
+/*
+ * given two keys of variable size, compute which key is "larger"/"smaller".
+ * returns: 1 if key1 > key2, -1 if key1 < key2, 0 if key1 == key2
+ */
+static int
+sctp_compare_key(sctp_key_t * key1, sctp_key_t * key2)
+{
+ uint32_t maxlen;
+ uint32_t i;
+ uint32_t key1len, key2len;
+ uint8_t *key_1, *key_2;
+ uint8_t temp[SCTP_AUTH_RANDOM_SIZE_MAX];
+
+ /* sanity/length check */
+ key1len = sctp_get_keylen(key1);
+ key2len = sctp_get_keylen(key2);
+ if ((key1len == 0) && (key2len == 0))
+ return (0);
+ else if (key1len == 0)
+ return (-1);
+ else if (key2len == 0)
+ return (1);
+
+ if (key1len != key2len) {
+ if (key1len >= key2len)
+ maxlen = key1len;
+ else
+ maxlen = key2len;
+ bzero(temp, maxlen);
+ if (key1len < maxlen) {
+ /* prepend zeroes to key1 */
+ bcopy(key1->key, temp + (maxlen - key1len), key1len);
+ key_1 = temp;
+ key_2 = key2->key;
+ } else {
+ /* prepend zeroes to key2 */
+ bcopy(key2->key, temp + (maxlen - key2len), key2len);
+ key_1 = key1->key;
+ key_2 = temp;
+ }
+ } else {
+ maxlen = key1len;
+ key_1 = key1->key;
+ key_2 = key2->key;
+ }
+
+ for (i = 0; i < maxlen; i++) {
+ if (*key_1 > *key_2)
+ return (1);
+ else if (*key_1 < *key_2)
+ return (-1);
+ key_1++;
+ key_2++;
+ }
+
+ /* keys are equal value, so check lengths */
+ if (key1len == key2len)
+ return (0);
+ else if (key1len < key2len)
+ return (-1);
+ else
+ return (1);
+}
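As a concrete illustration of the comparison above: a one-byte key {0x02} compared against a two-byte key {0x00, 0x01} is first left-padded to {0x00, 0x02}, so the byte-wise scan sees 0x02 > 0x01 and returns 1; the shorter key is the numerically larger one, and only when the padded values are equal does the key length break the tie.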
+
+/*
+ * generate the concatenated keying material based on the two keys and the
+ * shared key (if available). draft-ietf-tsvwg-auth specifies the exact
+ * order for concatenation
+ */
+sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2, sctp_key_t * shared)
+{
+ uint32_t keylen;
+ sctp_key_t *new_key;
+ uint8_t *key_ptr;
+
+ keylen = sctp_get_keylen(key1) + sctp_get_keylen(key2) +
+ sctp_get_keylen(shared);
+
+ if (keylen > 0) {
+ /* get space for the new key */
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ key_ptr = new_key->key;
+ } else {
+ /* all keys empty/null?! */
+ return (NULL);
+ }
+
+ /* concatenate the keys */
+ if (sctp_compare_key(key1, key2) <= 0) {
+#ifdef SCTP_AUTH_DRAFT_04
+ /* key is key1 + shared + key2 */
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+#else
+ /* key is shared + key1 + key2 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+#endif
+ } else {
+#ifdef SCTP_AUTH_DRAFT_04
+ /* key is key2 + shared + key1 */
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+#else
+ /* key is shared + key2 + key1 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+#endif
+ }
+ return (new_key);
+}
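Outside the draft-04 compatibility branch, the ordering above is simply the shared key first, then the numerically smaller random, then the larger one. A flat-buffer sketch of that default ordering (illustrative only; it assumes equal-length randoms so that memcmp() agrees with the numeric compare done by sctp_compare_key(), and the function name is invented):

#include <stdint.h>
#include <string.h>

static size_t
example_compute_hashkey(const uint8_t *r1, const uint8_t *r2, size_t rlen,
    const uint8_t *shared, size_t slen, uint8_t *out)
{
        size_t off = 0;

        /* shared key always leads */
        memcpy(out + off, shared, slen);
        off += slen;
        /* then the smaller random, then the larger one */
        if (memcmp(r1, r2, rlen) <= 0) {
                memcpy(out + off, r1, rlen);
                off += rlen;
                memcpy(out + off, r2, rlen);
        } else {
                memcpy(out + off, r2, rlen);
                off += rlen;
                memcpy(out + off, r1, rlen);
        }
        return (off + rlen);
}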
+
+
+sctp_sharedkey_t *
+sctp_alloc_sharedkey(void)
+{
+ sctp_sharedkey_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_sharedkey_t *, sizeof(*new_key),
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keyid = 0;
+ new_key->key = NULL;
+ return (new_key);
+}
+
+void
+sctp_free_sharedkey(sctp_sharedkey_t * skey)
+{
+ if (skey != NULL) {
+ if (skey->key != NULL)
+ sctp_free_key(skey->key);
+ SCTP_FREE(skey, SCTP_M_AUTH_KY);
+ }
+}
+
+sctp_sharedkey_t *
+sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id)
+{
+ sctp_sharedkey_t *skey;
+
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (skey->keyid == key_id)
+ return (skey);
+ }
+ return (NULL);
+}
+
+void
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey)
+{
+ sctp_sharedkey_t *skey;
+
+ if ((shared_keys == NULL) || (new_skey == NULL))
+ return;
+
+ /* insert into an empty list? */
+ if (SCTP_LIST_EMPTY(shared_keys)) {
+ LIST_INSERT_HEAD(shared_keys, new_skey, next);
+ return;
+ }
+ /* insert into the existing list, ordered by key id */
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (new_skey->keyid < skey->keyid) {
+ /* insert it before here */
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ return;
+ } else if (new_skey->keyid == skey->keyid) {
+ /* replace the existing key */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "replacing shared key id %u\n",
+ new_skey->keyid);
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey);
+ return;
+ }
+ if (LIST_NEXT(skey, next) == NULL) {
+ /* belongs at the end of the list */
+ LIST_INSERT_AFTER(skey, new_skey, next);
+ return;
+ }
+ }
+}
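A hedged usage sketch tying the shared-key helpers together (not part of this change; the wrapper name is invented, the key bytes and list head are caller-supplied placeholders, and error handling is minimal):

static int
example_install_sharedkey(struct sctp_keyhead *keys, uint16_t keyid,
    uint8_t *keydata, uint32_t keylen)
{
        sctp_sharedkey_t *skey;

        skey = sctp_alloc_sharedkey();
        if (skey == NULL)
                return (-1);
        skey->keyid = keyid;
        skey->key = sctp_set_key(keydata, keylen);
        if (skey->key == NULL) {
                sctp_free_sharedkey(skey);
                return (-1);
        }
        /* keeps the list ordered by key id and replaces a duplicate id */
        sctp_insert_sharedkey(keys, skey);
        return (0);
}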
+
+static sctp_sharedkey_t *
+sctp_copy_sharedkey(const sctp_sharedkey_t * skey)
+{
+ sctp_sharedkey_t *new_skey;
+
+ if (skey == NULL)
+ return (NULL);
+ new_skey = sctp_alloc_sharedkey();
+ if (new_skey == NULL)
+ return (NULL);
+ if (skey->key != NULL)
+ new_skey->key = sctp_set_key(skey->key->key, skey->key->keylen);
+ else
+ new_skey->key = NULL;
+ new_skey->keyid = skey->keyid;
+ return (new_skey);
+}
+
+int
+sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest)
+{
+ sctp_sharedkey_t *skey, *new_skey;
+ int count = 0;
+
+ if ((src == NULL) || (dest == NULL))
+ return (0);
+ LIST_FOREACH(skey, src, next) {
+ new_skey = sctp_copy_sharedkey(skey);
+ if (new_skey != NULL) {
+ sctp_insert_sharedkey(dest, new_skey);
+ count++;
+ }
+ }
+ return (count);
+}
+
+
+sctp_hmaclist_t *
+sctp_alloc_hmaclist(uint8_t num_hmacs)
+{
+ sctp_hmaclist_t *new_list;
+ int alloc_size;
+
+ alloc_size = sizeof(*new_list) + num_hmacs * sizeof(new_list->hmac[0]);
+ SCTP_MALLOC(new_list, sctp_hmaclist_t *, alloc_size,
+ SCTP_M_AUTH_HL);
+ if (new_list == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_list->max_algo = num_hmacs;
+ new_list->num_algo = 0;
+ return (new_list);
+}
+
+void
+sctp_free_hmaclist(sctp_hmaclist_t * list)
+{
+ if (list != NULL) {
+ SCTP_FREE(list, SCTP_M_AUTH_HL);
+ list = NULL;
+ }
+}
+
+int
+sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id)
+{
+ int i;
+
+ if (list == NULL)
+ return (-1);
+ if (list->num_algo == list->max_algo) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: HMAC id list full, ignoring add %u\n", hmac_id);
+ return (-1);
+ }
+ if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) &&
+#ifdef HAVE_SHA224
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA224) &&
+#endif
+#ifdef HAVE_SHA2
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA256) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA384) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA512) &&
+#endif
+ (hmac_id != SCTP_AUTH_HMAC_ID_MD5)) {
+ return (-1);
+ }
+ /* Now is it already in the list */
+ for (i = 0; i < list->num_algo; i++) {
+ if (list->hmac[i] == hmac_id) {
+ /* already in list */
+ return (-1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: add HMAC id %u to list\n", hmac_id);
+ list->hmac[list->num_algo++] = hmac_id;
+ return (0);
+}
+
+sctp_hmaclist_t *
+sctp_copy_hmaclist(sctp_hmaclist_t * list)
+{
+ sctp_hmaclist_t *new_list;
+ int i;
+
+ if (list == NULL)
+ return (NULL);
+ /* get a new list */
+ new_list = sctp_alloc_hmaclist(list->max_algo);
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ new_list->max_algo = list->max_algo;
+ new_list->num_algo = list->num_algo;
+ for (i = 0; i < list->num_algo; i++)
+ new_list->hmac[i] = list->hmac[i];
+ return (new_list);
+}
+
+sctp_hmaclist_t *
+sctp_default_supported_hmaclist(void)
+{
+ sctp_hmaclist_t *new_list;
+
+ new_list = sctp_alloc_hmaclist(2);
+ if (new_list == NULL)
+ return (NULL);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
+ return (new_list);
+}
+
+/*
+ * HMAC algos are listed in priority/preference order; find the best HMAC id
+ * to use for the peer based on local support
+ */
+uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer, sctp_hmaclist_t * local)
+{
+ int i, j;
+
+ if ((local == NULL) || (peer == NULL))
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+
+ for (i = 0; i < peer->num_algo; i++) {
+ for (j = 0; j < local->num_algo; j++) {
+ if (peer->hmac[i] == local->hmac[j]) {
+#ifndef SCTP_AUTH_DRAFT_04
+ /* "skip" MD5 as it's been deprecated */
+ if (peer->hmac[i] == SCTP_AUTH_HMAC_ID_MD5)
+ continue;
+#endif
+
+ /* found the "best" one */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: negotiated peer HMAC id %u\n",
+ peer->hmac[i]);
+ return (peer->hmac[i]);
+ }
+ }
+ }
+ /* didn't find one! */
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+}
+
+/*
+ * serialize the HMAC algo list and return the space used; the caller must
+ * guarantee that ptr has appropriate space
+ */
+int
+sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr)
+{
+ int i;
+ uint16_t hmac_id;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++) {
+ hmac_id = htons(list->hmac[i]);
+ bcopy(&hmac_id, ptr, sizeof(hmac_id));
+ ptr += sizeof(hmac_id);
+ }
+ return (list->num_algo * sizeof(hmac_id));
+}
+
+int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs)
+{
+ uint32_t i;
+ uint16_t hmac_id;
+ uint32_t sha1_supported = 0;
+
+ for (i = 0; i < num_hmacs; i++) {
+ hmac_id = ntohs(hmacs->hmac_ids[i]);
+ if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1)
+ sha1_supported = 1;
+ }
+ /* all HMAC id's are supported */
+ if (sha1_supported == 0)
+ return (-1);
+ else
+ return (0);
+}
+
+sctp_authinfo_t *
+sctp_alloc_authinfo(void)
+{
+ sctp_authinfo_t *new_authinfo;
+
+ SCTP_MALLOC(new_authinfo, sctp_authinfo_t *, sizeof(*new_authinfo),
+ SCTP_M_AUTH_IF);
+
+ if (new_authinfo == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bzero(new_authinfo, sizeof(*new_authinfo));
+ return (new_authinfo);
+}
+
+void
+sctp_free_authinfo(sctp_authinfo_t * authinfo)
+{
+ if (authinfo == NULL)
+ return;
+
+ if (authinfo->random != NULL)
+ sctp_free_key(authinfo->random);
+ if (authinfo->peer_random != NULL)
+ sctp_free_key(authinfo->peer_random);
+ if (authinfo->assoc_key != NULL)
+ sctp_free_key(authinfo->assoc_key);
+ if (authinfo->recv_key != NULL)
+ sctp_free_key(authinfo->recv_key);
+
+ /* We are NOT dynamically allocating authinfo's right now... */
+ /* SCTP_FREE(authinfo, SCTP_M_AUTH_??); */
+}
+
+
+uint32_t
+sctp_get_auth_chunk_len(uint16_t hmac_algo)
+{
+ int size;
+
+ size = sizeof(struct sctp_auth_chunk) + sctp_get_hmac_digest_len(hmac_algo);
+ return (SCTP_SIZE32(size));
+}
+
+uint32_t
+sctp_get_hmac_digest_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ return (SCTP_AUTH_DIGEST_LEN_SHA1);
+ case SCTP_AUTH_HMAC_ID_MD5:
+ return (SCTP_AUTH_DIGEST_LEN_MD5);
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ return (SCTP_AUTH_DIGEST_LEN_SHA224);
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (SCTP_AUTH_DIGEST_LEN_SHA256);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ return (SCTP_AUTH_DIGEST_LEN_SHA384);
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (SCTP_AUTH_DIGEST_LEN_SHA512);
+#endif
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static inline int
+sctp_get_hmac_block_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ case SCTP_AUTH_HMAC_ID_MD5:
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+#endif
+ return (64);
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (64);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (128);
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static void
+sctp_hmac_init(uint16_t hmac_algo, sctp_hash_context_t * ctx)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Init(&ctx->sha1);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Init(&ctx->md5);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Init(&ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Init(&ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Init(&ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_update(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * text, uint32_t textlen)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Update(&ctx->sha1, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Update(&ctx->md5, text, textlen);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Update(&ctx->sha256, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Update(&ctx->sha384, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Update(&ctx->sha512, text, textlen);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_final(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * digest)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Final(digest, &ctx->sha1);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Final(digest, &ctx->md5);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Final(digest, &ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ /* SHA384 is truncated SHA512 */
+ SHA384_Final(digest, &ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Final(digest, &ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+/*
+ * Keyed-Hashing for Message Authentication: FIPS 198 (RFC 2104)
+ *
+ * Compute the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm. Resulting digest is placed in 'digest' and digest length
+ * is returned if the HMAC was performed.
+ *
+ * WARNING: it is up to the caller to supply sufficient space to hold the
+ * resultant digest.
+ */
+uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (text == NULL) ||
+ (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, text, textlen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
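For a flat (non-mbuf) buffer the routine above is called directly; a minimal sketch using the RFC 2202 test-case-1 key as placeholder input (illustrative only; the wrapper name is invented, and the expected 20-byte digest matches the SCTP_HMAC_TEST vector near the end of this file):

static void
example_hmac_sha1(void)
{
        uint8_t key[20];
        char text[] = "Hi There";
        uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX];
        uint32_t dlen;

        memset(key, 0x0b, sizeof(key));         /* RFC 2202 test case 1 key */
        dlen = sctp_hmac(SCTP_AUTH_HMAC_ID_SHA1, key, sizeof(key),
            (uint8_t *)text, strlen(text), digest);
        /* dlen is SCTP_AUTH_DIGEST_LEN_SHA1 (20) on success, 0 on failure */
}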
+
+/* mbuf version */
+uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+ struct mbuf *m_tmp;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ /* find the correct starting mbuf and offset (get start of text) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain for the text */
+ while (m_tmp != NULL) {
+ if ((SCTP_BUF_NEXT(m_tmp) == NULL) && trailer) {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - (trailer + m_offset));
+ } else {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - m_offset);
+ }
+
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
+
+/*
+ * verify the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm. Returns -1 on error, 0 on success.
+ */
+int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen,
+ uint8_t * digest, uint32_t digestlen)
+{
+ uint32_t len;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) ||
+ (text == NULL) || (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest */
+ return (-1);
+ }
+ len = sctp_get_hmac_digest_len(hmac_algo);
+ if ((len == 0) || (digestlen != len))
+ return (-1);
+
+ /* compute the expected hash */
+ if (sctp_hmac(hmac_algo, key, keylen, text, textlen, temp) != len)
+ return (-1);
+
+ if (memcmp(digest, temp, digestlen) != 0)
+ return (-1);
+ else
+ return (0);
+}
+
+
+/*
+ * computes the requested HMAC using a key struct (which may be modified if
+ * the keylen exceeds the HMAC block len).
+ */
+uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key, uint8_t * text,
+ uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (text == NULL) || (textlen == 0) ||
+ (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac(hmac_algo, key->key, key->keylen, text, textlen,
+ digest));
+}
+
+/* mbuf version */
+uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key, struct mbuf *m,
+ uint32_t m_offset, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac_m(hmac_algo, key->key, key->keylen, m, m_offset, digest, 0));
+}
+
+int
+sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id)
+{
+ int i;
+
+ if ((list == NULL) || (id == SCTP_AUTH_HMAC_ID_RSVD))
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++)
+ if (list->hmac[i] == id)
+ return (1);
+
+ /* not in the list */
+ return (0);
+}
+
+
+/*
+ * clear any cached key(s) if they match the given key id on an association;
+ * the cached key(s) will be recomputed and re-cached at next use. ASSUMES
+ * TCB_LOCK is already held
+ */
+void
+sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ if (stcb == NULL)
+ return;
+
+ if (keyid == stcb->asoc.authinfo.assoc_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ stcb->asoc.authinfo.assoc_key = NULL;
+ }
+ if (keyid == stcb->asoc.authinfo.recv_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key = NULL;
+ }
+}
+
+/*
+ * clear any cached key(s) if they match the given key id for all assocs on
+ * an endpoint. ASSUMES INP_WLOCK is already held
+ */
+void
+sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ struct sctp_tcb *stcb;
+
+ if (inp == NULL)
+ return;
+
+ /* clear the cached keys on all assocs on this instance */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_clear_cachedkeys(stcb, keyid);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
+/*
+ * delete a shared key from an association. ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (stcb == NULL)
+ return (-1);
+
+ /* is the keyid the assoc active sending key */
+ if (keyid == stcb->asoc.authinfo.assoc_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys(stcb, keyid);
+ return (0);
+}
+
+/*
+ * deletes a shared key from the endpoint. ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+ struct sctp_tcb *stcb;
+
+ if (inp == NULL)
+ return (-1);
+
+ /* is the keyid the active sending key on the endpoint or any assoc */
+ if (keyid == inp->sctp_ep.default_keyid)
+ return (-1);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (keyid == stcb->asoc.authinfo.assoc_keyid) {
+ SCTP_TCB_UNLOCK(stcb);
+ return (-1);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys_ep(inp, keyid);
+ return (0);
+}
+
+/*
+ * set the active key on an association. ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey = NULL;
+ sctp_key_t *key = NULL;
+ int using_ep_key = 0;
+
+ /* find the key on the assoc */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL) {
+ /* if not on the assoc, find the key on the endpoint */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ keyid);
+ using_ep_key = 1;
+ }
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ if (using_ep_key) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ return (-1);
+ }
+ /* get the shared key text */
+ key = skey->key;
+
+ /* free any existing cached key */
+ if (stcb->asoc.authinfo.assoc_key != NULL)
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ /* compute a new assoc key and cache it */
+ stcb->asoc.authinfo.assoc_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, key);
+ stcb->asoc.authinfo.assoc_keyid = keyid;
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.assoc_key, "Assoc Key");
+#endif
+
+ if (using_ep_key) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ return (0);
+}
+
+/*
+ * set the active key on an endpoint. ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ /* find the key */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ return (-1);
+ }
+ inp->sctp_ep.default_keyid = keyid;
+ return (0);
+}
+
+/*
+ * get local authentication parameters from cookie (from INIT-ACK)
+ */
+void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length)
+{
+ struct sctp_paramhdr *phdr, tmp_param;
+ uint16_t plen, ptype;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+
+ /* convert to upper bound */
+ length += offset;
+
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (phdr != NULL) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if ((plen == 0) || (offset + plen > length))
+ break;
+
+ if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store, min(plen, sizeof(random_store)));
+ if (phdr == NULL)
+ return;
+ /* save the random and length for the key */
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store, min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return;
+ /* save the hmacs list and num for the key */
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ if (stcb->asoc.local_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.local_hmacs);
+ stcb->asoc.local_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.local_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store, min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return;
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ /* save chunks list and num for the key */
+ if (stcb->asoc.local_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.local_auth_chunks);
+ else
+ stcb->asoc.local_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.local_auth_chunks);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ break;
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ }
+ /* concatenate the full random key */
+#ifdef SCTP_AUTH_DRAFT_04
+ keylen = random_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL)
+ bcopy(p_random->random_data, new_key->key, random_len);
+ }
+#else
+ keylen = sizeof(*p_random) + random_len + sizeof(*chunks) + num_chunks +
+ sizeof(*hmacs) + hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ }
+#endif
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ stcb->asoc.authinfo.random->keylen = random_len;
+#endif
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ /* negotiate what HMAC to use for the peer */
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+ /* copy defaults from the endpoint */
+ /* FIX ME: put in cookie? */
+ stcb->asoc.authinfo.assoc_keyid = stcb->sctp_ep->sctp_ep.default_keyid;
+}
+
+/*
+ * compute and fill in the HMAC digest for a packet
+ */
+void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth, struct sctp_tcb *stcb)
+{
+ uint32_t digestlen;
+ sctp_sharedkey_t *skey;
+ sctp_key_t *key;
+
+ if ((stcb == NULL) || (auth == NULL))
+ return;
+
+ /* zero the digest + chunk padding */
+ digestlen = sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ bzero(auth->hmac, SCTP_SIZE32(digestlen));
+ /* is an assoc key cached? */
+ if (stcb->asoc.authinfo.assoc_key == NULL) {
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys,
+ stcb->asoc.authinfo.assoc_keyid);
+ if (skey == NULL) {
+ /* not in the assoc list, so check the endpoint list */
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ stcb->asoc.authinfo.assoc_keyid);
+ }
+ /* the only way skey is NULL is if null key id 0 is used */
+ if (skey != NULL)
+ key = skey->key;
+ else
+ key = NULL;
+ /* compute a new assoc key and cache it */
+ stcb->asoc.authinfo.assoc_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, key);
+ SCTPDBG(SCTP_DEBUG_AUTH1, "caching key id %u\n",
+ stcb->asoc.authinfo.assoc_keyid);
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.assoc_key,
+ "Assoc Key");
+#endif
+ }
+ /* set in the active key id */
+ auth->shared_key_id = htons(stcb->asoc.authinfo.assoc_keyid);
+
+ /* compute and fill in the digest */
+ (void)sctp_compute_hmac_m(stcb->asoc.peer_hmac_id,
+ stcb->asoc.authinfo.assoc_key,
+ m, auth_offset, auth->hmac);
+}
+
+
+static void
+sctp_bzero_m(struct mbuf *m, uint32_t m_offset, uint32_t size)
+{
+ struct mbuf *m_tmp;
+ uint8_t *data;
+
+ /* sanity check */
+ if (m == NULL)
+ return;
+
+ /* find the correct starting mbuf and offset (get start position) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain */
+ while ((m_tmp != NULL) && (size > 0)) {
+ data = mtod(m_tmp, uint8_t *) + m_offset;
+ if (size > (uint32_t) SCTP_BUF_LEN(m_tmp)) {
+ bzero(data, SCTP_BUF_LEN(m_tmp));
+ size -= SCTP_BUF_LEN(m_tmp);
+ } else {
+ bzero(data, size);
+ size = 0;
+ }
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+}
+
+/*
+ * process the incoming Authentication chunk. return codes: -1 on any
+ * authentication error, 0 on successful authentication verification
+ */
+int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
+ struct mbuf *m, uint32_t offset)
+{
+ uint16_t chunklen;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ sctp_sharedkey_t *skey;
+ uint32_t digestlen;
+ uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* auth is checked for NULL by caller */
+ chunklen = ntohs(auth->ch.chunk_length);
+ if (chunklen < sizeof(*auth)) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ return (-1);
+ }
+ SCTP_STAT_INCR(sctps_recvauth);
+
+ /* get the auth params */
+ shared_key_id = ntohs(auth->shared_key_id);
+ hmac_id = ntohs(auth->hmac_id);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP AUTH Chunk: shared key %u, HMAC id %u\n",
+ shared_key_id, hmac_id);
+
+ /* is the indicated HMAC supported? */
+ if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
+ struct mbuf *m_err;
+ struct sctp_auth_invalid_hmac *err;
+
+ SCTP_STAT_INCR(sctps_recvivalhmacid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unsupported HMAC id %u\n",
+ hmac_id);
+ /*
+ * report this in an Error Chunk: Unsupported HMAC
+ * Identifier
+ */
+ m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
+ 1, MT_HEADER);
+ if (m_err != NULL) {
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+ /* fill in the error */
+ err = mtod(m_err, struct sctp_auth_invalid_hmac *);
+ bzero(err, sizeof(*err));
+ err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+ err->ph.param_length = htons(sizeof(*err));
+ err->hmac_id = ntohs(hmac_id);
+ SCTP_BUF_LEN(m_err) = sizeof(*err);
+ /* queue it */
+ sctp_queue_op_err(stcb, m_err);
+ }
+ return (-1);
+ }
+ /* get the indicated shared key, if available */
+ if ((stcb->asoc.authinfo.recv_key == NULL) ||
+ (stcb->asoc.authinfo.recv_keyid != shared_key_id)) {
+ /* find the shared key on the assoc first */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, shared_key_id);
+ if (skey == NULL) {
+ /* if not on the assoc, find it on the endpoint */
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ shared_key_id);
+ }
+ /* if the shared key isn't found, discard the chunk */
+ if (skey == NULL) {
+ SCTP_STAT_INCR(sctps_recvivalkeyid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unknown key id %u\n",
+ shared_key_id);
+ return (-1);
+ }
+ /* generate a notification if this is a new key id */
+ if (stcb->asoc.authinfo.recv_keyid != shared_key_id)
+ /*
+ * sctp_ulp_notify(SCTP_NOTIFY_AUTH_NEW_KEY, stcb,
+ * shared_key_id, (void
+ * *)stcb->asoc.authinfo.recv_keyid);
+ */
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY,
+ shared_key_id, stcb->asoc.authinfo.recv_keyid);
+ /* compute a new recv assoc key and cache it */
+ if (stcb->asoc.authinfo.recv_key != NULL)
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, skey->key);
+ stcb->asoc.authinfo.recv_keyid = shared_key_id;
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.recv_key, "Recv Key");
+#endif
+ }
+ /* validate the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_id);
+ if (chunklen < (sizeof(*auth) + digestlen)) {
+ /* invalid digest length */
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: chunk too short for HMAC\n");
+ return (-1);
+ }
+ /* save a copy of the digest, zero the pseudo header, and validate */
+ bcopy(auth->hmac, digest, digestlen);
+ sctp_bzero_m(m, offset + sizeof(*auth), SCTP_SIZE32(digestlen));
+ (void)sctp_compute_hmac_m(hmac_id, stcb->asoc.authinfo.recv_key,
+ m, offset, computed_digest);
+
+ /* compare the computed digest with the one in the AUTH chunk */
+ if (memcmp(digest, computed_digest, digestlen) != 0) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: HMAC digest check failed\n");
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Generate NOTIFICATION
+ */
+void
+sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
+ uint16_t keyid, uint16_t alt_keyid)
+{
+ struct mbuf *m_notify;
+ struct sctp_authkey_event *auth;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
+ 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+
+ SCTP_BUF_LEN(m_notify) = 0;
+ auth = mtod(m_notify, struct sctp_authkey_event *);
+ auth->auth_type = SCTP_AUTHENTICATION_EVENT;
+ auth->auth_flags = 0;
+ auth->auth_length = sizeof(*auth);
+ auth->auth_keynumber = keyid;
+ auth->auth_altkeynumber = alt_keyid;
+ auth->auth_indication = indication;
+ auth->auth_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(*auth);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0, m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+/*
+ * validates the AUTHentication related parameters in an INIT/INIT-ACK
+ * Note: currently only used for INIT as INIT-ACK is handled inline
+ * with sctp_load_addresses_from_init()
+ */
+int
+sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
+{
+ struct sctp_paramhdr *phdr, parm_buf;
+ uint16_t ptype, plen;
+ int peer_supports_asconf = 0;
+ int peer_supports_auth = 0;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+
+ /* go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen < sizeof(struct sctp_paramhdr)) {
+ break;
+ }
+ if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(plen, sizeof(local_store)));
+ if (phdr == NULL) {
+ return (-1);
+ }
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ peer_supports_asconf = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ peer_supports_auth = 1;
+ break;
+ default:
+ /* one we don't care about */
+ break;
+ }
+ }
+ } else if (ptype == SCTP_RANDOM) {
+ got_random = 1;
+ /* enforce the random length */
+ if (plen != (sizeof(struct sctp_auth_random) +
+ SCTP_AUTH_RANDOM_SIZE_REQUIRED)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid RANDOM len\n");
+ return (-1);
+ }
+ } else if (ptype == SCTP_HMAC_LIST) {
+ uint8_t store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs;
+ int num_hmacs;
+
+ if (plen > sizeof(store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)store, min(plen, sizeof(store)));
+ if (phdr == NULL)
+ return (-1);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ num_hmacs = (plen - sizeof(*hmacs)) /
+ sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid HMAC param\n");
+ return (-1);
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i, num_chunks;
+ uint8_t chunks_store[SCTP_SMALL_CHUNK_STORE];
+
+ /* did the peer send a non-empty chunk list? */
+ struct sctp_auth_chunk_list *chunks = NULL;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-1);
+
+ /*-
+ * Flip through the list and mark that the
+ * peer supports asconf/asconf_ack.
+ */
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ for (i = 0; i < num_chunks; i++) {
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ if (num_chunks)
+ got_chklist = 1;
+ }
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ peer_supports_auth = 1;
+ } else {
+ peer_supports_auth = 0;
+ }
+ if (!peer_supports_auth && got_chklist) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer sent chunk list w/o AUTH\n");
+ return (-1);
+ }
+ if (!sctp_asconf_auth_nochk && peer_supports_asconf &&
+ !peer_supports_auth) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer supports ASCONF but not AUTH\n");
+ return (-1);
+ } else if ((peer_supports_asconf) && (peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-2);
+ }
+ return (0);
+}
+
+void
+sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ uint16_t chunks_len = 0;
+ uint16_t hmacs_len = 0;
+ uint16_t random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ sctp_key_t *new_key;
+ uint16_t keylen;
+
+ /* initialize hmac list from endpoint */
+ stcb->asoc.local_hmacs = sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ hmacs_len = stcb->asoc.local_hmacs->num_algo *
+ sizeof(stcb->asoc.local_hmacs->hmac[0]);
+ }
+ /* initialize auth chunks list from endpoint */
+ stcb->asoc.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (stcb->asoc.local_auth_chunks != NULL) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ chunks_len++;
+ }
+ }
+ /* copy defaults from the endpoint */
+ stcb->asoc.authinfo.assoc_keyid = inp->sctp_ep.default_keyid;
+
+ /* now set the concatenated key (random + chunks + hmacs) */
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ keylen = random_len;
+ new_key = sctp_generate_random_key(keylen);
+#else
+ /* key includes parameter headers */
+ keylen = (3 * sizeof(struct sctp_paramhdr)) + random_len + chunks_len +
+ hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ struct sctp_paramhdr *ph;
+ int plen;
+
+ /* generate and copy in the RANDOM */
+ ph = (struct sctp_paramhdr *)new_key->key;
+ ph->param_type = htons(SCTP_RANDOM);
+ plen = sizeof(*ph) + random_len;
+ ph->param_length = htons(plen);
+ SCTP_READ_RANDOM(new_key->key + sizeof(*ph), random_len);
+ keylen = plen;
+
+ /* append in the AUTH chunks */
+ /* NOTE: currently we always have chunks to list */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_CHUNK_LIST);
+ plen = sizeof(*ph) + chunks_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ if (stcb->asoc.local_auth_chunks) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ new_key->key[keylen++] = i;
+ }
+ }
+ /* append in the HMACs */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_HMAC_LIST);
+ plen = sizeof(*ph) + hmacs_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ (void)sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ new_key->key + keylen);
+ }
+#endif
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+}
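
The endpoint key built above is three back-to-back TLVs (RANDOM, CHUNK_LIST, HMAC_LIST), each prefixed by a struct sctp_paramhdr whose length covers the header plus its data. The following is a minimal userland sketch of that layout logic only; the paramhdr stand-in, the P_* constants, and build_assoc_key() are illustrative assumptions, not part of this commit.

#include <arpa/inet.h>	/* htons */
#include <stdint.h>
#include <string.h>

/* stand-ins for the kernel definitions; values are illustrative only */
struct paramhdr { uint16_t type, length; };
#define P_RANDOM	0x8002
#define P_CHUNK_LIST	0x8003
#define P_HMAC_LIST	0x8004

/* append one TLV (header + payload) at "key + off" and return the new offset */
static size_t
append_tlv(uint8_t *key, size_t off, uint16_t type,
    const uint8_t *data, uint16_t datalen)
{
	struct paramhdr ph;

	ph.type = htons(type);
	ph.length = htons((uint16_t)(sizeof(ph) + datalen));
	memcpy(key + off, &ph, sizeof(ph));
	memcpy(key + off + sizeof(ph), data, datalen);
	return (off + sizeof(ph) + datalen);
}

/* concatenate random + chunk list + hmac list, mirroring the layout above */
size_t
build_assoc_key(uint8_t *key, const uint8_t *rnd, uint16_t rnd_len,
    const uint8_t *chunks, uint16_t chunks_len,
    const uint8_t *hmacs, uint16_t hmacs_len)
{
	size_t off = 0;

	off = append_tlv(key, off, P_RANDOM, rnd, rnd_len);
	off = append_tlv(key, off, P_CHUNK_LIST, chunks, chunks_len);
	off = append_tlv(key, off, P_HMAC_LIST, hmacs, hmacs_len);
	return (off);	/* total key length */
}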
+
+
+#ifdef SCTP_HMAC_TEST
+/*
+ * HMAC and key concatenation tests
+ */
+static void
+sctp_print_digest(uint8_t * digest, uint32_t digestlen, const char *str)
+{
+ uint32_t i;
+
+ printf("\n%s: 0x", str);
+ if (digest == NULL)
+ return;
+
+ for (i = 0; i < digestlen; i++)
+ printf("%02x", digest[i]);
+}
+
+static int
+sctp_test_hmac(const char *str, uint16_t hmac_id, uint8_t * key,
+ uint32_t keylen, uint8_t * text, uint32_t textlen,
+ uint8_t * digest, uint32_t digestlen)
+{
+ uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ printf("\n%s:", str);
+ sctp_hmac(hmac_id, key, keylen, text, textlen, computed_digest);
+ sctp_print_digest(digest, digestlen, "Expected digest");
+ sctp_print_digest(computed_digest, digestlen, "Computed digest");
+ if (memcmp(digest, computed_digest, digestlen) != 0) {
+ printf("\nFAILED");
+ return (-1);
+ } else {
+ printf("\nPASSED");
+ return (0);
+ }
+}
+
+
+/*
+ * RFC 2202: HMAC-SHA1 test cases
+ */
+void
+sctp_test_hmac_sha1(void)
+{
+ uint8_t *digest;
+ uint8_t key[128];
+ uint32_t keylen;
+ uint8_t text[128];
+ uint32_t textlen;
+ uint32_t digestlen = 20;
+ int failed = 0;
+
+ /*
+ * test_case = 1 key =
+ * 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b key_len = 20
+ * data = "Hi There" data_len = 8 digest =
+ * 0xb617318655057264e28bc0b6fb378c8ef146be00
+ */
+ keylen = 20;
+ memset(key, 0x0b, keylen);
+ textlen = 8;
+ strcpy(text, "Hi There");
+ digest = "\xb6\x17\x31\x86\x55\x05\x72\x64\xe2\x8b\xc0\xb6\xfb\x37\x8c\x8e\xf1\x46\xbe\x00";
+ if (sctp_test_hmac("SHA1 test case 1", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 2 key = "Jefe" key_len = 4 data =
+ * "what do ya want for nothing?" data_len = 28 digest =
+ * 0xeffcdf6ae5eb2fa2d27416d5f184df9c259a7c79
+ */
+ keylen = 4;
+ strcpy(key, "Jefe");
+ textlen = 28;
+ strcpy(text, "what do ya want for nothing?");
+ digest = "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79";
+ if (sctp_test_hmac("SHA1 test case 2", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 3 key =
+ * 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa key_len = 20
+ * data = 0xdd repeated 50 times data_len = 50 digest
+ * = 0x125d7342b9ac11cd91a39af48aa17b4f63f175d3
+ */
+ keylen = 20;
+ memset(key, 0xaa, keylen);
+ textlen = 50;
+ memset(text, 0xdd, textlen);
+ digest = "\x12\x5d\x73\x42\xb9\xac\x11\xcd\x91\xa3\x9a\xf4\x8a\xa1\x7b\x4f\x63\xf1\x75\xd3";
+ if (sctp_test_hmac("SHA1 test case 3", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 4 key =
+ * 0x0102030405060708090a0b0c0d0e0f10111213141516171819 key_len = 25
+ * data = 0xcd repeated 50 times data_len = 50 digest
+ * = 0x4c9007f4026250c6bc8414f9bf50c86c2d7235da
+ */
+ keylen = 25;
+ memcpy(key, "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19", keylen);
+ textlen = 50;
+ memset(text, 0xcd, textlen);
+ digest = "\x4c\x90\x07\xf4\x02\x62\x50\xc6\xbc\x84\x14\xf9\xbf\x50\xc8\x6c\x2d\x72\x35\xda";
+ if (sctp_test_hmac("SHA1 test case 4", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 5 key =
+ * 0x0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c key_len = 20
+ * data = "Test With Truncation" data_len = 20 digest
+ * = 0x4c1a03424b55e07fe7f27be1d58bb9324a9a5a04 digest-96 =
+ * 0x4c1a03424b55e07fe7f27be1
+ */
+ keylen = 20;
+ memset(key, 0x0c, keylen);
+ textlen = 20;
+ strcpy(text, "Test With Truncation");
+ digest = "\x4c\x1a\x03\x42\x4b\x55\xe0\x7f\xe7\xf2\x7b\xe1\xd5\x8b\xb9\x32\x4a\x9a\x5a\x04";
+ if (sctp_test_hmac("SHA1 test case 5", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 6 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key -
+ * Hash Key First" data_len = 54 digest =
+ * 0xaa4ae5e15272d00e95705637ce8a3b55ed402112
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 54;
+ strcpy(text, "Test Using Larger Than Block-Size Key - Hash Key First");
+ digest = "\xaa\x4a\xe5\xe1\x52\x72\xd0\x0e\x95\x70\x56\x37\xce\x8a\x3b\x55\xed\x40\x21\x12";
+ if (sctp_test_hmac("SHA1 test case 6", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 7 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key and
+ * Larger Than One Block-Size Data" data_len = 73 digest =
+ * 0xe8e99d0f45237d786d6bbaa7965c7808bbff1a91
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 73;
+ strcpy(text, "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data");
+ digest = "\xe8\xe9\x9d\x0f\x45\x23\x7d\x78\x6d\x6b\xba\xa7\x96\x5c\x78\x08\xbb\xff\x1a\x91";
+ if (sctp_test_hmac("SHA1 test case 7", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nSHA1 test results: %d cases failed", failed);
+ else
+ printf("\nSHA1 test results: all test cases passed");
+}
+
+/*
+ * RFC 2202: HMAC-MD5 test cases
+ */
+void
+sctp_test_hmac_md5(void)
+{
+ uint8_t *digest;
+ uint8_t key[128];
+ uint32_t keylen;
+ uint8_t text[128];
+ uint32_t textlen;
+ uint32_t digestlen = 16;
+ int failed = 0;
+
+ /*
+ * test_case = 1 key = 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b
+ * key_len = 16 data = "Hi There" data_len = 8 digest =
+ * 0x9294727a3638bb1c13f48ef8158bfc9d
+ */
+ keylen = 16;
+ memset(key, 0x0b, keylen);
+ textlen = 8;
+ strcpy(text, "Hi There");
+ digest = "\x92\x94\x72\x7a\x36\x38\xbb\x1c\x13\xf4\x8e\xf8\x15\x8b\xfc\x9d";
+ if (sctp_test_hmac("MD5 test case 1", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 2 key = "Jefe" key_len = 4 data =
+ * "what do ya want for nothing?" data_len = 28 digest =
+ * 0x750c783e6ab0b503eaa86e310a5db738
+ */
+ keylen = 4;
+ strcpy(key, "Jefe");
+ textlen = 28;
+ strcpy(text, "what do ya want for nothing?");
+ digest = "\x75\x0c\x78\x3e\x6a\xb0\xb5\x03\xea\xa8\x6e\x31\x0a\x5d\xb7\x38";
+ if (sctp_test_hmac("MD5 test case 2", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 3 key = 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ * key_len = 16 data = 0xdd repeated 50 times data_len = 50
+ * digest = 0x56be34521d144c88dbb8c733f0e8b3f6
+ */
+ keylen = 16;
+ memset(key, 0xaa, keylen);
+ textlen = 50;
+ memset(text, 0xdd, textlen);
+ digest = "\x56\xbe\x34\x52\x1d\x14\x4c\x88\xdb\xb8\xc7\x33\xf0\xe8\xb3\xf6";
+ if (sctp_test_hmac("MD5 test case 3", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 4 key =
+ * 0x0102030405060708090a0b0c0d0e0f10111213141516171819 key_len = 25
+ * data = 0xcd repeated 50 times data_len = 50 digest
+ * = 0x697eaf0aca3a3aea3a75164746ffaa79
+ */
+ keylen = 25;
+ memcpy(key, "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19", keylen);
+ textlen = 50;
+ memset(text, 0xcd, textlen);
+ digest = "\x69\x7e\xaf\x0a\xca\x3a\x3a\xea\x3a\x75\x16\x47\x46\xff\xaa\x79";
+ if (sctp_test_hmac("MD5 test case 4", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 5 key = 0x0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c
+ * key_len = 16 data = "Test With Truncation" data_len = 20
+ * digest = 0x56461ef2342edc00f9bab995690efd4c digest-96
+ * 0x56461ef2342edc00f9bab995
+ */
+ keylen = 16;
+ memset(key, 0x0c, keylen);
+ textlen = 20;
+ strcpy(text, "Test With Truncation");
+ digest = "\x56\x46\x1e\xf2\x34\x2e\xdc\x00\xf9\xba\xb9\x95\x69\x0e\xfd\x4c";
+ if (sctp_test_hmac("MD5 test case 5", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 6 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key -
+ * Hash Key First" data_len = 54 digest =
+ * 0x6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 54;
+ strcpy(text, "Test Using Larger Than Block-Size Key - Hash Key First");
+ digest = "\x6b\x1a\xb7\xfe\x4b\xd7\xbf\x8f\x0b\x62\xe6\xce\x61\xb9\xd0\xcd";
+ if (sctp_test_hmac("MD5 test case 6", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 7 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key and
+ * Larger Than One Block-Size Data" data_len = 73 digest =
+ * 0x6f630fad67cda0ee1fb1f562db3aa53e
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 73;
+ strcpy(text, "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data");
+ digest = "\x6f\x63\x0f\xad\x67\xcd\xa0\xee\x1f\xb1\xf5\x62\xdb\x3a\xa5\x3e";
+ if (sctp_test_hmac("MD5 test case 7", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nMD5 test results: %d cases failed", failed);
+ else
+ printf("\nMD5 test results: all test cases passed");
+}
+
+/*
+ * test assoc key concatenation
+ */
+static int
+sctp_test_key_concatenation(sctp_key_t * key1, sctp_key_t * key2,
+ sctp_key_t * expected_key)
+{
+ sctp_key_t *key;
+ int ret_val;
+
+ sctp_show_key(key1, "\nkey1");
+ sctp_show_key(key2, "\nkey2");
+ key = sctp_compute_hashkey(key1, key2, NULL);
+ sctp_show_key(expected_key, "\nExpected");
+ sctp_show_key(key, "\nComputed");
+ if (memcmp(key, expected_key, expected_key->keylen) != 0) {
+ printf("\nFAILED");
+ ret_val = -1;
+ } else {
+ printf("\nPASSED");
+ ret_val = 0;
+ }
+ sctp_free_key(key1);
+ sctp_free_key(key2);
+ sctp_free_key(expected_key);
+ sctp_free_key(key);
+ return (ret_val);
+}
+
+
+void
+sctp_test_authkey(void)
+{
+ sctp_key_t *key1, *key2, *expected_key;
+ int failed = 0;
+
+ /* test case 1 */
+ key1 = sctp_set_key("\x01\x01\x01\x01", 4);
+ key2 = sctp_set_key("\x01\x02\x03\x04", 4);
+ expected_key = sctp_set_key("\x01\x01\x01\x01\x01\x02\x03\x04", 8);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 2 */
+ key1 = sctp_set_key("\x00\x00\x00\x01", 4);
+ key2 = sctp_set_key("\x02", 1);
+ expected_key = sctp_set_key("\x00\x00\x00\x01\x02", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 3 */
+ key1 = sctp_set_key("\x01", 1);
+ key2 = sctp_set_key("\x00\x00\x00\x02", 4);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x02", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 4 */
+ key1 = sctp_set_key("\x00\x00\x00\x01", 4);
+ key2 = sctp_set_key("\x01", 1);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x01", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 5 */
+ key1 = sctp_set_key("\x01", 1);
+ key2 = sctp_set_key("\x00\x00\x00\x01", 4);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x01", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 6 */
+ key1 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07", 11);
+ key2 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 11);
+ expected_key = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 22);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 7 */
+ key1 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 11);
+ key2 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07", 11);
+ expected_key = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 22);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nKey concatenation test results: %d cases failed", failed);
+ else
+ printf("\nKey concatenation test results: all test cases passed");
+}
+
+
+#if defined(STANDALONE_HMAC_TEST)
+int
+main(void)
+{
+ sctp_test_hmac_sha1();
+ sctp_test_hmac_md5();
+ sctp_test_authkey();
+}
+
+#endif /* STANDALONE_HMAC_TEST */
+
+#endif /* SCTP_HMAC_TEST */
--- /dev/null
+++ sys/netinet/sctp_lock_bsd.h
@@ -0,0 +1,403 @@
+#ifndef __sctp_lock_bsd_h__
+#define __sctp_lock_bsd_h__
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General locking concepts: The goal of our locking is to provide
+ * consistency while minimizing overhead. We attempt to use non-recursive
+ * locks, which are supposed to be quite inexpensive. To make that work,
+ * most functions are not aware of locking. Once we have a TCB we lock it
+ * and unlock it when we are through. This means that the TCB lock is
+ * kind-of a "global" lock when working on an association. Caution must be
+ * used when asserting a TCB_LOCK, since if we recurse we deadlock.
+ *
+ * Most other locks (INP and INFO) attempt to localize the locking, i.e. we
+ * try to contain the lock and unlock within the function that needs to
+ * lock it. This sometimes means we do extra locks and unlocks and lose a
+ * bit of efficiency, but if the performance statements about non-recursive
+ * locks are true this should not be a problem. One issue that arises with
+ * this lock-only-when-needed approach is implicit association setup: if,
+ * at the time I look up an association, the TCB comes back NULL, some
+ * other processor could have created it by the time I call in to create
+ * the association. This is what the CREATE lock on the endpoint is for.
+ * Places where we will be implicitly creating the association OR just
+ * creating an association (the connect call) will assert the CREATE_INP
+ * lock. This assures us that during all the lookups of INP and INFO, if
+ * another creator is also locking/looking up, we can gate the two to
+ * synchronize. So the CREATE_INP lock is another one we must use with
+ * extreme caution when locking to make sure we don't hit a re-entrancy
+ * issue.
+ *
+ * For non-FreeBSD 5.x systems we provide a bunch of EMPTY lock macros so
+ * we can blatantly put locks everywhere; they reduce to nothing on
+ * NetBSD/OpenBSD and FreeBSD 4.x.
+ *
+ */
+
+/*
+ * When working with the global SCTP lists we lock and unlock the INP_INFO
+ * lock. So when we go to look up an association we will want to do a
+ * SCTP_INP_INFO_RLOCK(), and when we want to add a new association to
+ * the sctppcbinfo lists we will do a SCTP_INP_INFO_WLOCK().
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_lock_bsd.h,v 1.10 2007/09/18 15:16:38 rrs Exp $");
+
+
+extern struct sctp_foo_stuff sctp_logoff[];
+extern int sctp_logoff_stuff;
+
+#define SCTP_IPI_COUNT_INIT()
+
+#define SCTP_STATLOG_INIT_LOCK()
+#define SCTP_STATLOG_LOCK()
+#define SCTP_STATLOG_UNLOCK()
+#define SCTP_STATLOG_DESTROY()
+
+#define SCTP_INP_INFO_LOCK_DESTROY() do { \
+	if (rw_wowned(&sctppcbinfo.ipi_ep_mtx)) { \
+		rw_wunlock(&sctppcbinfo.ipi_ep_mtx); \
+	} \
+	rw_destroy(&sctppcbinfo.ipi_ep_mtx); \
+ } while (0)
+
+#define SCTP_INP_INFO_LOCK_INIT() \
+ rw_init(&sctppcbinfo.ipi_ep_mtx, "sctp-info");
+
+
+#define SCTP_INP_INFO_RLOCK() do { \
+ rw_rlock(&sctppcbinfo.ipi_ep_mtx); \
+} while (0)
+
+
+#define SCTP_INP_INFO_WLOCK() do { \
+ rw_wlock(&sctppcbinfo.ipi_ep_mtx); \
+} while (0)
+
+
+#define SCTP_INP_INFO_RUNLOCK() rw_runlock(&sctppcbinfo.ipi_ep_mtx)
+#define SCTP_INP_INFO_WUNLOCK() rw_wunlock(&sctppcbinfo.ipi_ep_mtx)
+
+
+#define SCTP_IPI_ADDR_INIT() \
+ rw_init(&sctppcbinfo.ipi_addr_mtx, "sctp-addr")
+
+#define SCTP_IPI_ADDR_DESTROY() do { \
+	if (rw_wowned(&sctppcbinfo.ipi_addr_mtx)) { \
+		rw_wunlock(&sctppcbinfo.ipi_addr_mtx); \
+	} \
+	rw_destroy(&sctppcbinfo.ipi_addr_mtx); \
+ } while (0)
+
+
+
+#define SCTP_IPI_ADDR_RLOCK() do { \
+ rw_rlock(&sctppcbinfo.ipi_addr_mtx); \
+} while (0)
+
+#define SCTP_IPI_ADDR_WLOCK() do { \
+ rw_wlock(&sctppcbinfo.ipi_addr_mtx); \
+} while (0)
+
+
+#define SCTP_IPI_ADDR_RUNLOCK() rw_runlock(&sctppcbinfo.ipi_addr_mtx)
+#define SCTP_IPI_ADDR_WUNLOCK() rw_wunlock(&sctppcbinfo.ipi_addr_mtx)
+
+
+#define SCTP_IPI_ITERATOR_WQ_INIT() \
+ mtx_init(&sctppcbinfo.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF)
+
+#define SCTP_IPI_ITERATOR_WQ_DESTROY() \
+ mtx_destroy(&sctppcbinfo.ipi_iterator_wq_mtx)
+
+#define SCTP_IPI_ITERATOR_WQ_LOCK() do { \
+ mtx_lock(&sctppcbinfo.ipi_iterator_wq_mtx); \
+} while (0)
+
+#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_iterator_wq_mtx)
+
+
+#define SCTP_IP_PKTLOG_INIT() \
+ mtx_init(&sctppcbinfo.ipi_pktlog_mtx, "sctp-pktlog", "packetlog", MTX_DEF)
+
+
+#define SCTP_IP_PKTLOG_LOCK() do { \
+ mtx_lock(&sctppcbinfo.ipi_pktlog_mtx); \
+} while (0)
+
+#define SCTP_IP_PKTLOG_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_pktlog_mtx)
+
+#define SCTP_IP_PKTLOG_DESTROY() \
+ mtx_destroy(&sctppcbinfo.ipi_pktlog_mtx)
+
+
+
+
+
+/*
+ * The INP locks are what we use for locking an SCTP endpoint; for example,
+ * if we want to change something at the endpoint level, such as random_store
+ * or cookie secrets, we lock at the INP level.
+ */
+
+#define SCTP_INP_READ_INIT(_inp) \
+ mtx_init(&(_inp)->inp_rdata_mtx, "sctp-read", "inpr", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_READ_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_rdata_mtx)
+
+#define SCTP_INP_READ_LOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_rdata_mtx); \
+} while (0)
+
+
+#define SCTP_INP_READ_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_rdata_mtx)
+
+
+#define SCTP_INP_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_mtx, "sctp-inp", "inp", MTX_DEF | MTX_DUPOK)
+#define SCTP_ASOC_CREATE_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_create_mtx, "sctp-create", "inp_create", \
+ MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_mtx)
+
+#define SCTP_ASOC_CREATE_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_create_mtx)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_INP_RLOCK(_inp) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#else
+
+#define SCTP_INP_RLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_SEND_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_send_mtx, "sctp-send-tcb", "tcbs", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_SEND_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_TCB_SEND_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_send_mtx); \
+} while (0)
+
+#define SCTP_TCB_SEND_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_INP_INCR_REF(_inp) atomic_add_int(&((_inp)->refcount), 1)
+#define SCTP_INP_DECR_REF(_inp) atomic_add_int(&((_inp)->refcount), -1)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_CREATE); \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#else
+
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#endif
+
+#define SCTP_INP_RUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_INP_WUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_ASOC_CREATE_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_create_mtx)
+
+/*
+ * For the majority of things (once we have found the association) we will
+ * lock the actual association mutex. This will protect all the association
+ * level queues and streams and such. We will need to lock the socket layer
+ * when we stuff data up into the receiving sb_mb. I.e. we will need to do an
+ * extra SOCKBUF_LOCK(&so->so_rcv) even though the association is locked.
+ */
+
+#define SCTP_TCB_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_mtx, "sctp-tcb", "tcb", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_mtx)
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_TCB_LOCK(_tcb) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_tcb->sctp_ep, _tcb, SCTP_LOG_LOCK_TCB); \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#else
+#define SCTP_TCB_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_TRYLOCK(_tcb) mtx_trylock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK_IFOWNED(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx)) \
+ mtx_unlock(&(_tcb)->tcb_mtx); \
+ } while (0)
+
+
+
+#ifdef INVARIANTS
+#define SCTP_TCB_LOCK_ASSERT(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx) == 0) \
+ panic("Don't own TCB lock"); \
+ } while (0)
+#else
+#define SCTP_TCB_LOCK_ASSERT(_tcb)
+#endif
+
+#define SCTP_ITERATOR_LOCK_INIT() \
+ mtx_init(&sctppcbinfo.it_mtx, "sctp-it", "iterator", MTX_DEF)
+
+#ifdef INVARIANTS
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ if (mtx_owned(&sctppcbinfo.it_mtx)) \
+ panic("Iterator Lock"); \
+ mtx_lock(&sctppcbinfo.it_mtx); \
+ } while (0)
+#else
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ mtx_lock(&sctppcbinfo.it_mtx); \
+ } while (0)
+
+#endif
+
+#define SCTP_ITERATOR_UNLOCK() mtx_unlock(&sctppcbinfo.it_mtx)
+#define SCTP_ITERATOR_LOCK_DESTROY() mtx_destroy(&sctppcbinfo.it_mtx)
+
+
+#define SCTP_INCR_EP_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_ep, 1); \
+ } while (0)
+
+#define SCTP_DECR_EP_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ep, 1); \
+ } while (0)
+
+#define SCTP_INCR_ASOC_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_asoc, 1); \
+ } while (0)
+
+#define SCTP_DECR_ASOC_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_asoc, 1); \
+ } while (0)
+
+#define SCTP_INCR_LADDR_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_laddr, 1); \
+ } while (0)
+
+#define SCTP_DECR_LADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_laddr, 1); \
+ } while (0)
+
+#define SCTP_INCR_RADDR_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_raddr, 1); \
+ } while (0)
+
+#define SCTP_DECR_RADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_raddr,1); \
+ } while (0)
+
+#define SCTP_INCR_CHK_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_chunk, 1); \
+ } while (0)
+
+#define SCTP_DECR_CHK_COUNT() \
+ do { \
+ if(sctppcbinfo.ipi_count_chunk == 0) \
+ panic("chunk count to 0?"); \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_chunk, 1); \
+ } while (0)
+
+#define SCTP_INCR_READQ_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_readq,1); \
+ } while (0)
+
+#define SCTP_DECR_READQ_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_readq, 1); \
+ } while (0)
+
+#define SCTP_INCR_STRMOQ_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_strmoq, 1); \
+ } while (0)
+
+#define SCTP_DECR_STRMOQ_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_strmoq, 1); \
+ } while (0)
+
+
+#if defined(SCTP_SO_LOCK_TESTING)
+#define SCTP_INP_SO(sctpinp) (sctpinp)->ip_inp.inp.inp_socket
+#define SCTP_SOCKET_LOCK(so, refcnt)
+#define SCTP_SOCKET_UNLOCK(so, refcnt)
+#endif
+
+#endif
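
As a rough illustration of the discipline described in the comment at the top of this header (lookups under the INP_INFO lock, a per-association TCB lock held while working on the association, and the CREATE lock gating implicit association setup), here is a small userland analogy built on pthreads. The table, lookup_or_create(), and the helper functions are hypothetical stand-ins, not SCTP code; the comments only name the macros they loosely correspond to.

#include <pthread.h>
#include <stddef.h>

struct assoc {
	int id;
	int in_use;
	pthread_mutex_t tcb_lock;	/* plays the role of tcb_mtx */
};

static struct assoc table[16];	/* stand-in for the sctppcbinfo lists */
static pthread_rwlock_t info_lock = PTHREAD_RWLOCK_INITIALIZER;	/* ~ipi_ep_mtx */
static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~inp_create_mtx */

static struct assoc *
table_find(int id)
{
	for (size_t i = 0; i < 16; i++)
		if (table[i].in_use && table[i].id == id)
			return (&table[i]);
	return (NULL);
}

static struct assoc *
table_insert(int id)
{
	for (size_t i = 0; i < 16; i++)
		if (!table[i].in_use) {
			table[i].id = id;
			table[i].in_use = 1;
			pthread_mutex_init(&table[i].tcb_lock, NULL);
			return (&table[i]);
		}
	return (NULL);
}

/* look up an association; create it under the CREATE lock if it is missing */
struct assoc *
lookup_or_create(int id)
{
	struct assoc *a;

	pthread_rwlock_rdlock(&info_lock);	/* SCTP_INP_INFO_RLOCK() */
	a = table_find(id);
	if (a != NULL)
		pthread_mutex_lock(&a->tcb_lock);	/* SCTP_TCB_LOCK() */
	pthread_rwlock_unlock(&info_lock);	/* SCTP_INP_INFO_RUNLOCK() */
	if (a != NULL)
		return (a);

	pthread_mutex_lock(&create_lock);	/* SCTP_ASOC_CREATE_LOCK() */
	pthread_rwlock_wrlock(&info_lock);	/* SCTP_INP_INFO_WLOCK() */
	a = table_find(id);	/* re-check: another creator may have won the race */
	if (a == NULL)
		a = table_insert(id);
	if (a != NULL)
		pthread_mutex_lock(&a->tcb_lock);
	pthread_rwlock_unlock(&info_lock);
	pthread_mutex_unlock(&create_lock);	/* SCTP_ASOC_CREATE_UNLOCK() */
	return (a);	/* caller drops a->tcb_lock, as with SCTP_TCB_UNLOCK() */
}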
--- /dev/null
+++ sys/netinet/sctp_output.c
@@ -0,0 +1,12458 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_output.c,v 1.58.2.3 2007/12/09 20:23:47 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_input.h>
+
+
+
+#define SCTP_MAX_GAPS_INARRAY 4
+struct sack_track {
+	uint8_t right_edge;	/* mergeable on the right edge */
+	uint8_t left_edge;	/* mergeable on the left edge */
+ uint8_t num_entries;
+ uint8_t spare;
+ struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
+};
+
+struct sack_track sack_array[256] = {
+ {0, 0, 0, 0, /* 0x00 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x01 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x02 */
+ {{1, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x03 */
+ {{0, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x04 */
+ {{2, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x05 */
+ {{0, 0},
+ {2, 2},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x06 */
+ {{1, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x07 */
+ {{0, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x08 */
+ {{3, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x09 */
+ {{0, 0},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x0a */
+ {{1, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0b */
+ {{0, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0c */
+ {{2, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0d */
+ {{0, 0},
+ {2, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0e */
+ {{1, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x0f */
+ {{0, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x10 */
+ {{4, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x11 */
+ {{0, 0},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x12 */
+ {{1, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x13 */
+ {{0, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x14 */
+ {{2, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x15 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x16 */
+ {{1, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x17 */
+ {{0, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x18 */
+ {{3, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x19 */
+ {{0, 0},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x1a */
+ {{1, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1b */
+ {{0, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1c */
+ {{2, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1d */
+ {{0, 0},
+ {2, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1e */
+ {{1, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x1f */
+ {{0, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x20 */
+ {{5, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x21 */
+ {{0, 0},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x22 */
+ {{1, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x23 */
+ {{0, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x24 */
+ {{2, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x25 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x26 */
+ {{1, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x27 */
+ {{0, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x28 */
+ {{3, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x29 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x2a */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2b */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2c */
+ {{2, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2d */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2e */
+ {{1, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x2f */
+ {{0, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x30 */
+ {{4, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x31 */
+ {{0, 0},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x32 */
+ {{1, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x33 */
+ {{0, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x34 */
+ {{2, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x35 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x36 */
+ {{1, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x37 */
+ {{0, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x38 */
+ {{3, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x39 */
+ {{0, 0},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x3a */
+ {{1, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3b */
+ {{0, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3c */
+ {{2, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3d */
+ {{0, 0},
+ {2, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3e */
+ {{1, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x3f */
+ {{0, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x40 */
+ {{6, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x41 */
+ {{0, 0},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x42 */
+ {{1, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x43 */
+ {{0, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x44 */
+ {{2, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x45 */
+ {{0, 0},
+ {2, 2},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x46 */
+ {{1, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x47 */
+ {{0, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x48 */
+ {{3, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x49 */
+ {{0, 0},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x4a */
+ {{1, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4b */
+ {{0, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4c */
+ {{2, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4d */
+ {{0, 0},
+ {2, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4e */
+ {{1, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x4f */
+ {{0, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x50 */
+ {{4, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x51 */
+ {{0, 0},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x52 */
+ {{1, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x53 */
+ {{0, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x54 */
+ {{2, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 4, 0, /* 0x55 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 6}
+ }
+ },
+ {0, 0, 3, 0, /* 0x56 */
+ {{1, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x57 */
+ {{0, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x58 */
+ {{3, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x59 */
+ {{0, 0},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x5a */
+ {{1, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5b */
+ {{0, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5c */
+ {{2, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5d */
+ {{0, 0},
+ {2, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5e */
+ {{1, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x5f */
+ {{0, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x60 */
+ {{5, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x61 */
+ {{0, 0},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x62 */
+ {{1, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x63 */
+ {{0, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x64 */
+ {{2, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x65 */
+ {{0, 0},
+ {2, 2},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x66 */
+ {{1, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x67 */
+ {{0, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x68 */
+ {{3, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x69 */
+ {{0, 0},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x6a */
+ {{1, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6b */
+ {{0, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6c */
+ {{2, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6d */
+ {{0, 0},
+ {2, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6e */
+ {{1, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x6f */
+ {{0, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x70 */
+ {{4, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x71 */
+ {{0, 0},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x72 */
+ {{1, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x73 */
+ {{0, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x74 */
+ {{2, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x75 */
+ {{0, 0},
+ {2, 2},
+ {4, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x76 */
+ {{1, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x77 */
+ {{0, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x78 */
+ {{3, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x79 */
+ {{0, 0},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x7a */
+ {{1, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7b */
+ {{0, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7c */
+ {{2, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7d */
+ {{0, 0},
+ {2, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7e */
+ {{1, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x7f */
+ {{0, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0x80 */
+ {{7, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x81 */
+ {{0, 0},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x82 */
+ {{1, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x83 */
+ {{0, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x84 */
+ {{2, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x85 */
+ {{0, 0},
+ {2, 2},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x86 */
+ {{1, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x87 */
+ {{0, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x88 */
+ {{3, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x89 */
+ {{0, 0},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x8a */
+ {{1, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8b */
+ {{0, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8c */
+ {{2, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8d */
+ {{0, 0},
+ {2, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8e */
+ {{1, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x8f */
+ {{0, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x90 */
+ {{4, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x91 */
+ {{0, 0},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x92 */
+ {{1, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x93 */
+ {{0, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x94 */
+ {{2, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0x95 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0x96 */
+ {{1, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x97 */
+ {{0, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x98 */
+ {{3, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x99 */
+ {{0, 0},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x9a */
+ {{1, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9b */
+ {{0, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9c */
+ {{2, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9d */
+ {{0, 0},
+ {2, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9e */
+ {{1, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x9f */
+ {{0, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xa0 */
+ {{5, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa1 */
+ {{0, 0},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa2 */
+ {{1, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa3 */
+ {{0, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa4 */
+ {{2, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa5 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa6 */
+ {{1, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa7 */
+ {{0, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa8 */
+ {{3, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa9 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 4, 0, /* 0xaa */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {1, 1, 4, 0, /* 0xab */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xac */
+ {{2, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xad */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xae */
+ {{1, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xaf */
+ {{0, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb0 */
+ {{4, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb1 */
+ {{0, 0},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb2 */
+ {{1, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb3 */
+ {{0, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb4 */
+ {{2, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xb5 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb6 */
+ {{1, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb7 */
+ {{0, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb8 */
+ {{3, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb9 */
+ {{0, 0},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xba */
+ {{1, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbb */
+ {{0, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbc */
+ {{2, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbd */
+ {{0, 0},
+ {2, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbe */
+ {{1, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xbf */
+ {{0, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xc0 */
+ {{6, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc1 */
+ {{0, 0},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc2 */
+ {{1, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc3 */
+ {{0, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc4 */
+ {{2, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc5 */
+ {{0, 0},
+ {2, 2},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc6 */
+ {{1, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc7 */
+ {{0, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc8 */
+ {{3, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc9 */
+ {{0, 0},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xca */
+ {{1, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcb */
+ {{0, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xcc */
+ {{2, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcd */
+ {{0, 0},
+ {2, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xce */
+ {{1, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xcf */
+ {{0, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd0 */
+ {{4, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd1 */
+ {{0, 0},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd2 */
+ {{1, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd3 */
+ {{0, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd4 */
+ {{2, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xd5 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd6 */
+ {{1, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd7 */
+ {{0, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd8 */
+ {{3, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd9 */
+ {{0, 0},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xda */
+ {{1, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdb */
+ {{0, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xdc */
+ {{2, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdd */
+ {{0, 0},
+ {2, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xde */
+ {{1, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xdf */
+ {{0, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xe0 */
+ {{5, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe1 */
+ {{0, 0},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe2 */
+ {{1, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe3 */
+ {{0, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe4 */
+ {{2, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe5 */
+ {{0, 0},
+ {2, 2},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe6 */
+ {{1, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe7 */
+ {{0, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe8 */
+ {{3, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe9 */
+ {{0, 0},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xea */
+ {{1, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xeb */
+ {{0, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xec */
+ {{2, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xed */
+ {{0, 0},
+ {2, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xee */
+ {{1, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xef */
+ {{0, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf0 */
+ {{4, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf1 */
+ {{0, 0},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf2 */
+ {{1, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf3 */
+ {{0, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf4 */
+ {{2, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xf5 */
+ {{0, 0},
+ {2, 2},
+ {4, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf6 */
+ {{1, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf7 */
+ {{0, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf8 */
+ {{3, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf9 */
+ {{0, 0},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xfa */
+ {{1, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfb */
+ {{0, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfc */
+ {{2, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfd */
+ {{0, 0},
+ {2, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfe */
+ {{1, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 1, 0, /* 0xff */
+ {{0, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ }
+};
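
sack_array above is a 256-entry lookup table indexed by one byte of the association's received-TSN bitmap: each entry pre-computes the runs of set bits in that byte as start/end bit offsets, with right_edge set when bit 0 belongs to a run and left_edge set when bit 7 does, so runs can be merged across neighbouring bytes. A minimal sketch of reading one entry follows; it assumes the definitions above and the start/end field names of struct sctp_gap_ack_block, and the byte value used is just an example.

#include <stdint.h>
#include <stdio.h>

/* assumes struct sack_track and sack_array[] as defined above */
static void
show_gaps(uint8_t byte)
{
	struct sack_track *p = &sack_array[byte];
	int i;

	printf("byte 0x%02x: %d run(s)%s%s\n", byte, p->num_entries,
	    p->right_edge ? ", bit 0 in a run" : "",
	    p->left_edge ? ", bit 7 in a run" : "");
	for (i = 0; i < p->num_entries; i++)
		printf("  bits %d..%d set\n",
		    p->gaps[i].start, p->gaps[i].end);
}

/* e.g. show_gaps(0x2a) reports the runs 1..1, 3..3 and 5..5 */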
+
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update)
+{
+ if ((loopback_scope == 0) &&
+ (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
+ /*
+		 * skip loopback if not in scope
+ */
+ return (0);
+ }
+ if ((ifa->address.sa.sa_family == AF_INET) && ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ if (sin->sin_addr.s_addr == 0) {
+			/* not in scope, unspecified */
+ return (0);
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ /* private address not in scope */
+ return (0);
+ }
+ } else if ((ifa->address.sa.sa_family == AF_INET6) && ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ /*
+ * Must update the flags, bummer, which means any IFA locks
+ * must now be applied HERE <->
+ */
+ if (do_update) {
+ sctp_gather_internal_ifa_flags(ifa);
+ }
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (0);
+ }
+ /* ok to use deprecated addresses? */
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+			/* skip unspecified addresses */
+ return (0);
+ }
+ if ( /* (local_scope == 0) && */
+ (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ } else {
+ return (0);
+ }
+ return (1);
+}
+
+static struct mbuf *
+sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa)
+{
+ struct sctp_paramhdr *parmh;
+ struct mbuf *mret;
+ int len;
+
+ if (ifa->address.sa.sa_family == AF_INET) {
+ len = sizeof(struct sctp_ipv4addr_param);
+ } else if (ifa->address.sa.sa_family == AF_INET6) {
+ len = sizeof(struct sctp_ipv6addr_param);
+ } else {
+ /* unknown type */
+ return (m);
+ }
+ if (M_TRAILINGSPACE(m) >= len) {
+		/* easy case: we just drop it on the end */
+ parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
+ mret = m;
+ } else {
+ /* Need more space */
+ mret = m;
+ while (SCTP_BUF_NEXT(mret) != NULL) {
+ mret = SCTP_BUF_NEXT(mret);
+ }
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(mret) == NULL) {
+ /* We are hosed, can't add more addresses */
+ return (m);
+ }
+ mret = SCTP_BUF_NEXT(mret);
+ parmh = mtod(mret, struct sctp_paramhdr *);
+ }
+ /* now add the parameter */
+ if (ifa->address.sa.sa_family == AF_INET) {
+ struct sctp_ipv4addr_param *ipv4p;
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ ipv4p = (struct sctp_ipv4addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV4_ADDRESS);
+ parmh->param_length = htons(len);
+ ipv4p->addr = sin->sin_addr.s_addr;
+ SCTP_BUF_LEN(mret) += len;
+ } else if (ifa->address.sa.sa_family == AF_INET6) {
+ struct sctp_ipv6addr_param *ipv6p;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ ipv6p = (struct sctp_ipv6addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV6_ADDRESS);
+ parmh->param_length = htons(len);
+ memcpy(ipv6p->addr, &sin6->sin6_addr,
+ sizeof(ipv6p->addr));
+ /* clear embedded scope in the address */
+ in6_clearscope((struct in6_addr *)ipv6p->addr);
+ SCTP_BUF_LEN(mret) += len;
+ } else {
+ return (m);
+ }
+ return (mret);
+}
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope,
+ struct mbuf *m_at, int cnt_inits_to)
+{
+ struct sctp_vrf *vrf = NULL;
+ int cnt, limit_out = 0, total_count;
+ uint32_t vrf_id;
+
+ vrf_id = inp->def_vrf_id;
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_ifn *sctp_ifnp;
+
+ cnt = cnt_inits_to;
+ if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) {
+ limit_out = 1;
+ cnt = SCTP_ADDRESS_LIMIT;
+ goto skip_count;
+ }
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if loopback_scope
+ * not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+skip_count:
+ if (cnt > 1) {
+ total_count = 0;
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ cnt = 0;
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if
+ * loopback_scope not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap);
+ if (limit_out) {
+ cnt++;
+ total_count++;
+ if (cnt >= 2) {
+ /*
+ * two from each
+ * address
+ */
+ break;
+ }
+ if (total_count > SCTP_ADDRESS_LIMIT) {
+ /* No more addresses */
+ break;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ cnt = cnt_inits_to;
+ /* First, how many ? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ /*
+				 * Address being deleted by the system, don't
+ * list.
+ */
+ continue;
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /*
+				 * Address being deleted on this ep, don't
+ * list.
+ */
+ continue;
+ }
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ limit_out = 1;
+ }
+ /*
+ * To get through a NAT we only list addresses if we have
+ * more than one. That way if you just bind a single address
+ * we let the source of the init dictate our address.
+ */
+ if (cnt > 1) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ cnt = 0;
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ continue;
+
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa);
+ cnt++;
+ if (cnt >= SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+ /* dest_is_priv is true if destination is a private address */
+	/* dest_is_loop is true if destination is a loopback address */
+
+ /*
+	 * Here we determine if it's a preferred address. A preferred address
+	 * means it is the same scope or a higher scope than the destination.
+	 * L = loopback, P = private, G = global
+	 * -----------------------------------------
+	 *  src  | dest | result
+	 * -----------------------------------------
+	 *   L   |  L   | yes
+	 *   P   |  L   | yes-v4 no-v6
+	 *   G   |  L   | yes-v4 no-v6
+	 *   L   |  P   | no
+	 *   P   |  P   | yes
+	 *   G   |  P   | no
+	 *   L   |  G   | no
+	 *   P   |  G   | no
+	 *   G   |  G   | yes
+	 * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget mis-matched family */
+ return (NULL);
+ }
+ if ((dest_is_priv == 0) && (dest_is_loop == 0)) {
+ dest_is_global = 1;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa);
+ /* Ok the address may be ok */
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n");
+ return (NULL);
+ }
+ if (ifa->src_is_priv) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n");
+ return (NULL);
+ }
+ }
+ if (ifa->src_is_glob) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n");
+ return (NULL);
+ }
+ }
+ }
+ /*
+ * Now that we know what is what, implement our table. This could in
+ * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n",
+ ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n",
+ dest_is_loop, dest_is_priv, dest_is_global);
+
+ if ((ifa->src_is_loop) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_glob) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_loop) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_priv) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n");
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n");
+ /* it's a preferred address */
+ return (ifa);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+
+ /*
+ * Here we determine if it's an acceptable address. An acceptable
+ * address means it is the same scope or higher scope, but we can
+ * allow for NAT, which means it's ok to have a global dest and a
+ * private src.
+ *
+ * L = loopback, P = private, G = global
+ * -----------------------------------------
+ *  src  |  dest | result
+ * -----------------------------------------
+ *   L   |   L   | yes
+ * -----------------------------------------
+ *   P   |   L   | yes-v4 no-v6
+ * -----------------------------------------
+ *   G   |   L   | yes
+ * -----------------------------------------
+ *   L   |   P   | no
+ * -----------------------------------------
+ *   P   |   P   | yes
+ * -----------------------------------------
+ *   G   |   P   | yes - May not work
+ * -----------------------------------------
+ *   L   |   G   | no
+ * -----------------------------------------
+ *   P   |   G   | yes - May not work
+ * -----------------------------------------
+ *   G   |   G   | yes
+ * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget non matching family */
+ return (NULL);
+ }
+ /* Ok the address may be ok */
+ if ((dest_is_loop == 0) && (dest_is_priv == 0)) {
+ dest_is_global = 1;
+ }
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (NULL);
+ }
+ if (ifa->src_is_priv) {
+ /* Special case, linklocal to loop */
+ if (dest_is_loop)
+ return (NULL);
+ }
+ }
+ /*
+ * Now that we know what is what, implement our table. This could in
+ * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ if ((ifa->src_is_loop == 1) && (dest_is_priv)) {
+ return (NULL);
+ }
+ if ((ifa->src_is_loop == 1) && (dest_is_global)) {
+ return (NULL);
+ }
+ /* it's an acceptable address */
+ return (ifa);
+}
+
+int
+sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (stcb == NULL) {
+ /* There are no restrictions, no TCB :-) */
+ return (0);
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ /* Yes it is on the list */
+ return (1);
+ }
+ }
+ return (0);
+}
+
+
+int
+sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (ifa == NULL)
+ return (0);
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if ((laddr->ifa == ifa) && laddr->action == 0)
+ /* same pointer */
+ return (1);
+ }
+ return (0);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_inp(struct sctp_inpcb *inp,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ int resettotop = 0;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ /*
+ * First question: is the ifn we will emit on in our list? If so, we
+ * want such an address. Note that we look for a preferred
+ * address first.
+ */
+ if (sctp_ifn) {
+ /* is a preferred one on the interface we route out? */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (sctp_is_addr_in_ep(inp, sifa)) {
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ }
+ /*
+ * Ok, now we need to find one on the list of bound addresses. We
+ * can't get one on the emitting interface, so let's first find a
+ * preferred one; failing that, an acceptable one; otherwise we
+ * return NULL.
+ */
+ starting_point = inp->next_addr_touse;
+once_again:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again;
+ }
+ inp->next_addr_touse = starting_point;
+ resettotop = 0;
+once_again_too:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ /* ok, what about an acceptable address in the inp */
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again_too;
+ }
+ /*
+ * No bound address can be a source for the destination; we are in
+ * trouble.
+ */
+ return (NULL);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint8_t start_at_beginning = 0;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ /*
+ * First question: is the ifn we will emit on in our list? If so, we
+ * want that one.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+
+ /*
+ * first question, is the ifn we will emit on in our list? If so,
+ * we want that one. First we look for a preferred. Second, we go
+ * for an acceptable.
+ */
+ if (sctp_ifn) {
+ /* first try for a preferred address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /* next try for an acceptable address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+
+ }
+ /*
+ * If we can't find one like that, then we must look at all bound
+ * addresses, picking a preferred one first and an acceptable one second.
+ */
+ starting_point = stcb->asoc.last_used_address;
+sctp_from_the_top:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top;
+ }
+ /* now try for any higher scope than the destination */
+ stcb->asoc.last_used_address = starting_point;
+ start_at_beginning = 0;
+sctp_from_the_top2:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top2;
+ }
+ return (NULL);
+}
+
+static struct sctp_ifa *
+sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ int addr_wanted,
+ sa_family_t fam,
+ sctp_route_t * ro
+)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
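+ /*
+ * Walk the interface's address list and return the addr_wanted'th
+ * (counting from 0) address that passes the preference, mobility,
+ * and restriction checks below.
+ */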
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ /*
+ * Check if the IPv6 address matches the next-hop. In the
+ * mobile case, an old IPv6 address may not be deleted from the
+ * interface, so the interface carries both the previous and the new
+ * addresses. We should use the one corresponding to the
+ * next-hop. (by micchie)
+ */
+ if (stcb && fam == AF_INET6 &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v6src_match_nexthop(&sifa->address.sin6, ro)
+ == 0) {
+ continue;
+ }
+ }
+ /* Avoid topologically incorrect IPv4 address */
+ if (stcb && fam == AF_INET &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v4src_match_nexthop(sifa, ro) == 0) {
+ continue;
+ }
+ }
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ if (num_eligible_addr >= addr_wanted) {
+ return (sifa);
+ }
+ num_eligible_addr++;
+ }
+ return (NULL);
+}
+
+
+static int
+sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0)) {
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL) {
+ continue;
+ }
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ num_eligible_addr++;
+ }
+ return (num_eligible_addr);
+}
+
+static struct sctp_ifa *
+sctp_choose_boundall(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ int cur_addr_num = 0, num_preferred = 0;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint32_t ifn_index;
+ struct sctp_vrf *vrf;
+
+ /*-
+ * For boundall we can use any address in the association.
+ * If non_asoc_addr_ok is set we can use any address (at least in
+ * theory). So we look for preferred addresses first. If we find one,
+ * we use it. Otherwise we next try to get an address on the
+ * interface, which we should be able to do (unless non_asoc_addr_ok
+ * is false and we are routed out that way). In these cases where we
+ * can't use the address of the interface we go through all the
+ * ifn's looking for an address we can use and fill that in. Punting
+ * means we send back address 0, which will probably cause problems
+ * actually since then IP will fill in the address of the route ifn,
+ * which means we probably already rejected it.. i.e. here comes an
+ * abort :-<.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifn == NULL) {
+ /* ?? We don't have this guy ?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n");
+ goto bound_all_plan_b;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n",
+ ifn_index, sctp_ifn->ifn_name);
+
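+ /*
+ * Round-robin over the preferred addresses on the emit interface:
+ * the net remembers the index of the next address to try in
+ * indx_of_eligible_next_to_use.
+ */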
+ if (net) {
+ cur_addr_num = net->indx_of_eligible_next_to_use;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
+ stcb,
+ non_asoc_addr_ok,
+ dest_is_loop,
+ dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n",
+ num_preferred, sctp_ifn->ifn_name);
+ if (num_preferred == 0) {
+ /*
+ * no eligible addresses, we must use some other interface
+ * address if we can find one.
+ */
+ goto bound_all_plan_b;
+ }
+ /*
+ * Ok we have num_eligible_addr set with how many we can use, this
+ * may vary from call to call due to addresses being deprecated
+ * etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ /*
+ * select the nth address from the list (where cur_addr_num is the
+ * nth) and 0 is the first one, 1 is the second one etc...
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
+
+ sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+
+ /* if sctp_ifa is NULL, something changed; fall to plan B. */
+ if (sctp_ifa) {
+ atomic_add_int(&sctp_ifa->refcount, 1);
+ if (net) {
+ /* save off where the next one we will want */
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ }
+ return (sctp_ifa);
+ }
+ /*
+ * plan_b: Look at all interfaces and find a preferred address. If
+ * none is preferred, fall through to plan_c.
+ */
+bound_all_plan_b:
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n",
+ sctp_ifn->ifn_name);
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n");
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at) {
+ /* already looked at this guy */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
+ continue;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, stcb, non_asoc_addr_ok,
+ dest_is_loop, dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "Found ifn:%p %d preferred source addresses\n",
+ ifn, num_preferred);
+ if (num_preferred == 0) {
+ /* None on this interface. */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefered -- skipping to next\n");
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "num preferred:%d on interface:%p cur_addr_num:%d\n",
+ num_preferred, sctp_ifn, cur_addr_num);
+
+ /*
+ * Ok we have num_eligible_addr set with how many we can
+ * use, this may vary from call to call due to addresses
+ * being deprecated etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+ if (sifa == NULL)
+ continue;
+ if (net) {
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n",
+ cur_addr_num);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa);
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+
+ }
+
+ /* plan_c: do we have an acceptable address on the emit interface */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n");
+ if (emit_ifn == NULL) {
+ goto plan_d;
+ }
+ LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+plan_d:
+ /*
+ * plan_d: We are in trouble. No preferred address on the emit
+ * interface. And not even a preferred address on all interfaces. Go
+ * out and see if we can find an acceptable address somewhere
+ * amongst all interfaces.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at)
+ /* already looked at this guy */
+ continue;
+
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some
+ * reason.. probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /*
+ * Ok, we can find NO address to source from: every candidate is
+ * either on our restricted list (and non_asoc_addr_ok is NOT set)
+ * or otherwise unusable. We can't source to this destination :-(
+ */
+ return (NULL);
+}
+
+
+
+/* tcb may be NULL */
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro,
+ struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id)
+{
+ struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst;
+ struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst;
+ struct sctp_ifa *answer;
+ uint8_t dest_is_priv, dest_is_loop;
+ sa_family_t fam;
+
+ /*-
+ * Rules:
+ * - Find the route if needed, cache it if I can.
+ * - Look at the interface address in the route; is it in the bound
+ * list? If so we have the best source.
+ * - If not, we must rotate amongst the addresses.
+ *
+ * Caveats and issues
+ *
+ * Do we need to pay attention to scope? We can have a private address
+ * or a global address we are sourcing or sending to. So if we draw
+ * it out:
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V4
+ *------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Private * Global * NAT
+ * -----------------------------------------
+ * <b> Private * Private * No problem
+ * -----------------------------------------
+ * <c> Global * Private * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V6
+ *------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Linklocal * Global *
+ * -----------------------------------------
+ * <b> Linklocal * Linklocal * No problem
+ * -----------------------------------------
+ * <c> Global * Linklocal * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ *
+ * And then we add to that what happens if there are multiple addresses
+ * assigned to an interface. Remember the ifa on an ifn is a linked
+ * list of addresses. So one interface can have more than one IP
+ * address. What happens if we have both a private and a global
+ * address? Do we then use the context of the destination to sort out
+ * which one is best? And what about NATs: sending P->G may get you a
+ * NAT translation, or should you prefer the G that is on the
+ * interface?
+ *
+ * Decisions:
+ *
+ * - count the number of addresses on the interface.
+ * - if it is one, no problem except case <c>.
+ * For <a> we will assume a NAT out there.
+ * - if there is more than one, then we need to worry about scope, P
+ * or G. We should prefer G -> G and P -> P if possible; then, as a
+ * secondary fallback, mixed types, with G -> P being a last-ditch
+ * one.
+ * - The above all works for bound-all, but for bound-specific we need
+ * to use the same concept while only considering the bound
+ * addresses. If the bound set is NOT assigned to the interface then
+ * we must use rotation amongst the bound addresses.
+ */
+ if (ro->ro_rt == NULL) {
+ /*
+ * Need a route to cache.
+ */
+ SCTP_RTALLOC(ro, vrf_id);
+ }
+ if (ro->ro_rt == NULL) {
+ return (NULL);
+ }
+ fam = to->sin_family;
+ dest_is_priv = dest_is_loop = 0;
+ /* Setup our scopes for the destination */
+ if (fam == AF_INET) {
+ /* Scope based on outbound address */
+ if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) {
+ dest_is_priv = 1;
+ } else if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ }
+ } else if (fam == AF_INET6) {
+ /* Scope based on outbound address */
+ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
+ /*
+ * If the route goes to the loopback address OR the
+ * address is a loopback address, we are loopback
+ * scope. But we don't use dest_is_priv (link local
+ * addresses).
+ */
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ } else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
+ dest_is_priv = 1;
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)to);
+ SCTP_IPI_ADDR_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * Bound all case
+ */
+ answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
+ dest_is_priv, dest_is_loop,
+ non_asoc_addr_ok, fam);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+ }
+ /*
+ * Subset bound case
+ */
+ if (stcb) {
+ answer = sctp_choose_boundspecific_stcb(inp, stcb, net, ro,
+ vrf_id, dest_is_priv,
+ dest_is_loop,
+ non_asoc_addr_ok, fam);
+ } else {
+ answer = sctp_choose_boundspecific_inp(inp, ro, vrf_id,
+ non_asoc_addr_ok,
+ dest_is_priv,
+ dest_is_loop, fam);
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+}
+
+static int
+sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize)
+{
+ struct cmsghdr cmh;
+ int tlen, at;
+
+ tlen = SCTP_BUF_LEN(control);
+ at = 0;
+ /*
+ * Independent of how many mbufs, find the c_type inside the control
+ * structure and copy out the data.
+ */
+ while (at < tlen) {
+ if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ /* not enough room for one more; we are done. */
+ return (0);
+ }
+ m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (((int)cmh.cmsg_len + at) > tlen) {
+ /*
+ * this is real messed up since there is not enough
+ * data here to cover the cmsg header. We are done.
+ */
+ return (0);
+ }
+ if ((cmh.cmsg_level == IPPROTO_SCTP) &&
+ (c_type == cmh.cmsg_type)) {
+ /* found the one we want, copy it out */
+ at += CMSG_ALIGN(sizeof(struct cmsghdr));
+ if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+ /*
+ * space of cmsg_len after header not big
+ * enough
+ */
+ return (0);
+ }
+ m_copydata(control, at, cpsize, data);
+ return (1);
+ } else {
+ at += CMSG_ALIGN(cmh.cmsg_len);
+ if (cmh.cmsg_len == 0) {
+ break;
+ }
+ }
+ }
+ /* not found */
+ return (0);
+}
+
+static struct mbuf *
+sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset,
+ struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature)
+{
+ struct mbuf *copy_init, *copy_initack, *m_at, *sig, *mret;
+ struct sctp_state_cookie *stc;
+ struct sctp_paramhdr *ph;
+ uint8_t *foo;
+ int sig_offset;
+ uint16_t cookie_sz;
+
+ mret = NULL;
+ mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr)), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (mret == NULL) {
+ return (NULL);
+ }
+ copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+ if (copy_init == NULL) {
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+ copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
+ M_DONTWAIT);
+ if (copy_initack == NULL) {
+ sctp_m_freem(mret);
+ sctp_m_freem(copy_init);
+ return (NULL);
+ }
+ /* easy side we just drop it on the end */
+ ph = mtod(mret, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(mret) = sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr);
+ stc = (struct sctp_state_cookie *)((caddr_t)ph +
+ sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(SCTP_STATE_COOKIE);
+ ph->param_length = 0; /* fill in at the end */
+ /* Fill in the stc cookie data */
+ memcpy(stc, stc_in, sizeof(struct sctp_state_cookie));
+
+ /* tack the INIT and then the INIT-ACK onto the chain */
+ cookie_sz = 0;
+ m_at = mret;
+ for (m_at = mret; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_init;
+ break;
+ }
+ }
+
+ for (m_at = copy_init; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_initack;
+ break;
+ }
+ }
+
+ for (m_at = copy_initack; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ break;
+ }
+ }
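+ /*
+ * Append a zeroed signature area at the end of the cookie chain;
+ * *signature lets the caller fill in the HMAC over the cookie later.
+ */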
+ sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+ if (sig == NULL) {
+ /* no space, so free the entire chain */
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(sig) = 0;
+ SCTP_BUF_NEXT(m_at) = sig;
+ sig_offset = 0;
+ foo = (uint8_t *) (mtod(sig, caddr_t)+sig_offset);
+ memset(foo, 0, SCTP_SIGNATURE_SIZE);
+ *signature = foo;
+ SCTP_BUF_LEN(sig) += SCTP_SIGNATURE_SIZE;
+ cookie_sz += SCTP_SIGNATURE_SIZE;
+ ph->param_length = htons(cookie_sz);
+ return (mret);
+}
+
+
+static uint8_t
+sctp_get_ect(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ uint8_t this_random;
+
+ /* Huh? */
+ if (sctp_ecn_enable == 0)
+ return (0);
+
+ if (sctp_ecn_nonce == 0)
+ /* no nonce, always return ECT0 */
+ return (SCTP_ECT0_BIT);
+
+ if (stcb->asoc.peer_supports_ecn_nonce == 0) {
+ /* Peer does NOT support it, so we send ECT0 only */
+ return (SCTP_ECT0_BIT);
+ }
+ if (chk == NULL)
+ return (SCTP_ECT0_BIT);
+
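+ /*
+ * ECN nonce: pick ECT0/ECT1 pseudo-randomly, consuming one bit per
+ * chunk from a cached block of random bytes and refreshing the block
+ * once all bits have been used.
+ */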
+ if ((stcb->asoc.hb_random_idx > 3) ||
+ ((stcb->asoc.hb_random_idx == 3) &&
+ (stcb->asoc.hb_ect_randombit > 7))) {
+ uint32_t rndval;
+
+warp_drive_sa:
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ if (stcb->asoc.hb_ect_randombit > 7) {
+ stcb->asoc.hb_ect_randombit = 0;
+ stcb->asoc.hb_random_idx++;
+ if (stcb->asoc.hb_random_idx > 3) {
+ goto warp_drive_sa;
+ }
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ }
+ if ((this_random >> stcb->asoc.hb_ect_randombit) & 0x01) {
+ if (chk != NULL)
+ /* ECN Nonce stuff */
+ chk->rec.data.ect_nonce = SCTP_ECT1_BIT;
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT1_BIT);
+ } else {
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT0_BIT);
+ }
+}
+
+static int
+sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, /* may be NULL */
+ struct sctp_nets *net,
+ struct sockaddr *to,
+ struct mbuf *m,
+ uint32_t auth_offset,
+ struct sctp_auth_chunk *auth,
+ int nofragment_flag,
+ int ecn_ok,
+ struct sctp_tmit_chunk *chk,
+ int out_of_asoc_ok,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
+{
+ /*
+ * Given an mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
+ * WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
+ * - fill in the HMAC digest of any AUTH chunk in the packet.
+ * - calculate and fill in the SCTP checksum.
+ * - prepend an IP address header.
+ * - if boundall use INADDR_ANY.
+ * - if boundspecific do source address selection.
+ * - set the fragmentation option for IPv4.
+ * - on return from IP output, check/adjust the MTU size of the output
+ * interface and the smallest_mtu size as well.
+ */
+ /* Will need ifdefs around this */
+ struct mbuf *o_pak;
+ struct mbuf *newm;
+ struct sctphdr *sctphdr;
+ int packet_length;
+ uint32_t csum;
+ int ret;
+ uint32_t vrf_id;
+ sctp_route_t *ro = NULL;
+
+ if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ sctp_m_freem(m);
+ return (EFAULT);
+ }
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = inp->def_vrf_id;
+ }
+
+ /* fill in the HMAC digest for any AUTH chunk in the packet */
+ if ((auth != NULL) && (stcb != NULL)) {
+ sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb);
+ }
+ /* Calculate the csum and fill in the length of the packet */
+ sctphdr = mtod(m, struct sctphdr *);
+ if (sctp_no_csum_on_loopback &&
+ (stcb) &&
+ (to->sa_family == AF_INET) &&
+ (stcb->asoc.loopback_scope)) {
+ sctphdr->checksum = 0;
+ /*
+ * This can probably now be taken out since my audit shows
+ * no more bad pktlen's coming in. But we will wait a while
+ * yet.
+ */
+ packet_length = sctp_calculate_len(m);
+ } else {
+ sctphdr->checksum = 0;
+ csum = sctp_calculate_sum(m, &packet_length, 0);
+ sctphdr->checksum = csum;
+ }
+
+ if (to->sa_family == AF_INET) {
+ struct ip *ip = NULL;
+ sctp_route_t iproute;
+ uint8_t tos_value;
+
+ newm = sctp_get_mbuf_for_msg(sizeof(struct ip), 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, sizeof(struct ip));
+ SCTP_BUF_LEN(newm) = sizeof(struct ip);
+ packet_length += sizeof(struct ip);
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+ if (net) {
+ tos_value = net->tos_flowlabel & 0x000000ff;
+ } else {
+ tos_value = inp->ip_inp.inp.inp_ip_tos;
+ }
+ if (nofragment_flag) {
+#if defined(WITH_CONVERT_IP_OFF) || defined(__FreeBSD__) || defined(__APPLE__)
+ ip->ip_off = IP_DF;
+#else
+ ip->ip_off = htons(IP_DF);
+#endif
+ } else
+ ip->ip_off = 0;
+
+ /* FreeBSD has a function for ip_id's */
+ ip->ip_id = ip_newid();
+
+ ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
+ ip->ip_len = packet_length;
+ if (stcb) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ ip->ip_tos = ((u_char)(tos_value & 0xfc) | sctp_get_ect(stcb, chk));
+ } else {
+ /* No ECN */
+ ip->ip_tos = (u_char)(tos_value & 0xfc);
+ }
+ } else {
+ /* no association at all */
+ ip->ip_tos = (tos_value & 0xfc);
+ }
+ ip->ip_p = IPPROTO_SCTP;
+ ip->ip_sum = 0;
+ if (net == NULL) {
+ ro = &iproute;
+ memset(&iproute, 0, sizeof(iproute));
+ memcpy(&ro->ro_dst, to, to->sa_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ /* Now the address selection part */
+ ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr;
+
+ /* call the routine to select the src address */
+ if (net) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ if (out_of_asoc_ok) {
+ /* do not cache */
+ goto temp_v4_src;
+ }
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp, stcb,
+ ro, net, out_of_asoc_ok,
+ vrf_id);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ /* No route to host */
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ ip->ip_src = net->ro._s_addr->address.sin.sin_addr;
+ } else {
+ struct sctp_ifa *_lsrc;
+
+ temp_v4_src:
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ ip->ip_src = _lsrc->address.sin.sin_addr;
+ sctp_free_ifa(_lsrc);
+ }
+
+ /*
+ * If source address selection fails and we find no route
+ * then the ip_output should fail as well with a
+ * NO_ROUTE_TO_HOST type error. We probably should catch
+ * that somewhere and abort the association right away
+ * (assuming this is an INIT being sent).
+ */
+ if ((ro->ro_rt == NULL)) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here (yet)!
+ */
+ no_route:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "%s: dropped packet - no valid source addr\n",
+ __FUNCTION__);
+ if (net) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "Destination was ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1,
+ &net->ro._l_addr.sa);
+ if (net->dest_state & SCTP_ADDR_CONFIRMED) {
+ if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net,
+ so_locked);
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a
+ * destination is
+ * unreachable, the PF bit
+ * is turned off. This
+ * allows an unambiguous use
+ * of the PF bit for
+ * destinations that are
+ * reachable but potentially
+ * failed. If the
+ * destination is set to the
+ * unreachable state, also
+ * set the destination to
+ * the PF state.
+ */
+ /*
+ * Add debug message here if
+ * destination is not in PF
+ * state.
+ */
+ /*
+ * Stop any running T3
+ * timers here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ }
+ }
+ if (stcb) {
+ if (net == stcb->asoc.primary_destination) {
+ /* need a new primary */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ if (alt != net) {
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ }
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
+ sctp_m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ if (ro != &iproute) {
+ memcpy(&iproute, ro, sizeof(*ro));
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n",
+ (uint32_t) (ntohl(ip->ip_src.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
+ (uint32_t) (ntohl(ip->ip_dst.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
+ ro->ro_rt);
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ sctp_m_freem(m);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+
+ /* send it out. table id is taken from stcb */
+ SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret)
+ SCTP_STAT_INCR(sctps_senderrors);
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
+ if (net == NULL) {
+ /* free temporary routes */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (mtu &&
+ (stcb->asoc.smallest_mtu > mtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called after ip_output mtu-change:%d\n",
+ mtu);
+#endif
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ }
+ } else if (ro->ro_rt == NULL) {
+ /* route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ return (ret);
+ }
+#ifdef INET6
+ else if (to->sa_family == AF_INET6) {
+ uint32_t flowlabel;
+ struct ip6_hdr *ip6h;
+ struct route_in6 ip6route;
+ struct ifnet *ifp;
+ u_char flowTop;
+ uint16_t flowBottom;
+ u_char tosBottom, tosTop;
+ struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp;
+ int prev_scope = 0;
+ struct sockaddr_in6 lsa6_storage;
+ int error;
+ u_short prev_port = 0;
+
+ if (net != NULL) {
+ flowlabel = net->tos_flowlabel;
+ } else {
+ flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ }
+
+ newm = sctp_get_mbuf_for_msg(sizeof(struct ip6_hdr), 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, sizeof(struct ip6_hdr));
+ SCTP_BUF_LEN(newm) = sizeof(struct ip6_hdr);
+ packet_length += sizeof(struct ip6_hdr);
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+
+ ip6h = mtod(m, struct ip6_hdr *);
+ /*
+ * We assume here that inp_flow is in host byte order within
+ * the TCB!
+ */
+ flowBottom = flowlabel & 0x0000ffff;
+ flowTop = ((flowlabel & 0x000f0000) >> 16);
+ tosTop = (((flowlabel & 0xf0) >> 4) | IPV6_VERSION);
+ /* protect *sin6 from overwrite */
+ sin6 = (struct sockaddr_in6 *)to;
+ tmp = *sin6;
+ sin6 = &tmp;
+
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, ip6_use_defzone) != 0) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (net == NULL) {
+ memset(&ip6route, 0, sizeof(ip6route));
+ ro = (sctp_route_t *) & ip6route;
+ memcpy(&ro->ro_dst, sin6, sin6->sin6_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ if (stcb != NULL) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ tosBottom = (((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) | sctp_get_ect(stcb, chk)) << 4);
+ } else {
+ /* No ECN */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
+ } else {
+ /* we could get no asoc if it is a O-O-T-B packet */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
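+ /*
+ * Assemble the IPv6 version, traffic class (including the ECN bits)
+ * and flow label into the 32-bit ip6_flow word.
+ */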
+ ip6h->ip6_flow = htonl(((tosTop << 24) | ((tosBottom | flowTop) << 16) | flowBottom));
+ ip6h->ip6_nxt = IPPROTO_SCTP;
+ ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_dst = sin6->sin6_addr;
+
+ /*
+ * Add SRC address selection here: we can only reuse to a
+ * limited degree the kame src-addr-sel, since we can try
+ * their selection but it may not be bound.
+ */
+ bzero(&lsa6_tmp, sizeof(lsa6_tmp));
+ lsa6_tmp.sin6_family = AF_INET6;
+ lsa6_tmp.sin6_len = sizeof(lsa6_tmp);
+ lsa6 = &lsa6_tmp;
+ if (net) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ if (out_of_asoc_ok) {
+ /* do not cache */
+ goto temp_v6_src;
+ }
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n");
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr;
+ } else {
+ struct sctp_ifa *_lsrc;
+
+ temp_v6_src:
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr;
+ sctp_free_ifa(_lsrc);
+ }
+ lsa6->sin6_port = inp->sctp_lport;
+
+ if ((ro->ro_rt == NULL)) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here!
+ */
+ goto no_route;
+ }
+ /*
+ * XXX: sa6 may not have a valid sin6_scope_id in the
+ * non-SCOPEDROUTING case.
+ */
+ bzero(&lsa6_storage, sizeof(lsa6_storage));
+ lsa6_storage.sin6_family = AF_INET6;
+ lsa6_storage.sin6_len = sizeof(lsa6_storage);
+ if ((error = sa6_recoverscope(&lsa6_storage)) != 0) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error);
+ sctp_m_freem(m);
+ return (error);
+ }
+ /* XXX */
+ lsa6_storage.sin6_addr = lsa6->sin6_addr;
+ lsa6_storage.sin6_port = inp->sctp_lport;
+ lsa6 = &lsa6_storage;
+ ip6h->ip6_src = lsa6->sin6_addr;
+
+ /*
+ * We set the hop limit now since there is a good chance
+ * that our ro pointer is now filled
+ */
+ ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro);
+ ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+
+#ifdef SCTP_DEBUG
+ /* Copy to be sure something bad is not happening */
+ sin6->sin6_addr = ip6h->ip6_dst;
+ lsa6->sin6_addr = ip6h->ip6_src;
+#endif
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6);
+ if (net) {
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ /* preserve the port and scope for link local send */
+ prev_scope = sin6->sin6_scope_id;
+ prev_port = sin6->sin6_port;
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+
+ /* send it out. table id is taken from stcb */
+ SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp,
+ stcb, vrf_id);
+
+ if (net) {
+ /* for link local this must be done */
+ sin6->sin6_scope_id = prev_scope;
+ sin6->sin6_port = prev_port;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret) {
+ SCTP_STAT_INCR(sctps_senderrors);
+ }
+ if (net == NULL) {
+ /* Now if we had a temp route free it */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if (ro->ro_rt == NULL) {
+ /* Route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (mtu &&
+ (stcb->asoc.smallest_mtu > mtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called after ip6_output mtu-change:%d\n",
+ mtu);
+#endif
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ }
+ } else if (ifp) {
+ if (ND_IFINFO(ifp)->linkmtu &&
+ (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called via ifp ND_IFINFO() linkmtu:%d\n",
+ ND_IFINFO(ifp)->linkmtu);
+#endif
+ sctp_mtu_size_reset(inp,
+ &stcb->asoc,
+ ND_IFINFO(ifp)->linkmtu);
+ }
+ }
+ }
+ return (ret);
+ }
+#endif
+ else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+ ((struct sockaddr *)to)->sa_family);
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+}
+
+
+void
+sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m, *m_at, *mp_last;
+ struct sctp_nets *net;
+ struct sctp_init_msg *initm;
+ struct sctp_supported_addr_param *sup_addr;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ int cnt_inits_to = 0;
+ int padval, ret;
+ int num_ext;
+ int p_len;
+
+ /* INIT's always go to the primary (and usually ONLY address) */
+ mp_last = NULL;
+ net = stcb->asoc.primary_destination;
+ if (net == NULL) {
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net == NULL) {
+ /* TSNH */
+ return;
+ }
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr(stcb, NULL, net);
+ } else {
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
+ if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) {
+ /*
+ * special hook, if we are sending to link local it will not
+ * show up in our private address count.
+ */
+ struct sockaddr_in6 *sin6l;
+
+ sin6l = &net->ro._l_addr.sin6;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr))
+ cnt_inits_to = 1;
+ }
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* This case should not happen */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n");
+ return;
+ }
+ /* start the INIT timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
+
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_msg);
+ /*
+ * assume peer supports asconf in order to be able to queue local
+ * address changes while an INIT is in flight and before the assoc
+ * is established.
+ */
+ stcb->asoc.peer_supports_asconf = 1;
+ /* Now lets put the SCTP header in place */
+ initm = mtod(m, struct sctp_init_msg *);
+ initm->sh.src_port = inp->sctp_lport;
+ initm->sh.dest_port = stcb->rport;
+ initm->sh.v_tag = 0;
+ initm->sh.checksum = 0; /* calculate later */
+ /* now the chunk header */
+ initm->msg.ch.chunk_type = SCTP_INITIATION;
+ initm->msg.ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ initm->msg.ch.chunk_length = 0;
+ /* place in my tag */
+ initm->msg.init.initiate_tag = htonl(stcb->asoc.my_vtag);
+ /* set up some of the credits. */
+ initm->msg.init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(inp->sctp_socket),
+ SCTP_MINIMAL_RWND));
+
+ initm->msg.init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
+ initm->msg.init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
+ initm->msg.init.initial_tsn = htonl(stcb->asoc.init_seq_number);
+ /* now the address restriction */
+ sup_addr = (struct sctp_supported_addr_param *)((caddr_t)initm +
+ sizeof(*initm));
+ sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+ /* we support 2 types IPv6/IPv4 */
+ sup_addr->ph.param_length = htons(sizeof(*sup_addr) +
+ sizeof(uint16_t));
+ sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
+ sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS);
+ SCTP_BUF_LEN(m) += sizeof(*sup_addr) + sizeof(uint16_t);
+
+ if (inp->sctp_ep.adaptation_layer_indicator) {
+ struct sctp_adaptation_layer_indication *ali;
+
+ ali = (struct sctp_adaptation_layer_indication *)(
+ (caddr_t)sup_addr + sizeof(*sup_addr) + sizeof(uint16_t));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali +
+ sizeof(*ali));
+ } else {
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)sup_addr +
+ sizeof(*sup_addr) + sizeof(uint16_t));
+ }
+
+ /* now any cookie time extensions */
+ if (stcb->asoc.cookie_preserve_req) {
+ struct sctp_cookie_perserve_param *cookie_preserve;
+
+ cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn);
+ cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+ cookie_preserve->ph.param_length = htons(
+ sizeof(*cookie_preserve));
+ cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+ SCTP_BUF_LEN(m) += sizeof(*cookie_preserve);
+ ecn = (struct sctp_ecn_supported_param *)(
+ (caddr_t)cookie_preserve + sizeof(*cookie_preserve));
+ stcb->asoc.cookie_preserve_req = 0;
+ }
+ /* ECN parameter */
+ if (sctp_ecn_enable == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)
+ ((caddr_t)prsctp + sizeof(*prsctp));
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!sctp_auth_disable)
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
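+ /* zero out any padding required */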
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (sctp_ecn_nonce) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!sctp_auth_disable) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+
+ /* attach RANDOM parameter, if available */
+ if (stcb->asoc.authinfo.random != NULL) {
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len;
+#ifdef SCTP_AUTH_DRAFT_04
+ randp->ph.param_type = htons(SCTP_RANDOM);
+ randp->ph.param_length = htons(p_len);
+ bcopy(stcb->asoc.authinfo.random->key,
+ randp->random_data,
+ stcb->asoc.authinfo.random_len);
+#else
+ /* random key already contains the header */
+ bcopy(stcb->asoc.authinfo.random->key, randp, p_len);
+#endif
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+ * To optimize this we could put the scoping stuff into a
+ * structure and remove the individual uint8's from the
+ * assoc structure. Then we could just pass in the address
+ * within the stcb.. but for now this is a quick hack to get
+ * the address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal;
+ scp.loopback_scope = stcb->asoc.loopback_scope;
+ scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ scp.local_scope = stcb->asoc.local_scope;
+ scp.site_scope = stcb->asoc.site_scope;
+
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+ /* calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ if (SCTP_BUF_NEXT(m_at) == NULL)
+ mp_last = m_at;
+ p_len += SCTP_BUF_LEN(m_at);
+ }
+ initm->msg.ch.chunk_length = htons((p_len - sizeof(struct sctphdr)));
+ /*
+ * We pass 0 here to NOT set IP_DF if it's IPv4; we ignore the return
+ * here since the timer will drive a retransmission.
+ */
+
+ /* I don't expect this to execute but we will be safe here */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /*
+ * The compiler worries that mp_last may not be set even
+ * though I think it is impossible :-> however we add
+ * mp_last here just in case.
+ */
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n");
+ ret = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m, 0, NULL, 0, 0, NULL, 0, so_locked);
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+}
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
+ int param_offset, int *abort_processing, struct sctp_chunkhdr *cp)
+{
+ /*
+ * Given an mbuf containing an INIT or INIT-ACK with the param_offset
+ * being equal to the beginning of the params, i.e. (iphlen +
+ * sizeof(struct sctp_init_msg)), parse through the parameters to the
+ * end of the mbuf verifying that all parameters are known.
+ *
+ * For unknown parameters build and return a mbuf with
+ * UNRECOGNIZED_PARAMETER errors. If the flags indicate to stop
+ * processing this chunk stop, and set *abort_processing to 1.
+ *
+ * By having param_offset be pre-set to where parameters begin it is
+ * hoped that this routine may be reused in the future by new
+ * features.
+ */
+ struct sctp_paramhdr *phdr, params;
+
+ struct mbuf *mat, *op_err;
+ char tempbuf[SCTP_PARAM_BUFFER_SIZE];
+ int at, limit, pad_needed;
+ uint16_t ptype, plen, padded_size;
+ int err_at;
+
+ *abort_processing = 0;
+ mat = in_initpkt;
+ err_at = 0;
+ limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
+ at = param_offset;
+ op_err = NULL;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) {
+ /* wacked parameter */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen);
+ goto invalid_size;
+ }
+ limit -= SCTP_SIZE32(plen);
+ /*-
+ * All parameters for all chunks that we know/understand are
+ * listed here. We process them in other places and make
+ * appropriate stop actions per the upper bits. However this
+ * is the generic routine processors can call to get back
+ * an operr to either incorporate (init-ack) or send.
+ */
+ padded_size = SCTP_SIZE32(plen);
+ switch (ptype) {
+ /* Param's with variable size */
+ case SCTP_HEARTBEAT_INFO:
+ case SCTP_STATE_COOKIE:
+ case SCTP_UNRECOG_PARAM:
+ case SCTP_ERROR_CAUSE_IND:
+ /* ok skip fwd */
+ at += padded_size;
+ break;
+ /* Param's with variable size within a range */
+ case SCTP_CHUNK_LIST:
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUPPORTED_ADDRTYPE:
+ if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_RANDOM:
+ if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_ADD_IP_ADDRESS:
+ if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) &&
+ (padded_size != sizeof(struct sctp_asconf_addr_param))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ /* Param's with a fixed size */
+ case SCTP_IPV4_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv4addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv6addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_COOKIE_PRESERVE:
+ if (padded_size != sizeof(struct sctp_cookie_perserve_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ECN_NONCE_SUPPORTED:
+ case SCTP_PRSCTP_SUPPORTED:
+ if (padded_size != sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecnnonce/prsctp %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ECN_CAPABLE:
+ if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ULP_ADAPTATION:
+ if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adapatation %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ if (padded_size != sizeof(struct sctp_asconf_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_HOSTNAME_ADDRESS:
+ {
+ /* We can NOT handle HOST NAME addresses!! */
+ int l_len;
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
+ *abort_processing = 1;
+ if (op_err == NULL) {
+ /* Ok need to try to get a mbuf */
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ /*
+ * pre-reserve space for ip
+ * and sctp header and
+ * chunk hdr
+ */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ return (NULL);
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ return (op_err);
+ break;
+ }
+ default:
+ /*
+ * we do not recognize the parameter; figure out what
+ * to do.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype);
+ if ((ptype & 0x4000) == 0x4000) {
+ /* Report bit is set?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n");
+ if (op_err == NULL) {
+ int l_len;
+
+ /* Ok need to try to get an mbuf */
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_UNRECOG_PARAM);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ if (plen > sizeof(tempbuf)) {
+ plen = sizeof(tempbuf);
+ }
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ op_err = NULL;
+ goto more_processing;
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ }
+ more_processing:
+ if ((ptype & 0x8000) == 0x0000) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n");
+ return (op_err);
+ } else {
+ /* skip this chunk and continue processing */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n");
+ at += SCTP_SIZE32(plen);
+ }
+ break;
+
+ }
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ }
+ return (op_err);
+invalid_size:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
+ *abort_processing = 1;
+ if ((op_err == NULL) && phdr) {
+ int l_len;
+
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += (2 * sizeof(struct sctp_paramhdr));
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if ((op_err) && phdr) {
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr));
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ /* Only copy back the p-hdr that caused the issue */
+ m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr);
+ }
+ return (op_err);
+}
+
+static int
+sctp_are_there_new_addresses(struct sctp_association *asoc,
+ struct mbuf *in_initpkt, int iphlen, int offset)
+{
+ /*
+ * Given an INIT packet, look through the packet to verify that there
+ * are NO new addresses. As we go through the parameters, add reports
+ * of any un-understood parameters that require an error. Also we
+ * must return (1) to drop the packet if we see an un-understood
+ * parameter that tells us to drop the chunk.
+ */
+ struct sockaddr_in sin4, *sa4;
+ struct sockaddr_in6 sin6, *sa6;
+ struct sockaddr *sa_touse;
+ struct sockaddr *sa;
+ struct sctp_paramhdr *phdr, params;
+ struct ip *iph;
+ struct mbuf *mat;
+ uint16_t ptype, plen;
+ int err_at;
+ uint8_t fnd;
+ struct sctp_nets *net;
+
+ memset(&sin4, 0, sizeof(sin4));
+ memset(&sin6, 0, sizeof(sin6));
+ sin4.sin_family = AF_INET;
+ sin4.sin_len = sizeof(sin4);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+
+ sa_touse = NULL;
+ /* First what about the src address of the pkt ? */
+ iph = mtod(in_initpkt, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* source addr is IPv4 */
+ sin4.sin_addr = iph->ip_src;
+ sa_touse = (struct sockaddr *)&sin4;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* source addr is IPv6 */
+ struct ip6_hdr *ip6h;
+
+ ip6h = mtod(in_initpkt, struct ip6_hdr *);
+ sin6.sin6_addr = ip6h->ip6_src;
+ sa_touse = (struct sockaddr *)&sin6;
+ } else {
+ return (1);
+ }
+
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == sa_touse->sa_family) {
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(&sa6->sin6_addr,
+ &sin6.sin6_addr)) {
+ fnd = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (fnd == 0) {
+ /* New address added! No need to look further. */
+ return (1);
+ }
+ /* Ok so far, let's munge through the rest of the packet */
+ mat = in_initpkt;
+ err_at = 0;
+ sa_touse = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+ }
+ if (sa_touse) {
+ /* ok, sa_touse points to one to check */
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family != sa_touse->sa_family) {
+ continue;
+ }
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(
+ &sa6->sin6_addr, &sin6.sin6_addr)) {
+ fnd = 1;
+ break;
+ }
+ }
+ }
+ if (!fnd) {
+ /* New addr added! no need to look further */
+ return (1);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ }
+ return (0);
+}
+
+/*
+ * Given an MBUF chain that was sent to us containing an INIT, build an
+ * INIT-ACK with COOKIE and send it back. We assume that the in_initpkt has done
+ * a pullup to include the IPv6/v4 header, SCTP header and initial part of the
+ * INIT message (i.e. the struct sctp_init_msg).
+ */
+void
+sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *init_pkt, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *init_chk, uint32_t vrf_id, int hold_inp_lock)
+{
+ struct sctp_association *asoc;
+ struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+ struct sctp_init_msg *initackm_out;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ struct sockaddr_storage store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ sctp_route_t *ro;
+ struct ip *iph;
+ struct ip6_hdr *ip6;
+ struct sockaddr *to;
+ struct sctp_state_cookie stc;
+ struct sctp_nets *net = NULL;
+ uint8_t *signature = NULL;
+ int cnt_inits_to = 0;
+ uint16_t his_limit, i_want;
+ int abort_flag, padval;
+ int num_ext;
+ int p_len;
+ struct socket *so;
+
+ if (stcb)
+ asoc = &stcb->asoc;
+ else
+ asoc = NULL;
+ mp_last = NULL;
+ if ((asoc != NULL) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ (sctp_are_there_new_addresses(asoc, init_pkt, iphlen, offset))) {
+ /* new addresses, out of here in non-cookie-wait states */
+ /*
+ * Send an ABORT; we don't add the new-address error clause,
+ * though we even set the T bit and copy in the 0 tag. This
+ * looks no different than if no listener was present.
+ */
+ sctp_send_abort(init_pkt, iphlen, sh, 0, NULL, vrf_id);
+ return;
+ }
+ abort_flag = 0;
+ op_err = sctp_arethere_unrecognized_parameters(init_pkt,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)init_chk);
+ if (abort_flag) {
+ sctp_send_abort(init_pkt, iphlen, sh,
+ init_chk->init.initiate_tag, op_err, vrf_id);
+ return;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ if (op_err)
+ sctp_m_freem(op_err);
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_msg);
+
+ /* the time I built cookie */
+ (void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
+
+ /* populate any tie tags */
+ if (asoc != NULL) {
+ /* unlock before tag selections */
+ stc.tie_tag_my_vtag = asoc->my_vtag_nonce;
+ stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce;
+ stc.cookie_life = asoc->cookie_life;
+ net = asoc->primary_destination;
+ } else {
+ stc.tie_tag_my_vtag = 0;
+ stc.tie_tag_peer_vtag = 0;
+ /* life I will award this cookie */
+ stc.cookie_life = inp->sctp_ep.def_cookie_life;
+ }
+
+ /* copy in the ports for later check */
+ stc.myport = sh->dest_port;
+ stc.peerport = sh->src_port;
+
+ /*
+ * If we wanted to honor cookie life extensions, we would add to
+ * stc.cookie_life. For now we should NOT honor any extension.
+ */
+ stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct inpcb *in_inp;
+
+ /* It's a V6 socket */
+ in_inp = (struct inpcb *)inp;
+ stc.ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(in_inp) == 0) {
+ stc.ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ stc.ipv4_addr_legal = 0;
+ }
+ } else {
+ /* It's a V4 socket, not V6 */
+ stc.ipv4_addr_legal = 1;
+ stc.ipv6_addr_legal = 0;
+ }
+
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stc.ipv4_scope = 1;
+#else
+ stc.ipv4_scope = 0;
+#endif
+ /* now for scope setup */
+ memset((caddr_t)&store, 0, sizeof(store));
+ sin = (struct sockaddr_in *)&store;
+ sin6 = (struct sockaddr_in6 *)&store;
+ if (net == NULL) {
+ to = (struct sockaddr *)&store;
+ iph = mtod(init_pkt, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ struct sctp_ifa *addr;
+ sctp_route_t iproute;
+
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = sh->src_port;
+ sin->sin_addr = iph->ip_src;
+ /* lookup address */
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ /* local from address */
+ memset(&iproute, 0, sizeof(iproute));
+ ro = &iproute;
+ memcpy(&ro->ro_dst, sin, sizeof(*sin));
+ addr = sctp_source_address_selection(inp, NULL,
+ ro, NULL, 0,
+ vrf_id);
+ if (addr == NULL)
+ return;
+
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ stc.laddress[0] = addr->address.sin.sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ /* scope_id is only for v6 */
+ stc.scope_id = 0;
+#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stc.ipv4_scope = 1;
+ }
+#else
+ stc.ipv4_scope = 1;
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ /* Must use the address in this case */
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+ stc.loopback_scope = 1;
+ stc.ipv4_scope = 1;
+ stc.site_scope = 1;
+ stc.local_scope = 0;
+ }
+ sctp_free_ifa(addr);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct sctp_ifa *addr;
+ struct route_in6 iproute6;
+
+ ip6 = mtod(init_pkt, struct ip6_hdr *);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = sh->src_port;
+ sin6->sin6_addr = ip6->ip6_src;
+ /* lookup address */
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ sin6->sin6_scope_id = 0;
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ stc.scope_id = 0;
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
+ stc.loopback_scope = 1;
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is a LINK_LOCAL we
+ * must have common both site and local
+ * scope. Don't set local scope though since
+ * we must depend on the source to be added
+ * implicitly. We cannot assure just because
+ * we share one link that all links are
+ * common.
+ */
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ /*
+ * we start counting for the private address
+ * stuff at 1. since the link local we
+ * source from won't show up in our scoped
+ * count.
+ */
+ cnt_inits_to = 1;
+ /* pull out the scope_id from incoming pkt */
+ /* FIX ME: does this have scope from rcvif? */
+ (void)sa6_recoverscope(sin6);
+
+ sa6_embedscope(sin6, ip6_use_defzone);
+ stc.scope_id = sin6->sin6_scope_id;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is SITE_LOCAL then
+ * we must have site scope in common.
+ */
+ stc.site_scope = 1;
+ }
+ /* local from address */
+ memset(&iproute6, 0, sizeof(iproute6));
+ ro = (sctp_route_t *) & iproute6;
+ memcpy(&ro->ro_dst, sin6, sizeof(*sin6));
+ addr = sctp_source_address_selection(inp, NULL,
+ ro, NULL, 0, vrf_id);
+ if (addr == NULL)
+ return;
+
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ memcpy(&stc.laddress, &addr->address.sin6.sin6_addr, sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ sctp_free_ifa(addr);
+ }
+ } else {
+ /* set the scope per the existing tcb */
+ struct sctp_nets *lnet;
+
+ stc.loopback_scope = asoc->loopback_scope;
+ stc.ipv4_scope = asoc->ipv4_local_scope;
+ stc.site_scope = asoc->site_scope;
+ stc.local_scope = asoc->local_scope;
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) {
+ /*
+ * if we have a LL address, start
+ * counting at 1.
+ */
+ cnt_inits_to = 1;
+ }
+ }
+ }
+
+ /* use the net pointer */
+ to = (struct sockaddr *)&net->ro._l_addr;
+ if (to->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)to;
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+
+ }
+ stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ } else if (to->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)to;
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+ }
+ memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ }
+ }
+ /* Now lets put the SCTP header in place */
+ initackm_out = mtod(m, struct sctp_init_msg *);
+ initackm_out->sh.src_port = inp->sctp_lport;
+ initackm_out->sh.dest_port = sh->src_port;
+ initackm_out->sh.v_tag = init_chk->init.initiate_tag;
+ /* Save it off for quick ref */
+ stc.peers_vtag = init_chk->init.initiate_tag;
+ initackm_out->sh.checksum = 0; /* calculate later */
+ /* who are we */
+ memcpy(stc.identification, SCTP_VERSION_STRING,
+ min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
+ /* now the chunk header */
+ initackm_out->msg.ch.chunk_type = SCTP_INITIATION_ACK;
+ initackm_out->msg.ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ initackm_out->msg.ch.chunk_length = 0;
+ /* place in my tag */
+ if ((asoc != NULL) &&
+ ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
+ /* re-use the v-tags and init-seq here */
+ initackm_out->msg.init.initiate_tag = htonl(asoc->my_vtag);
+ initackm_out->msg.init.initial_tsn = htonl(asoc->init_seq_number);
+ } else {
+ uint32_t vtag, itsn;
+
+ if (hold_inp_lock) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (asoc) {
+ atomic_add_int(&asoc->refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ vtag = sctp_select_a_tag(inp, 1);
+ initackm_out->msg.init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ itsn = sctp_select_initial_TSN(&inp->sctp_ep);
+ initackm_out->msg.init.initial_tsn = htonl(itsn);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&asoc->refcnt, -1);
+ } else {
+ vtag = sctp_select_a_tag(inp, 1);
+ initackm_out->msg.init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ initackm_out->msg.init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep));
+ }
+ if (hold_inp_lock) {
+ SCTP_INP_RLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ /* save away my tag too */
+ stc.my_vtag = initackm_out->msg.init.initiate_tag;
+
+ /* set up some of the credits. */
+ so = inp->sctp_socket;
+ if (so == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ } else {
+ initackm_out->msg.init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND));
+ }
+ /* set what I want */
+ his_limit = ntohs(init_chk->init.num_inbound_streams);
+ /* choose what I want */
+ if (asoc != NULL) {
+ if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+ i_want = asoc->streamoutcnt;
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ if (his_limit < i_want) {
+ /* I Want more :< */
+ initackm_out->msg.init.num_outbound_streams = init_chk->init.num_inbound_streams;
+ } else {
+ /* I can have what I want :> */
+ initackm_out->msg.init.num_outbound_streams = htons(i_want);
+ }
+ /* tell him his limit. */
+ initackm_out->msg.init.num_inbound_streams =
+ htons(inp->sctp_ep.max_open_streams_intome);
+ /* setup the ECN pointer */
+
+ if (inp->sctp_ep.adaptation_layer_indicator) {
+ struct sctp_adaptation_layer_indication *ali;
+
+ ali = (struct sctp_adaptation_layer_indication *)(
+ (caddr_t)initackm_out + sizeof(*initackm_out));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali +
+ sizeof(*ali));
+ } else {
+ ecn = (struct sctp_ecn_supported_param *)(
+ (caddr_t)initackm_out + sizeof(*initackm_out));
+ }
+
+ /* ECN parameter */
+ if (sctp_ecn_enable == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)
+ ((caddr_t)prsctp + sizeof(*prsctp));
+
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!sctp_auth_disable)
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (sctp_ecn_nonce) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!sctp_auth_disable) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+ uint16_t random_len;
+
+ /* generate and add RANDOM parameter */
+ random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ randp->ph.param_type = htons(SCTP_RANDOM);
+ p_len = sizeof(*randp) + random_len;
+ randp->ph.param_length = htons(p_len);
+ SCTP_READ_RANDOM(randp->random_data, random_len);
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+ * To optimize this we could put the scoping stuff into a
+ * structure and remove the individual uint8's from the stc
+ * structure. Then we could just stuff the address in from
+ * the stc, but for now this is a quick hack to get the
+ * address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+ scp.loopback_scope = stc.loopback_scope;
+ scp.ipv4_local_scope = stc.ipv4_scope;
+ scp.local_scope = stc.local_scope;
+ scp.site_scope = stc.site_scope;
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+ /* tack on the operational error if present */
+ if (op_err) {
+ struct mbuf *ol;
+ int llen;
+
+ llen = 0;
+ ol = op_err;
+ while (ol) {
+ llen += SCTP_BUF_LEN(ol);
+ ol = SCTP_BUF_NEXT(ol);
+ }
+ if (llen % 4) {
+ /* must add a pad to the param */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (llen % 4);
+ m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+ }
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = op_err;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ /* pre-calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ break;
+ }
+ }
+
+ /* Now we must build a cookie */
+ m_cookie = sctp_add_cookie(inp, init_pkt, offset, m,
+ sizeof(struct sctphdr), &stc, &signature);
+ if (m_cookie == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ }
+ /* Now append the cookie to the end and update the space/size */
+ SCTP_BUF_NEXT(m_tmp) = m_cookie;
+
+ for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ mp_last = m_tmp;
+ break;
+ }
+ }
+ /*
+ * Place in the size, but we don't include the last pad (if any) in
+ * the INIT-ACK.
+ */
+ initackm_out->msg.ch.chunk_length = htons((p_len - sizeof(struct sctphdr)));
+
+ /*
+ * Time to sign the cookie, we don't sign over the cookie signature
+ * though thus we set trailer.
+ */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)],
+ SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr),
+ (uint8_t *) signature, SCTP_SIGNATURE_SIZE);
+ /*
+ * We pass 0 here to NOT set IP_DF if it is IPv4; we ignore the return
+ * here since the timer will drive a retransmission.
+ */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /* see my previous comments on mp_last */
+ int ret;
+
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ (void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
+ NULL, 0, SCTP_SO_NOT_LOCKED);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+
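+/*
+ * Insert the stream queue strq onto the association's output "wheel",
+ * keeping the wheel sorted by ascending stream number.  If the stream
+ * is already linked on the wheel this is a no-op.  The TCB send lock
+ * is taken unless the caller indicates it already holds it.
+ */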
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holds_lock)
+{
+ struct sctp_stream_out *stre, *strn;
+
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ if ((strq->next_spoke.tqe_next) ||
+ (strq->next_spoke.tqe_prev)) {
+ /* already on wheel */
+ goto outof_here;
+ }
+ stre = TAILQ_FIRST(&asoc->out_wheel);
+ if (stre == NULL) {
+ /* only one on wheel */
+ TAILQ_INSERT_HEAD(&asoc->out_wheel, strq, next_spoke);
+ goto outof_here;
+ }
+ for (; stre; stre = strn) {
+ strn = TAILQ_NEXT(stre, next_spoke);
+ if (stre->stream_no > strq->stream_no) {
+ TAILQ_INSERT_BEFORE(stre, strq, next_spoke);
+ goto outof_here;
+ } else if (stre->stream_no == strq->stream_no) {
+ /* huh, should not happen */
+ goto outof_here;
+ } else if (strn == NULL) {
+ /* next one is null */
+ TAILQ_INSERT_AFTER(&asoc->out_wheel, stre, strq,
+ next_spoke);
+ }
+ }
+outof_here:
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
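+/*
+ * Take the stream off the output wheel, but only if nothing new has
+ * been queued on it in the meantime.
+ */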
+static void
+sctp_remove_from_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq)
+{
+ /* take off and then setup so we know it is not on the wheel */
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (TAILQ_FIRST(&strq->outqueue)) {
+ /* more was added */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return;
+ }
+ TAILQ_REMOVE(&asoc->out_wheel, strq, next_spoke);
+ strq->next_spoke.tqe_next = NULL;
+ strq->next_spoke.tqe_prev = NULL;
+ SCTP_TCB_SEND_UNLOCK(stcb);
+}
+
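+/*
+ * When the peer supports PR-SCTP, walk the sent and send queues and
+ * release already-queued PR-SCTP chunks (buffer-space limited ones on
+ * the sent queue, TTL limited ones on the send queue) whose priority
+ * is equal to or lower than the new message's, until at least dataout
+ * bytes of space have been freed.
+ */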
+static void
+sctp_prune_prsctp(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ int dataout)
+{
+ int freed_spc = 0;
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if ((asoc->peer_supports_prsctp) &&
+ (asoc->sent_queue_cnt_removeable > 0)) {
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ /*
+ * Look for chunks marked with the PR_SCTP flag AND
+ * the buffer space flag. If the one being sent is
+ * equal or greater priority then purge the old one
+ * and free some space.
+ */
+ if (PR_SCTP_BUF_ENABLED(chk->flags)) {
+ /*
+ * This one is PR-SCTP AND buffer space
+ * limited type
+ */
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ /*
+ * Lower numbers equate to higher
+ * priority so if the one we are
+ * looking at has a larger or equal
+ * priority we want to drop the data
+ * and NOT retransmit it.
+ */
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+ int cause;
+
+ if (chk->sent > SCTP_DATAGRAM_UNSENT)
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT;
+ else
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT;
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ cause,
+ &asoc->sent_queue, SCTP_SO_LOCKED);
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* if chunk was present */
+ } /* if of sufficient priority */
+ } /* if chunk has enabled */
+ } /* tailqforeach */
+
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ /* Here we must move to the sent queue and mark */
+ if (PR_SCTP_TTL_ENABLED(chk->flags)) {
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT,
+ &asoc->send_queue, SCTP_SO_LOCKED);
+
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* end if chk->data */
+ } /* end if right class */
+ } /* end if chk pr-sctp */
+ chk = nchk;
+ } /* end while (chk) */
+ } /* if enabled in asoc */
+}
+
+int
+sctp_get_frag_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ int siz, ovh;
+
+ /*
+ * For endpoints that have both v6 and v4 addresses we must reserve
+ * room for the ipv6 header, for those that are only dealing with V4
+ * we use a larger frag point.
+ */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (stcb->asoc.sctp_frag_point > asoc->smallest_mtu)
+ siz = asoc->smallest_mtu - ovh;
+ else
+ siz = (stcb->asoc.sctp_frag_point - ovh);
+ /*
+ * if (siz > (MCLBYTES-sizeof(struct sctp_data_chunk))) {
+ */
+ /* A data chunk MUST fit in a cluster */
+ /* siz = (MCLBYTES - sizeof(struct sctp_data_chunk)); */
+ /* } */
+
+ /* adjust for an AUTH chunk if DATA requires auth */
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks))
+ siz -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+
+ if (siz % 4) {
+ /* make it an even word boundary please */
+ siz -= (siz % 4);
+ }
+ return (siz);
+}
+
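+/*
+ * Record the PR-SCTP policy from the send info on the pending stream
+ * queue entry and prime sp->ts accordingly: a priority for buffer-drop,
+ * an absolute expiry time for TTL, or a retransmission limit for RTX.
+ */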
+static void
+sctp_set_prsctp_policy(struct sctp_tcb *stcb,
+ struct sctp_stream_queue_pending *sp)
+{
+ sp->pr_sctp_on = 0;
+ if (stcb->asoc.peer_supports_prsctp) {
+ /*
+ * We assume that the user wants PR_SCTP_TTL if the user
+ * provides a positive lifetime but does not specify any
+ * PR_SCTP policy. This is a BAD assumption and causes
+ * problems at least with the U-Vancouver MPI folks. I will
+ * change this to be no policy means NO PR-SCTP.
+ */
+ if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
+ sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
+ sp->pr_sctp_on = 1;
+ } else {
+ return;
+ }
+ switch (PR_SCTP_POLICY(sp->sinfo_flags)) {
+ case CHUNK_FLAGS_PR_SCTP_BUF:
+ /*
+ * Time to live is a priority stored in tv_sec when
+ * doing the buffer drop thing.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ case CHUNK_FLAGS_PR_SCTP_TTL:
+ {
+ struct timeval tv;
+
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ tv.tv_sec = sp->timetolive / 1000;
+ tv.tv_usec = (sp->timetolive * 1000) % 1000000;
+ timevaladd(&sp->ts, &tv);
+ }
+ break;
+ case CHUNK_FLAGS_PR_SCTP_RTX:
+ /*
+ * Time to live is the number of retransmissions
+ * stored in tv_sec.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_USRREQ1,
+ "Unknown PR_SCTP policy %u.\n",
+ PR_SCTP_POLICY(sp->sinfo_flags));
+ break;
+ }
+ }
+}
+
+static int
+sctp_msg_append(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv, int hold_stcb_lock)
+{
+ int error = 0, holds_lock;
+ struct mbuf *at;
+ struct sctp_stream_queue_pending *sp = NULL;
+ struct sctp_stream_out *strm;
+
+ /*
+ * Given an mbuf chain, put it into the association send queue and
+ * place it on the wheel
+ */
+ holds_lock = hold_stcb_lock;
+ if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((stcb->asoc.stream_locked) &&
+ (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out_now;
+ }
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ sp->addr_over = 1;
+ } else {
+ sp->net = stcb->asoc.primary_destination;
+ sp->addr_over = 0;
+ }
+ atomic_add_int(&sp->net->ref_count, 1);
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ sp->stream = srcv->sinfo_stream;
+ sp->msg_is_complete = 1;
+ sp->sender_all_done = 1;
+ sp->some_taken = 0;
+ sp->data = m;
+ sp->tail_mbuf = NULL;
+ sp->length = 0;
+ at = m;
+ sctp_set_prsctp_policy(stcb, sp);
+ /*
+ * We could in theory (for sendall) stuff the length in, but we would
+ * still have to hunt through the chain since we need to set up the
+ * tail_mbuf.
+ */
+ while (at) {
+ if (SCTP_BUF_NEXT(at) == NULL)
+ sp->tail_mbuf = at;
+ sp->length += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ strm->next_sequence_sent++;
+ }
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, &stcb->asoc, strm, 1);
+ }
+ m = NULL;
+ SCTP_TCB_SEND_UNLOCK(stcb);
+out_now:
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return (error);
+}
+
+
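+/*
+ * Append the data in clonechain onto the end of outchain, keeping
+ * *endofchain pointing at the last mbuf.  Small messages (when not
+ * copying by reference) are copied directly into trailing space of the
+ * existing chain; otherwise the clonechain is either taken over as-is
+ * (can_take_mbuf) or duplicated with SCTP_M_COPYM.
+ */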
+static struct mbuf *
+sctp_copy_mbufchain(struct mbuf *clonechain,
+ struct mbuf *outchain,
+ struct mbuf **endofchain,
+ int can_take_mbuf,
+ int sizeofcpy,
+ uint8_t copy_by_ref)
+{
+ struct mbuf *m;
+ struct mbuf *appendchain;
+ caddr_t cp;
+ int len;
+
+ if (endofchain == NULL) {
+ /* error */
+error_out:
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (can_take_mbuf) {
+ appendchain = clonechain;
+ } else {
+ if (!copy_by_ref &&
+ (sizeofcpy <= (int)((((sctp_mbuf_threshold_count - 1) * MLEN) + MHLEN)))
+ ) {
+ /* It's not in a cluster */
+ if (*endofchain == NULL) {
+ /* lets get a mbuf cluster */
+ if (outchain == NULL) {
+ /* This is the general case */
+ new_mbuf:
+ outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (outchain == NULL) {
+ goto error_out;
+ }
+ SCTP_BUF_LEN(outchain) = 0;
+ *endofchain = outchain;
+ /* get the prepend space */
+ SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4));
+ } else {
+ /*
+ * We really should not get a NULL
+ * in endofchain
+ */
+ /* find end */
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ /* sanity */
+ if (*endofchain == NULL) {
+ /*
+ * huh, TSNH XXX maybe we
+ * should panic
+ */
+ sctp_m_freem(outchain);
+ goto new_mbuf;
+ }
+ }
+ /* get the new end of length */
+ len = M_TRAILINGSPACE(*endofchain);
+ } else {
+ /* how much is left at the end? */
+ len = M_TRAILINGSPACE(*endofchain);
+ }
+ /* Find the end of the data, for appending */
+ cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
+
+ /* Now lets copy it out */
+ if (len >= sizeofcpy) {
+ /* It all fits, copy it in */
+ m_copydata(clonechain, 0, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ } else {
+ /* fill up the end of the chain */
+ if (len > 0) {
+ m_copydata(clonechain, 0, len, cp);
+ SCTP_BUF_LEN((*endofchain)) += len;
+ /* now we need another one */
+ sizeofcpy -= len;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m == NULL) {
+ /* We failed */
+ goto error_out;
+ }
+ SCTP_BUF_NEXT((*endofchain)) = m;
+ *endofchain = m;
+ cp = mtod((*endofchain), caddr_t);
+ m_copydata(clonechain, len, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ }
+ return (outchain);
+ } else {
+ /* copy the old fashion way */
+ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+ }
+ }
+ if (appendchain == NULL) {
+ /* error */
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (outchain) {
+ /* tack on to the end */
+ if (*endofchain != NULL) {
+ SCTP_BUF_NEXT(((*endofchain))) = appendchain;
+ } else {
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ SCTP_BUF_NEXT(m) = appendchain;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ /*
+ * save off the end and update the end-chain position
+ */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (outchain);
+ } else {
+ /* save off the end and update the end-chain position */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (appendchain);
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int *cwnd_full, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
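+/*
+ * Iterator callback used for SCTP_SENDALL: for each association on the
+ * endpoint, take a private copy of the user's message and either abort
+ * the association with it as the cause (SCTP_ABORT), or queue it for
+ * sending and, on SCTP_EOF, start the shutdown sequence.  Output is
+ * then kicked unless Nagle-style bundling says to wait.
+ */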
+static void
+sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
+ uint32_t val)
+{
+ struct sctp_copy_all *ca;
+ struct mbuf *m;
+ int ret = 0;
+ int added_control = 0;
+ int un_sent, do_chunk_output = 1;
+ struct sctp_association *asoc;
+
+ ca = (struct sctp_copy_all *)ptr;
+ if (ca->m == NULL) {
+ return;
+ }
+ if (ca->inp != inp) {
+ /* TSNH */
+ return;
+ }
+ if ((ca->m) && ca->sndlen) {
+ m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+ if (m == NULL) {
+ /* can't copy so we are done */
+ ca->cnt_failed++;
+ return;
+ }
+ } else {
+ m = NULL;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
+ /* Abort this assoc with m as the user defined reason */
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_DONTWAIT);
+ if (m) {
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(ca->sndlen);
+ }
+ /*
+ * We add one here to keep the assoc from
+ * disappearing on us.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ m, SCTP_SO_NOT_LOCKED);
+ /*
+ * sctp_abort_an_association calls sctp_free_asoc();
+ * free_asoc will NOT free it since we incremented
+ * the refcnt. We do this to prevent it being freed
+ * and things getting tricky, since we could end up
+ * (from free_asoc) calling inpcb_free, which would
+ * make a recursive lock call on the iterator lock.
+ * But as a consequence of that, the stcb comes back
+ * to us un-locked; since free_asoc returns with
+ * either no TCB or the TCB unlocked, we must relock
+ * to unlock in the iterator timer :-0
+ */
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ } else {
+ if (m) {
+ ret = sctp_msg_append(stcb, stcb->asoc.primary_destination, m,
+ &ca->sndrcv, 1);
+ }
+ asoc = &stcb->asoc;
+ if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
+ /* shutdown this assoc */
+ int cnt;
+
+ cnt = sctp_is_there_unsent_data(stcb);
+
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * only send SHUTDOWN the first time
+ * through
+ */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ added_control = 1;
+ do_chunk_output = 0;
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX sockets draft says that SCTP_EOF
+ * should be sent with no data. currently,
+ * we will allow user data to be sent first
+ * and move to SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (asoc->locked_on_sending) {
+ /*
+ * Locked to send out the
+ * data
+ */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_NOT_LOCKED);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+
+ }
+ }
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) * sizeof(struct sctp_data_chunk)));
+
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+ do_chunk_output = 0;
+ }
+ if (do_chunk_output)
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
+ else if (added_control) {
+ int num_out = 0, reason = 0, cwnd_full = 0, now_filled = 0;
+ struct timeval now;
+ int frag_point;
+
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, &cwnd_full, 1, &now, &now_filled, frag_point, SCTP_SO_NOT_LOCKED);
+ }
+no_chunk_output:
+ if (ret) {
+ ca->cnt_failed++;
+ } else {
+ ca->cnt_sent++;
+ }
+}
+
+static void
+sctp_sendall_completes(void *ptr, uint32_t val)
+{
+ struct sctp_copy_all *ca;
+
+ ca = (struct sctp_copy_all *)ptr;
+ /*
+ * Do a notify here? Kacheong suggests that the notify be done at
+ * the send time.. so you would push up a notification if any send
+ * failed. Don't know if this is feasible since the only failures we
+ * have are "memory" related, and if you cannot get an mbuf to send
+ * the data you surely can't get an mbuf to send up to notify the
+ * user you can't send the data :->
+ */
+
+ /* now free everything */
+ sctp_m_freem(ca->m);
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+}
+
+
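+/*
+ * Push a payload of len bytes to the end of an mbuf cluster by
+ * reserving the unused leading space, rounded down to a long boundary.
+ */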
+#define MC_ALIGN(m, len) do { \
+ SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1))); \
+} while (0)
+
+
+
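+/*
+ * Copy len bytes from the user's uio into a fresh mbuf chain (first
+ * mbuf cluster-sized) and return it, or NULL if the copy fails.
+ */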
+static struct mbuf *
+sctp_copy_out_all(struct uio *uio, int len)
+{
+ struct mbuf *ret, *at;
+ int left, willcpy, cancpy, error;
+
+ ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+ if (ret == NULL) {
+ /* TSNH */
+ return (NULL);
+ }
+ left = len;
+ SCTP_BUF_LEN(ret) = 0;
+ /* save space for the data chunk header */
+ cancpy = M_TRAILINGSPACE(ret);
+ willcpy = min(cancpy, left);
+ at = ret;
+ while (left > 0) {
+ /* Align data to the end */
+ error = uiomove(mtod(at, caddr_t), willcpy, uio);
+ if (error) {
+ err_out_now:
+ sctp_m_freem(at);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(at) = willcpy;
+ SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
+ left -= willcpy;
+ if (left > 0) {
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(at) == NULL) {
+ goto err_out_now;
+ }
+ at = SCTP_BUF_NEXT(at);
+ SCTP_BUF_LEN(at) = 0;
+ cancpy = M_TRAILINGSPACE(at);
+ willcpy = min(cancpy, left);
+ }
+ }
+ return (ret);
+}
+
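+/*
+ * Implement the SCTP_SENDALL flag: stash a private copy of the user
+ * data in a sctp_copy_all context and start an asoc iterator that runs
+ * sctp_sendall_iterator over every association on this endpoint, with
+ * sctp_sendall_completes as the completion routine.
+ */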
+static int
+sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv)
+{
+ int ret;
+ struct sctp_copy_all *ca;
+
+ SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
+ SCTP_M_COPYAL);
+ if (ca == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(ca, 0, sizeof(struct sctp_copy_all));
+
+ ca->inp = inp;
+ memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
+ /*
+ * take off the sendall flag, it would be bad if we failed to do
+ * this :-0
+ */
+ ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
+ /* get length and mbuf chain */
+ if (uio) {
+ ca->sndlen = uio->uio_resid;
+ ca->m = sctp_copy_out_all(uio, ca->sndlen);
+ if (ca->m == NULL) {
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ } else {
+ /* Gather the length of the send */
+ struct mbuf *mat;
+
+ mat = m;
+ ca->sndlen = 0;
+ while (m) {
+ ca->sndlen += SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ ca->m = mat;
+ }
+ ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
+ SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)ca, 0,
+ sctp_sendall_completes, inp, 1);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for sendall\n");
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+ return (0);
+}
+
+
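+/*
+ * Drop any COOKIE-ECHO chunks still queued on the control send queue
+ * and release their data.
+ */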
+void
+sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+}
+
+void
+sctp_toss_old_asconf(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *chk_tmp;
+
+ asoc = &stcb->asoc;
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue); chk != NULL;
+ chk = chk_tmp) {
+ /* get next chk */
+ chk_tmp = TAILQ_NEXT(chk, sctp_next);
+ /* find SCTP_ASCONF chunk in queue (only one ever in queue) */
+ if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ }
+}
+
+
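+/*
+ * After the chunks in data_list have been handed to the lower layer,
+ * move them from the send queue to the sent queue in TSN order, mark
+ * them SENT, record the send time, bump flight size and charge the
+ * peer's rwnd (flagging a window probe when the rwnd is exhausted).
+ */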
+static void
+sctp_clean_up_datalist(struct sctp_tcb *stcb,
+
+ struct sctp_association *asoc,
+ struct sctp_tmit_chunk **data_list,
+ int bundle_at,
+ struct sctp_nets *net)
+{
+ int i;
+ struct sctp_tmit_chunk *tp1;
+
+ for (i = 0; i < bundle_at; i++) {
+ /* off of the send queue */
+ if (i) {
+ /*
+ * Any chunk NOT 0 you zap the time; chunk 0 gets
+ * zapped or set based on whether an RTO measurement is
+ * needed.
+ */
+ data_list[i]->do_rtt = 0;
+ }
+ /* record time */
+ data_list[i]->sent_rcv_time = net->last_sent_time;
+ data_list[i]->rec.data.fast_retran_tsn = data_list[i]->rec.data.TSN_seq;
+ TAILQ_REMOVE(&asoc->send_queue,
+ data_list[i],
+ sctp_next);
+ /* on to the sent queue */
+ tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead);
+ if ((tp1) && (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN))) {
+ struct sctp_tmit_chunk *tpp;
+
+ /* need to move back */
+ back_up_more:
+ tpp = TAILQ_PREV(tp1, sctpchunk_listhead, sctp_next);
+ if (tpp == NULL) {
+ TAILQ_INSERT_BEFORE(tp1, data_list[i], sctp_next);
+ goto all_done;
+ }
+ tp1 = tpp;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN)) {
+ goto back_up_more;
+ }
+ TAILQ_INSERT_AFTER(&asoc->sent_queue, tp1, data_list[i], sctp_next);
+ } else {
+ TAILQ_INSERT_TAIL(&asoc->sent_queue,
+ data_list[i],
+ sctp_next);
+ }
+all_done:
+ /* This does not lower until the cum-ack passes it */
+ asoc->sent_queue_cnt++;
+ asoc->send_queue_cnt--;
+ if ((asoc->peers_rwnd <= 0) &&
+ (asoc->total_flight == 0) &&
+ (bundle_at == 1)) {
+ /* Mark the chunk as being a window probe */
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 3);
+#endif
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ data_list[i]->snd_count = 1;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, sctp_peer_chunk_oh);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size + sctp_peer_chunk_oh));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ }
+}
+
+static void
+sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
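+ /*
+ * Purge stray control chunks (SACK, HEARTBEAT, SHUTDOWN,
+ * COOKIE-ACK, etc.) left on the control queue; a STREAM-RESET
+ * chunk is only kept if it is the association's pending
+ * str_reset.
+ */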
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+ /* Stray chunks must be cleaned up */
+ clean_up_anyway:
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ } else if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ /* special handling, we must look into the param */
+ if (chk != asoc->str_reset) {
+ goto clean_up_anyway;
+ }
+ }
+ }
+}
+
+
+static int
+sctp_can_we_split_this(struct sctp_tcb *stcb,
+ uint32_t length,
+ uint32_t goal_mtu, uint32_t frag_point, int eeor_on)
+{
+ /*
+ * Make a decision on if I should split a msg into multiple parts.
+ * This is only asked of incomplete messages.
+ */
+ if (eeor_on) {
+ /*
+ * If we are doing EEOR we always need to send it if it is
+ * the entire message, since it might be all the sender is
+ * putting in the hopper.
+ */
+ if (goal_mtu >= length) {
+ /*-
+ * If we have data outstanding,
+ * we get another chance when the sack
+ * arrives to transmit - wait for more data
+ */
+ if (stcb->asoc.total_flight == 0) {
+ /*
+ * Nothing is in flight, so we may take
+ * the entire message now.
+ */
+ return (length);
+ }
+ return (0);
+
+ } else {
+ /* You can fill the rest */
+ return (goal_mtu);
+ }
+ }
+ /*-
+ * For those strange folk that make the send buffer
+ * smaller than our fragmentation point, we can't
+ * get a full msg in so we have to allow splitting.
+ */
+ if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) {
+ return (length);
+ }
+ if ((length <= goal_mtu) ||
+ ((length - goal_mtu) < sctp_min_residual)) {
+ /* Sub-optimal residual, don't split in non-eeor mode. */
+ return (0);
+ }
+ /*
+ * If we reach here, length is larger than the goal_mtu. Do we
+ * wish to split it for the sake of packing chunks together?
+ */
+ if (goal_mtu >= min(sctp_min_split_point, frag_point)) {
+ /* It's ok to split it */
+ return (min(goal_mtu, frag_point));
+ }
+ /* Nope, can't split */
+ return (0);
+
+}
+
+static uint32_t
+sctp_move_to_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct sctp_stream_out *strq,
+ uint32_t goal_mtu,
+ uint32_t frag_point,
+ int *locked,
+ int *giveup,
+ int eeor_mode,
+ int *bail)
+{
+ /* Move from the stream to the send_queue keeping track of the total */
+ struct sctp_association *asoc;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_data_chunk *dchkh;
+ uint32_t to_move, length;
+ uint8_t rcv_flags = 0;
+ uint8_t some_taken;
+ uint8_t send_lock_up = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+one_more_time:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ *locked = 0;
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp) {
+ goto one_more_time;
+ }
+ if (strq->last_msg_incomplete) {
+ SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
+ strq->stream_no,
+ strq->last_msg_incomplete);
+ strq->last_msg_incomplete = 0;
+ }
+ to_move = 0;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ goto out_of;
+ }
+ if ((sp->msg_is_complete) && (sp->length == 0)) {
+ if (sp->sender_all_done) {
+ /*
+ * We are doing deferred cleanup. Last time through,
+ * when we took all the data, sender_all_done was
+ * not set.
+ */
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((TAILQ_NEXT(sp, next) == NULL) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ /* back to get the next msg */
+ goto one_more_time;
+ } else {
+ /*
+ * sender just finished this but still holds a
+ * reference
+ */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ } else {
+ /* is there some to get */
+ if (sp->length == 0) {
+ /* no */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+ some_taken = sp->some_taken;
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ sp->msg_is_complete = 1;
+ }
+re_look:
+ length = sp->length;
+ if (sp->msg_is_complete) {
+ /* The message is complete */
+ to_move = min(length, frag_point);
+ if (to_move == length) {
+ /* All of it fits in the MTU */
+ if (sp->some_taken) {
+ rcv_flags |= SCTP_DATA_LAST_FRAG;
+ sp->put_last_out = 1;
+ } else {
+ rcv_flags |= SCTP_DATA_NOT_FRAG;
+ sp->put_last_out = 1;
+ }
+ } else {
+ /* Not all of it fits, we fragment */
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ }
+ sp->some_taken = 1;
+ }
+ } else {
+ to_move = sctp_can_we_split_this(stcb, length, goal_mtu,
+ frag_point, eeor_mode);
+ if (to_move) {
+ /*-
+ * We use a snapshot of length in case it
+ * is expanding during the compare.
+ */
+ uint32_t llen;
+
+ llen = length;
+ if (to_move >= llen) {
+ to_move = llen;
+ if (send_lock_up == 0) {
+ /*-
+ * We are taking all of an incomplete msg
+ * thus we need a send lock.
+ */
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ if (sp->msg_is_complete) {
+ /*
+ * the sender finished the
+ * msg
+ */
+ goto re_look;
+ }
+ }
+ }
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ sp->some_taken = 1;
+ }
+ } else {
+ /* Nothing to take. */
+ if (sp->some_taken) {
+ *locked = 1;
+ }
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+
+ /* If we reach here, we can copy out a chunk */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No chunk memory */
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ /*
+ * Setup for unordered if needed by looking at the user sent info
+ * flags.
+ */
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ rcv_flags |= SCTP_DATA_UNORDERED;
+ }
+ /* clear out the chunk before setting up */
+ memset(chk, 0, sizeof(*chk));
+ chk->rec.data.rcv_flags = rcv_flags;
+
+ if (to_move >= length) {
+ /* we think we can steal the whole thing */
+ if ((sp->sender_all_done == 0) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (to_move < sp->length) {
+ /* bail, it changed */
+ goto dont_do_it;
+ }
+ chk->data = sp->data;
+ chk->last_mbuf = sp->tail_mbuf;
+ /* register the stealing */
+ sp->data = sp->tail_mbuf = NULL;
+ } else {
+ struct mbuf *m;
+
+dont_do_it:
+ chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+ chk->last_mbuf = NULL;
+ if (chk->data == NULL) {
+ sp->some_taken = some_taken;
+ sctp_free_a_chunk(stcb, chk);
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ /* Pull off the data */
+ m_adj(sp->data, to_move);
+ /* Now lets work our way down and compact it */
+ m = sp->data;
+ while (m && (SCTP_BUF_LEN(m) == 0)) {
+ sp->data = SCTP_BUF_NEXT(m);
+ SCTP_BUF_NEXT(m) = NULL;
+ if (sp->tail_mbuf == m) {
+ /*-
+ * Freeing tail? TSNH since
+ * we supposedly were taking less
+ * than the sp->length.
+ */
+#ifdef INVARIANTS
+ panic("Huh, freing tail? - TSNH");
+#else
+ SCTP_PRINTF("Huh, freeing tail? - TSNH\n");
+ sp->tail_mbuf = sp->data = NULL;
+ sp->length = 0;
+#endif
+
+ }
+ sctp_m_free(m);
+ m = sp->data;
+ }
+ }
+ if (SCTP_BUF_IS_EXTENDED(chk->data)) {
+ chk->copy_by_ref = 1;
+ } else {
+ chk->copy_by_ref = 0;
+ }
+ /*
+ * Get last_mbuf and counts of mbuf usage. This is ugly but
+ * hopefully it's only one mbuf.
+ */
+ if (chk->last_mbuf == NULL) {
+ chk->last_mbuf = chk->data;
+ while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) {
+ chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
+ }
+ }
+ if (to_move > length) {
+ /*- This should not happen either,
+ * since we always lower to_move to the size
+ * of sp->length if it's larger.
+ */
+#ifdef INVARIANTS
+ panic("Huh, how can to_move be larger?");
+#else
+ SCTP_PRINTF("Huh, how can to_move be larger?\n");
+ sp->length = 0;
+#endif
+ } else {
+ atomic_subtract_int(&sp->length, to_move);
+ }
+ if (M_LEADINGSPACE(chk->data) < (int)sizeof(struct sctp_data_chunk)) {
+ /* Not enough room for a chunk header, get some */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+ if (m == NULL) {
+ /*
+ * we're in trouble here. _PREPEND below will free
+ * all the data if there is no leading space, so we
+ * must put the data back and restore.
+ */
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (chk->data == NULL) {
+ /* unsteal the data */
+ sp->data = chk->data;
+ sp->tail_mbuf = chk->last_mbuf;
+ } else {
+ struct mbuf *m_tmp;
+
+ /* reassemble the data */
+ m_tmp = sp->data;
+ sp->data = chk->data;
+ SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp;
+ }
+ sp->some_taken = some_taken;
+ atomic_add_int(&sp->length, to_move);
+ chk->data = NULL;
+ *bail = 1;
+ sctp_free_a_chunk(stcb, chk);
+ to_move = 0;
+ goto out_of;
+ } else {
+ SCTP_BUF_LEN(m) = 0;
+ SCTP_BUF_NEXT(m) = chk->data;
+ chk->data = m;
+ M_ALIGN(chk->data, 4);
+ }
+ }
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+ if (chk->data == NULL) {
+ /* HELP, TSNH since we assured it would not fail above? */
+#ifdef INVARIANTS
+ panic("prepend fails HELP?");
+#else
+ SCTP_PRINTF("prepend fails HELP?\n");
+ sctp_free_a_chunk(stcb, chk);
+#endif
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
+ chk->book_size = chk->send_size = (to_move +
+ sizeof(struct sctp_data_chunk));
+ chk->book_size_scale = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->pad_inplace = 0;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = sp->strseq;
+ chk->rec.data.stream_number = sp->stream;
+ chk->rec.data.payloadtype = sp->ppid;
+ chk->rec.data.context = sp->context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.ect_nonce = 0; /* ECN Nonce */
+
+ chk->rec.data.timetodrop = sp->ts;
+ chk->flags = sp->act_flags;
+ chk->addr_over = sp->addr_over;
+
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
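+ /* Assign this chunk the next outgoing TSN. */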
+ chk->rec.data.TSN_seq = atomic_fetchadd_int(&asoc->sending_seq, 1);
+ if (sctp_logging_level & SCTP_LOG_AT_SEND_2_OUTQ) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq),
+ chk->rec.data.TSN_seq);
+ }
+ dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ /*
+ * Put the rest of the things in place now. Size was done earlier in
+ * previous loop prior to padding.
+ */
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_out_at = 0;
+ asoc->tsn_out_wrapped = 1;
+ }
+ asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.TSN_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.stream_number;
+ asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.stream_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size;
+ asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags;
+ asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
+ asoc->tsn_out_at++;
+#endif
+
+ dchkh->ch.chunk_type = SCTP_DATA;
+ dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
+ dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
+ dchkh->dp.stream_id = htons(strq->stream_no);
+ dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq);
+ dchkh->dp.protocol_id = chk->rec.data.payloadtype;
+ dchkh->ch.chunk_length = htons(chk->send_size);
+ /* Now advance the chk->send_size by the actual pad needed. */
+ if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
+ /* need a pad */
+ struct mbuf *lm;
+ int pads;
+
+ pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
+ if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
+ chk->pad_inplace = 1;
+ }
+ if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
+ /* pad added an mbuf */
+ chk->last_mbuf = lm;
+ }
+ chk->send_size += pads;
+ }
+ /* We only re-set the policy if it is on */
+ if (sp->pr_sctp_on) {
+ sctp_set_prsctp_policy(stcb, sp);
+ asoc->pr_sctp_cnt++;
+ chk->pr_sctp_on = 1;
+ } else {
+ chk->pr_sctp_on = 0;
+ }
+ if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
+ /* All done pull and kill the message */
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((send_lock_up == 0) && (TAILQ_NEXT(sp, next) == NULL)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ } else {
+ /* more to go, we are locked */
+ *locked = 1;
+ }
+ asoc->chunks_on_out_queue++;
+ TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt++;
+out_of:
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ return (to_move);
+}
+
+
+static struct sctp_stream_out *
+sctp_select_a_stream(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_stream_out *strq;
+
+ /* Find the next stream to use */
+ if (asoc->last_out_stream == NULL) {
+ strq = asoc->last_out_stream = TAILQ_FIRST(&asoc->out_wheel);
+ if (asoc->last_out_stream == NULL) {
+ /* huh nothing on the wheel, TSNH */
+ return (NULL);
+ }
+ goto done_it;
+ }
+ strq = TAILQ_NEXT(asoc->last_out_stream, next_spoke);
+done_it:
+ if (strq == NULL) {
+ strq = asoc->last_out_stream = TAILQ_FIRST(&asoc->out_wheel);
+ }
+ /* Save off the last stream */
+ asoc->last_out_stream = strq;
+ return (strq);
+
+}
+
+
+static void
+sctp_fill_outqueue(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now)
+{
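+ /*
+ * Pull data from the per-stream out-queues onto the association
+ * send queue for this net, up to roughly goal_mtu bytes.
+ */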
+ struct sctp_association *asoc;
+ struct sctp_stream_out *strq, *strqn, *strqt;
+ int goal_mtu, moved_how_much, total_moved = 0, bail = 0;
+ int locked, giveup;
+ struct sctp_stream_queue_pending *sp;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+ } else {
+ /* ?? not sure what else to do */
+ goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+#else
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+#endif
+ /* Need an allowance for the data chunk header too */
+ goal_mtu -= sizeof(struct sctp_data_chunk);
+
+ /* must make even word boundary */
+ goal_mtu &= 0xfffffffc;
+ if (asoc->locked_on_sending) {
+ /* We are stuck on one stream until the message completes. */
+ strqn = strq = asoc->locked_on_sending;
+ locked = 1;
+ } else {
+ strqn = strq = sctp_select_a_stream(stcb, asoc);
+ locked = 0;
+ }
+
+ while ((goal_mtu > 0) && strq) {
+ sp = TAILQ_FIRST(&strq->outqueue);
+ /*
+ * If CMT is off, we must validate that the stream in
+ * question has its first item pointed towards the network
+ * destination requested by the caller. Note that if we
+ * turn out to be locked to a stream (assigning TSNs), then
+ * we must stop, since we cannot look for another stream
+ * with data to send to that destination. In CMT's case, by
+ * skipping this check, we will send one data packet towards
+ * the requested net.
+ */
+ if (sp == NULL) {
+ break;
+ }
+ if ((sp->net != net) && (sctp_cmt_on_off == 0)) {
+ /* none for this network */
+ if (locked) {
+ break;
+ } else {
+ strq = sctp_select_a_stream(stcb, asoc);
+ if (strq == NULL)
+ /* none left */
+ break;
+ if (strqn == strq) {
+ /* I have circled */
+ break;
+ }
+ continue;
+ }
+ }
+ giveup = 0;
+ bail = 0;
+ moved_how_much = sctp_move_to_outqueue(stcb, net, strq, goal_mtu, frag_point, &locked,
+ &giveup, eeor_mode, &bail);
+ asoc->last_out_stream = strq;
+ if (locked) {
+ asoc->locked_on_sending = strq;
+ if ((moved_how_much == 0) || (giveup) || bail)
+ /* no more to move for now */
+ break;
+ } else {
+ asoc->locked_on_sending = NULL;
+ strqt = sctp_select_a_stream(stcb, asoc);
+ if (TAILQ_FIRST(&strq->outqueue) == NULL) {
+ if (strq == strqn) {
+ /* Must move start to next one */
+ strqn = TAILQ_NEXT(asoc->last_out_stream, next_spoke);
+ if (strqn == NULL) {
+ strqn = TAILQ_FIRST(&asoc->out_wheel);
+ if (strqn == NULL) {
+ break;
+ }
+ }
+ }
+ sctp_remove_from_wheel(stcb, asoc, strq);
+ }
+ if ((giveup) || bail) {
+ break;
+ }
+ strq = strqt;
+ if (strq == NULL) {
+ break;
+ }
+ }
+ total_moved += moved_how_much;
+ goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk));
+ goal_mtu &= 0xfffffffc;
+ }
+ if (bail)
+ *quit_now = 1;
+
+ if (total_moved == 0) {
+ if ((sctp_cmt_on_off == 0) &&
+ (net == stcb->asoc.primary_destination)) {
+ /* ran dry for primary network net */
+ SCTP_STAT_INCR(sctps_primary_randry);
+ } else if (sctp_cmt_on_off) {
+ /* ran dry with CMT on */
+ SCTP_STAT_INCR(sctps_cmt_randry);
+ }
+ }
+}
+
+void
+sctp_fix_ecn_echo(struct sctp_association *asoc)
+{
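+ /* Mark any queued ECN-ECHO chunks as unsent so they go out again. */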
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ }
+ }
+}
+
+static void
+sctp_move_to_an_alt(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_nets *a_net;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /*
+ * JRS 5/14/07 - If CMT PF is turned on, find an alternate
+ * destination using the PF algorithm for finding alternate
+ * destinations.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ a_net = sctp_find_alternate_net(stcb, net, 2);
+ } else {
+ a_net = sctp_find_alternate_net(stcb, net, 0);
+ }
+ if ((a_net != net) &&
+ ((a_net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE)) {
+ /*
+ * We only proceed if a valid alternate is found that is not
+ * this one and is reachable. Here we must move all chunks
+ * queued in the send queue off of the destination address
+ * to our alternate.
+ */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ /* Move the chunk to our alternate */
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = a_net;
+ atomic_add_int(&a_net->ref_count, 1);
+ }
+ }
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int *cwnd_full, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+ * Ok, this is the generic chunk service queue. We must do the
+ * following: - Service the stream queue that is next, moving any
+ * message (note I must get a complete message, i.e. FIRST/MIDDLE
+ * and LAST, to the out queue in one pass) and assigning TSNs. -
+ * Check to see if the cwnd/rwnd allows any output; if so we go
+ * ahead and formulate and send the low level chunks, making sure
+ * to combine any control in the control chunk queue also.
+ */
+ struct sctp_nets *net;
+ struct mbuf *outchain, *endoutchain;
+ struct sctp_tmit_chunk *chk, *nchk;
+ struct sctphdr *shdr;
+
+ /* temp arrays for unlinking */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ int no_fragmentflg, error;
+ int one_chunk, hbflag, skip_data_for_this_net;
+ int asconf, cookie, no_out_cnt;
+ int bundle_at, ctl_cnt, no_data_chunks, cwnd_full_ind, eeor_mode;
+ unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
+ struct sctp_nets *start_at, *old_startat = NULL, *send_start_at;
+ int tsns_sent = 0;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+
+ /*
+ * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the
+ * destination.
+ */
+ int pf_hbflag = 0;
+ int quit_now = 0;
+
+ *num_out = 0;
+ cwnd_full_ind = 0;
+
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
+ eeor_mode = 1;
+ } else {
+ eeor_mode = 0;
+ }
+ ctl_cnt = no_out_cnt = asconf = cookie = 0;
+ /*
+ * First lets prime the pump. For each destination, if there is room
+ * in the flight size, attempt to pull an MTU's worth out of the
+ * stream queues into the general send_queue
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 2);
+#endif
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ hbflag = 0;
+ if ((control_only) || (asoc->stream_reset_outstanding))
+ no_data_chunks = 1;
+ else
+ no_data_chunks = 0;
+
+ /* Nothing possible to send? */
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ *reason_code = 9;
+ return (0);
+ }
+ if (asoc->peers_rwnd == 0) {
+ /* No room in peers rwnd */
+ *cwnd_full = 1;
+ *reason_code = 1;
+ if (asoc->total_flight > 0) {
+ /* we are allowed one chunk in flight */
+ no_data_chunks = 1;
+ }
+ }
+ if ((no_data_chunks == 0) && (!TAILQ_EMPTY(&asoc->out_wheel))) {
+ if (sctp_cmt_on_off) {
+ /*
+ * for CMT we start at the next one past the one we
+ * last added data to.
+ */
+ if (TAILQ_FIRST(&asoc->send_queue) != NULL) {
+ goto skip_the_fill_from_streams;
+ }
+ if (asoc->last_net_data_came_from) {
+ net = TAILQ_NEXT(asoc->last_net_data_came_from, sctp_next);
+ if (net == NULL) {
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+ } else {
+ /* back to start */
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+
+ /*
+ * JRI-TODO: CMT-MPI. Simply set the first
+ * destination (net) to be optimized for the next
+ * message to be pulled out of the outwheel. 1. peek
+ * at outwheel 2. If large message, set net =
+ * highest_cwnd 3. If small message, set net =
+ * lowest rtt
+ */
+ } else {
+ net = asoc->primary_destination;
+ if (net == NULL) {
+ /* TSNH */
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+ }
+ start_at = net;
+
+one_more_time:
+ for (; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
+ net->window_probe = 0;
+ if (old_startat && (old_startat == net)) {
+ break;
+ }
+ /*
+ * JRI: if dest is unreachable or unconfirmed, do
+ * not send data to it
+ */
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) || (net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ continue;
+ }
+ /*
+ * JRI: if dest is in PF state, do not send data to
+ * it
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF)) {
+ continue;
+ }
+ if ((sctp_cmt_on_off == 0) && (net->ref_count < 2)) {
+ /* nothing can be in queue for this guy */
+ continue;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this network, no room */
+ cwnd_full_ind++;
+ continue;
+ }
+ /*
+ * JRI: the for loop we are in takes in each net;
+ * if it's got space in cwnd and has data sent to
+ * it (when CMT is off) then it calls
+ * sctp_fill_outqueue for the net. This gets data on
+ * the send queue for that network.
+ *
+ * In sctp_fill_outqueue TSNs are assigned and data is
+ * copied out of the stream buffers. Note mostly
+ * copy by reference (we hope).
+ */
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now);
+ if (quit_now) {
+ /* memory alloc failure */
+ no_data_chunks = 1;
+ goto skip_the_fill_from_streams;
+ }
+ }
+ if (start_at != TAILQ_FIRST(&asoc->nets)) {
+ /* got to pick up the beginning stuff. */
+ old_startat = start_at;
+ start_at = net = TAILQ_FIRST(&asoc->nets);
+ if (old_startat)
+ goto one_more_time;
+ }
+ }
+skip_the_fill_from_streams:
+ *cwnd_full = cwnd_full_ind;
+
+ /* now service each destination and send out what we can for it */
+ /* Nothing to send? */
+ if ((TAILQ_FIRST(&asoc->control_send_queue) == NULL) &&
+ (TAILQ_FIRST(&asoc->send_queue) == NULL)) {
+ *reason_code = 8;
+ return (0);
+ }
+ if (no_data_chunks) {
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ } else {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ if (chk) {
+ send_start_at = chk->whoTo;
+ } else {
+ send_start_at = TAILQ_FIRST(&asoc->nets);
+ }
+ old_startat = NULL;
+again_one_more_time:
+ for (net = send_start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
+ /* how much can we send? */
+ /* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */
+ if (old_startat && (old_startat == net)) {
+ /* through list completely. */
+ break;
+ }
+ tsns_sent = 0;
+ if (net->ref_count < 2) {
+ /*
+ * Ref-count of 1 so we cannot have data or control
+ * queued to this address. Skip it.
+ */
+ continue;
+ }
+ ctl_cnt = bundle_at = 0;
+ endoutchain = outchain = NULL;
+ no_fragmentflg = 1;
+ one_chunk = 0;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ skip_data_for_this_net = 1;
+ } else {
+ skip_data_for_this_net = 0;
+ }
+ if ((net->ro.ro_rt) && (net->ro.ro_rt->rt_ifp)) {
+ /*
+ * if we have a route and an ifp check to see if we
+ * have room to send to this guy
+ */
+ struct ifnet *ifp;
+
+ ifp = net->ro.ro_rt->rt_ifp;
+ if ((ifp->if_snd.ifq_len + 2) >= ifp->if_snd.ifq_maxlen) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, ifp->if_snd.ifq_len, ifp->if_snd.ifq_maxlen, SCTP_MAX_IFP_APPLIED);
+ }
+ continue;
+ }
+ }
+ if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ } else {
+ mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ }
+ mx_mtu = mtu;
+ to_out = 0;
+ if (mtu > asoc->peers_rwnd) {
+ if (asoc->total_flight > 0) {
+ /* We have a packet in flight somewhere */
+ r_mtu = asoc->peers_rwnd;
+ } else {
+ /* We are always allowed to send one MTU out */
+ one_chunk = 1;
+ r_mtu = mtu;
+ }
+ } else {
+ r_mtu = mtu;
+ }
+ /************************/
+ /* Control transmission */
+ /************************/
+ /* Now first lets go through the control queue */
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != net) {
+ /*
+ * No, not sent to the network we are
+ * looking at
+ */
+ continue;
+ }
+ if (chk->data == NULL) {
+ continue;
+ }
+ if (chk->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * It must be unsent. Cookies and ASCONFs
+ * hang around but their timers will force
+ * them out when marked for resend.
+ */
+ continue;
+ }
+ /*
+ * if no AUTH is yet included and this chunk
+ * requires it, make sure to account for it. We
+ * don't apply the size until the AUTH chunk is
+ * actually added below in case there is no room for
+ * this chunk. NOTE: we overload the use of "omtu"
+ * here
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks)) {
+ omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ omtu = 0;
+ /* Here we do NOT factor the r_mtu */
+ if ((chk->send_size < (int)(mtu - omtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /*
+ * We probably should glom the mbuf chain
+ * from the chk->data for control but the
+ * problem is it becomes yet one more level
+ * of tracking to do if for some reason
+ * output fails. Then I have got to
+ * reconstruct the merged control chain.. el
+ * yucko.. for now we take the easy way and
+ * do the copy
+ */
+ /*
+ * Add an AUTH chunk, if chunk requires it
+ * save the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
+ (int)chk->rec.chunk_id.can_take_data,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ *reason_code = 8;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ /* update our MTU size */
+ if (mtu > (chk->send_size + omtu))
+ mtu -= (chk->send_size + omtu);
+ else
+ mtu = 0;
+ to_out += (chk->send_size + omtu);
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ if (chk->rec.chunk_id.can_take_data)
+ chk->data = NULL;
+ /* Mark things to be removed, if needed */
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+
+ if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) {
+ hbflag = 1;
+ /*
+ * JRS 5/14/07 - Set the
+ * flag to say a heartbeat
+ * is being sent.
+ */
+ pf_hbflag = 1;
+ }
+ /* remove these chunks at the end */
+ if (chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) {
+ /* turn off the timer */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+ }
+ }
+ ctl_cnt++;
+ } else {
+ /*
+ * Other chunks, since they have
+ * timers running (i.e. COOKIE or
+ * ASCONF) we just "trust" that it
+ * gets sent or retransmitted.
+ */
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ cookie = 1;
+ no_out_cnt = 1;
+ } else if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ /*
+ * set hb flag since we can
+ * use these for RTO
+ */
+ hbflag = 1;
+ asconf = 1;
+ /*
+ * should sysctl this: don't
+ * bundle data with ASCONF
+ * since it requires AUTH
+ */
+ no_data_chunks = 1;
+ }
+ chk->sent = SCTP_DATAGRAM_SENT;
+ chk->snd_count++;
+ }
+ if (mtu == 0) {
+ /*
+ * Ok we are out of room but we can
+ * output without affecting the
+ * flight size since this little guy
+ * is a control only packet.
+ */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
+ /*
+ * do NOT clear the asconf
+ * flag as it is used to do
+ * appropriate source
+ * address selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ SCTP_BUF_PREPEND(outchain, sizeof(struct sctphdr), M_DONTWAIT);
+ if (outchain == NULL) {
+ /* no memory */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ error = ENOBUFS;
+ goto error_out_again;
+ }
+ shdr = mtod(outchain, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain, auth_offset, auth,
+ no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ error_out_again:
+ /* error, could not output */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went
+ * unreachable
+ * during this send
+ */
+ sctp_move_to_an_alt(stcb, asoc, net);
+ }
+ *reason_code = 7;
+ continue;
+ } else
+ asoc->ifp_had_enobuf = 0;
+ /* Only HB or ASCONF advances time */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ /*
+ * increase the number we sent, if a
+ * cookie is sent we don't tell them
+ * any was sent out.
+ */
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (!no_out_cnt)
+ *num_out += ctl_cnt;
+ /* recalc a clean slate and setup */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD);
+ }
+ to_out = 0;
+ no_fragmentflg = 1;
+ }
+ }
+ }
+ /*********************/
+ /* Data transmission */
+ /*********************/
+ /*
+ * if AUTH for DATA is required and no AUTH has been added
+ * yet, account for this in the mtu now... if no data can be
+ * bundled, this adjustment won't matter anyways since the
+ * packet will be going out...
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ }
+ /* now lets add any data within the MTU constraints */
+ if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ } else {
+ if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ }
+ if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) && (skip_data_for_this_net == 0)) ||
+ (cookie)) {
+ for (chk = TAILQ_FIRST(&asoc->send_queue); chk; chk = nchk) {
+ if (no_data_chunks) {
+ /* let only control go out */
+ *reason_code = 1;
+ break;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this net, no room for data */
+ *reason_code = 2;
+ break;
+ }
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != net) {
+ /* No, not sent to this net */
+ continue;
+ }
+ if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) {
+ /*-
+ * strange, we have a chunk that is
+ * too big for its destination and
+ * yet no fragment ok flag.
+ * Something went wrong when the
+ * PMTU changed...we did not mark
+ * this chunk for some reason?? I
+ * will fix it here by letting IP
+ * fragment it for now and printing
+ * a warning. This really should not
+ * happen ...
+ */
+ SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n",
+ chk->send_size, mtu);
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) ||
+ ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) {
+ /* ok we will add this one */
+
+ /*
+ * Add an AUTH chunk, if chunk
+ * requires it, save the offset into
+ * the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ SCTP_DATA);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n");
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ *reason_code = 3;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* update our MTU size */
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* unsigned subtraction of mtu */
+ if (mtu > chk->send_size)
+ mtu -= chk->send_size;
+ else
+ mtu = 0;
+ /* unsigned subtraction of r_mtu */
+ if (r_mtu > chk->send_size)
+ r_mtu -= chk->send_size;
+ else
+ r_mtu = 0;
+
+ to_out += chk->send_size;
+ if ((to_out > mx_mtu) && no_fragmentflg) {
+#ifdef INVARIANTS
+ panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out);
+#else
+ SCTP_PRINTF("Exceeding mtu of %d out size is %d\n",
+ mx_mtu, to_out);
+#endif
+ }
+ chk->window_probe = 0;
+ data_list[bundle_at++] = chk;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ mtu = 0;
+ break;
+ }
+ if (chk->sent == SCTP_DATAGRAM_UNSENT) {
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks);
+ }
+ if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) &&
+ ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0))
+ /*
+ * Count the number
+ * of user msgs that
+ * were fragmented;
+ * we do this by
+ * counting only
+ * when we see a
+ * LAST fragment.
+ */
+ SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs);
+ }
+ if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) {
+ if (one_chunk) {
+ data_list[0]->window_probe = 1;
+ net->window_probe = 1;
+ }
+ break;
+ }
+ } else {
+ /*
+ * Must be sent in order of the
+ * TSN's (on a network)
+ */
+ break;
+ }
+ } /* for (chunk gather loop for this net) */
+ } /* if asoc.state OPEN */
+ /* Is there something to send for this destination? */
+ if (outchain) {
+ /* We may need to start a control timer or two */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, net);
+ /*
+ * do NOT clear the asconf flag as it is
+ * used to do appropriate source address
+ * selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ /* must start a send timer if data is being sent */
+ if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * no timer running on this destination
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && pf_hbflag && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)
+ && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * JRS 5/14/07 - If a HB has been sent to a
+ * PF destination and no T3 timer is
+ * currently running, start the T3 timer to
+ * track the HBs that were sent.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ /* Now send it, if there is anything to send :> */
+ SCTP_BUF_PREPEND(outchain, sizeof(struct sctphdr), M_DONTWAIT);
+ if (outchain == NULL) {
+ /* out of mbufs */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ error = ENOBUFS;
+ goto errored_send;
+ }
+ shdr = mtod(outchain, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ if ((error = sctp_lowlevel_chunk_output(inp,
+ stcb,
+ net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain,
+ auth_offset,
+ auth,
+ no_fragmentflg,
+ bundle_at,
+ data_list[0],
+ asconf, so_locked))) {
+ /* error, we could not output */
+ if (error == ENOBUFS) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ asoc->ifp_had_enobuf = 1;
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ errored_send:
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went unreachable
+ * during this send
+ */
+ sctp_move_to_an_alt(stcb, asoc, net);
+ }
+ *reason_code = 6;
+ /*-
+ * I add this line to be paranoid. As far as
+ * I can tell the continue takes us back to
+ * the top of the for, but just to make sure
+ * I will reset these again here.
+ */
+ ctl_cnt = bundle_at = 0;
+ continue; /* This takes us back to the
+ * for() for the nets. */
+ } else {
+ asoc->ifp_had_enobuf = 0;
+ }
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (bundle_at || hbflag) {
+ /* For data/asconf and hb set time */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ }
+ if (!no_out_cnt) {
+ *num_out += (ctl_cnt + bundle_at);
+ }
+ if (bundle_at) {
+ /* setup for a RTO measurement */
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ /* fill time if not already filled */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now_filled = 1;
+ *now = asoc->time_last_sent;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ data_list[0]->do_rtt = 1;
+ SCTP_STAT_INCR_BY(sctps_senddata, bundle_at);
+ sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net);
+ if (sctp_early_fr) {
+ if (net->flight_size < net->cwnd) {
+ /* start or restart it */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrout);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net);
+ } else {
+ /* stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpout);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
+ }
+ }
+ }
+ }
+ if (one_chunk) {
+ break;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND);
+ }
+ }
+ if (old_startat == NULL) {
+ old_startat = send_start_at;
+ send_start_at = TAILQ_FIRST(&asoc->nets);
+ if (old_startat)
+ goto again_one_more_time;
+ }
+ /*
+ * At the end there should be no NON timed chunks hanging on this
+ * queue.
+ */
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND);
+ }
+ if ((*num_out == 0) && (*reason_code == 0)) {
+ *reason_code = 4;
+ } else {
+ *reason_code = 5;
+ }
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+}
+
+void
+sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
+{
+ /*-
+ * Prepend an OPERATION-ERROR chunk header and put it on the end of
+ * the control chunk queue.
+ */
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *mat;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(op_err);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
+ if (op_err == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ chk->send_size = 0;
+ mat = op_err;
+ while (mat != NULL) {
+ chk->send_size += SCTP_BUF_LEN(mat);
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = op_err;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(op_err, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_OPERATION_ERROR;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
+ chk,
+ sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+int
+sctp_send_cookie_echo(struct mbuf *m,
+ int offset,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /*-
+ * pull out the cookie and put it at the front of the control chunk
+ * queue.
+ */
+ int at;
+ struct mbuf *cookie;
+ struct sctp_paramhdr parm, *phdr;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ uint16_t ptype, plen;
+
+ /* First find the cookie in the param area */
+ cookie = NULL;
+ at = offset + sizeof(struct sctp_init_chunk);
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
+ if (phdr == NULL) {
+ return (-3);
+ }
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_STATE_COOKIE) {
+ int pad;
+
+ /* found the cookie */
+ if ((pad = (plen % 4))) {
+ plen += 4 - pad;
+ }
+ cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+ if (cookie == NULL) {
+ /* No memory */
+ return (-2);
+ }
+ break;
+ }
+ at += SCTP_SIZE32(plen);
+ } while (phdr);
+ if (cookie == NULL) {
+ /* Did not find the cookie */
+ return (-3);
+ }
+ /* ok, we got the cookie, let's change it into a cookie echo chunk */
+
+ /* first the change from param to cookie */
+ hdr = mtod(cookie, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ECHO;
+ hdr->chunk_flags = 0;
+ /* get the chunk stuff now and place it in the FRONT of the queue */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie);
+ return (-5);
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = plen;
+ chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return (0);
+}
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
+ struct mbuf *m,
+ int offset,
+ int chk_length,
+ struct sctp_nets *net)
+{
+ /*
+ * take a HB request and make it into a HB ack and send it.
+ */
+ struct mbuf *outchain;
+ struct sctp_chunkhdr *chdr;
+ struct sctp_tmit_chunk *chk;
+
+
+ if (net == NULL)
+ /* must have a net pointer */
+ return;
+
+ outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+ if (outchain == NULL) {
+ /* gak out of memory */
+ return;
+ }
+ chdr = mtod(outchain, struct sctp_chunkhdr *);
+ chdr->chunk_type = SCTP_HEARTBEAT_ACK;
+ chdr->chunk_flags = 0;
+ if (chk_length % 4) {
+ /* need pad */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (chk_length % 4);
+ m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(outchain);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = chk_length;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = outchain;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_cookie_ack(struct sctp_tcb *stcb)
+{
+ /* formulate and queue a cookie-ack back to sender */
+ struct mbuf *cookie_ack;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+
+ cookie_ack = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
+ cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (cookie_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(cookie_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie_ack;
+ if (chk->asoc->last_control_chunk_from != NULL) {
+ chk->whoTo = chk->asoc->last_control_chunk_from;
+ } else {
+ chk->whoTo = chk->asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(cookie_ack, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ACK;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(cookie_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+
+void
+sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN-ACK back to the sender */
+ struct mbuf *m_shutdown_ack;
+ struct sctp_shutdown_ack_chunk *ack_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown_ack;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->ch.chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN to the sender */
+ struct mbuf *m_shutdown;
+ struct sctp_shutdown_chunk *shutdown_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_shutdown_chunk);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *);
+ shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN;
+ shutdown_cp->ch.chunk_flags = 0;
+ shutdown_cp->ch.chunk_length = htons(chk->send_size);
+ shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
+ SCTP_BUF_LEN(m_shutdown) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
+{
+ /*
+ * formulate and queue an ASCONF to the peer. ASCONF parameters
+ * should be queued on the assoc queue.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *m_asconf;
+ struct sctp_asconf_chunk *acp;
+ int len;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* compose an ASCONF chunk, maximum length is PMTU */
+ m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
+ if (m_asconf == NULL) {
+ return;
+ }
+ acp = mtod(m_asconf, struct sctp_asconf_chunk *);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_asconf);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->data = m_asconf;
+ chk->send_size = len;
+ chk->rec.chunk_id.id = SCTP_ASCONF;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf_ack(struct sctp_tcb *stcb)
+{
+ /*
+ * formulate and queue an asconf-ack back to the sender. The asconf-ack
+ * must be stored in the tcb.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_ack *ack, *latest_ack;
+ struct mbuf *m_ack, *m;
+ struct sctp_nets *net = NULL;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Get the latest ASCONF-ACK */
+ latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead);
+ if (latest_ack == NULL) {
+ return;
+ }
+ if (latest_ack->last_sent_to != NULL &&
+ latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) {
+ /* we're doing a retransmission */
+ net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0);
+ if (net == NULL) {
+ /* no alternate */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ } else {
+ /* normal case */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ latest_ack->last_sent_to = net;
+
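+ /*
+ * Queue a copy of every cached ASCONF-ACK that still has data
+ * for transmission to the selected net.
+ */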
+ TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) {
+ if (ack->data == NULL) {
+ continue;
+ }
+ /* copy the asconf_ack */
+ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+ if (m_ack == NULL) {
+ /* couldn't copy it */
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ if (m_ack)
+ sctp_m_freem(m_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+
+ chk->whoTo = net;
+ chk->data = m_ack;
+ chk->send_size = 0;
+ /* Get size */
+ m = m_ack;
+ chk->send_size = ack->len;
+ chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */
+ chk->asoc = &stcb->asoc;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ }
+ return;
+}
+
+
+static int
+sctp_chunk_retransmission(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * send out one MTU of retransmission. If fast_retransmit is
+ * happening we ignore the cwnd. Otherwise we obey the cwnd and
+ * rwnd. For a Cookie or Asconf in the control chunk queue we
+ * retransmit them by themselves.
+ *
+ * For data chunks we will pick out the lowest TSN's in the sent_queue
+ * marked for resend and bundle them all together (up to a MTU of
+ * destination). The address to send to should have been
+ * selected/changed where the retransmission was marked (i.e. in FR
+ * or t3-timeout routines).
+ */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ struct sctp_tmit_chunk *chk, *fwd;
+ struct mbuf *m, *endofchain;
+ struct sctphdr *shdr;
+ int asconf;
+ struct sctp_nets *net = NULL;
+ uint32_t tsns_sent = 0;
+ int no_fragmentflg, bundle_at, cnt_thru;
+ unsigned int mtu;
+ int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started;
+ struct sctp_auth_chunk *auth = NULL;
+ uint32_t auth_offset = 0;
+ uint32_t dmtu = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ tmr_started = ctl_cnt = bundle_at = error = 0;
+ no_fragmentflg = 1;
+ asconf = 0;
+ fwd_tsn = 0;
+ *cnt_out = 0;
+ fwd = NULL;
+ endofchain = m = NULL;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 1);
+#endif
+ if ((TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n",
+ asoc->sent_queue_retran_cnt);
+ asoc->sent_queue_cnt = 0;
+ asoc->sent_queue_cnt_removeable = 0;
+ /* send back 0/0 so we enter normal transmission */
+ *cnt_out = 0;
+ return (0);
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF) ||
+ (chk->rec.chunk_id.id == SCTP_STREAM_RESET) ||
+ (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) {
+ if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ if (chk != asoc->str_reset) {
+ /*
+ * not eligible for retran if it's
+ * not ours
+ */
+ continue;
+ }
+ }
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ no_fragmentflg = 1;
+ asconf = 1;
+ }
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ fwd_tsn = 1;
+ fwd = chk;
+ }
+ /*
+ * Add an AUTH chunk if the chunk requires it, and save
+ * the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m, &endofchain,
+ &auth, &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ break;
+ }
+ }
+ one_chunk = 0;
+ cnt_thru = 0;
+ /* do we have control chunks to retransmit? */
+ if (m != NULL) {
+ /* Start a timer no matter if we succeed or fail */
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
+ } else if (chk->rec.chunk_id.id == SCTP_ASCONF)
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo);
+
+ SCTP_BUF_PREPEND(m, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ return (ENOBUFS);
+ }
+ shdr = mtod(m, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
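+ /*
+ * Account for the prepended SCTP header so auth_offset still
+ * points at the AUTH chunk within the mbuf chain.
+ */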
+ auth_offset += sizeof(struct sctphdr);
+ chk->snd_count++; /* update our count */
+
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo,
+ (struct sockaddr *)&chk->whoTo->ro._l_addr, m, auth_offset,
+ auth, no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /*
+ * We don't want to mark the net->sent time here since
+ * we use this for HB and retransmissions cannot measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */
+ *cnt_out += 1;
+ chk->sent = SCTP_DATAGRAM_SENT;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ if (fwd_tsn == 0) {
+ return (0);
+ } else {
+ /* Clean up the fwd-tsn list */
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+ }
+ }
+ /*
+ * Ok, it is just data retransmission we need to do, or that plus
+ * a fwd-tsn with it.
+ */
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ return (SCTP_RETRAN_DONE);
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
+ /* not yet open, resend the cookie and that is it */
+ return (1);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(20, inp, stcb, NULL);
+#endif
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ /* No, not sent to this net or not ready for rtx */
+ continue;
+ }
+ if ((sctp_max_retran_chunk) && (chk->snd_count >= sctp_max_retran_chunk)) {
+ /* Gak, we have exceeded max unlucky retran, abort! */
+ SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
+ chk->snd_count,
+ sctp_max_retran_chunk);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, NULL, so_locked);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (SCTP_RETRAN_EXIT);
+ }
+ /* pick up the net */
+ net = chk->whoTo;
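+ /*
+ * Figure the per-destination payload budget: path MTU minus the
+ * IP (v6 or v4) plus SCTP header overhead.
+ */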
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+
+ if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) {
+ /* No room in peers rwnd */
+ uint32_t tsn;
+
+ tsn = asoc->last_acked_seq + 1;
+ if (tsn == chk->rec.data.TSN_seq) {
+ /*
+ * we make a special exception for this
+ * case. The peer has no rwnd but is missing
+ * the lowest chunk.. which is probably what
+ * is holding up the rwnd.
+ */
+ goto one_chunk_around;
+ }
+ return (1);
+ }
+one_chunk_around:
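+ /*
+ * If the peer's rwnd cannot hold a full MTU, restrict ourselves to
+ * a single chunk; with a zero rwnd and nothing in flight this
+ * becomes a window probe.
+ */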
+ if (asoc->peers_rwnd < mtu) {
+ one_chunk = 1;
+ if ((asoc->peers_rwnd == 0) &&
+ (asoc->total_flight == 0)) {
+ chk->window_probe = 1;
+ chk->whoTo->window_probe = 1;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 2);
+#endif
+ bundle_at = 0;
+ m = NULL;
+ net->fast_retran_ip = 0;
+ if (chk->rec.data.doing_fast_retransmit == 0) {
+ /*
+ * if no FR is in progress, skip destinations that have
+ * flight_size > cwnd.
+ */
+ if (net->flight_size >= net->cwnd) {
+ continue;
+ }
+ } else {
+ /*
+ * Mark the destination net to have FR recovery
+ * limits put on it.
+ */
+ *fr_done = 1;
+ net->fast_retran_ip = 1;
+ }
+
+ /*
+ * if no AUTH is yet included and this chunk requires it,
+ * make sure to account for it. We don't apply the size
+ * until the AUTH chunk is actually added below in case
+ * there is no room for this chunk.
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+
+ if ((chk->send_size <= (mtu - dmtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /* ok we will add this one */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m, &endofchain,
+ &auth, &auth_offset,
+ stcb, SCTP_DATA);
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (chk->send_size + dmtu))
+ mtu -= (chk->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = chk;
+ if (one_chunk && (asoc->total_flight <= 0)) {
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+ }
+ if (one_chunk == 0) {
+ /*
+ * now are there any more chunks forward from chk to
+ * pick up?
+ */
+ fwd = TAILQ_NEXT(chk, sctp_next);
+ while (fwd) {
+ if (fwd->sent != SCTP_DATAGRAM_RESEND) {
+ /* Nope, not for retran */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if (fwd->whoTo != net) {
+ /* Nope, not the net in question */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+ if (fwd->send_size <= (mtu - dmtu)) {
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m,
+ &endofchain,
+ &auth, &auth_offset,
+ stcb,
+ SCTP_DATA);
+ }
+ m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (fwd->send_size + dmtu))
+ mtu -= (fwd->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = fwd;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ break;
+ }
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ } else {
+ /* can't fit so we are done */
+ break;
+ }
+ }
+ }
+ /* Is there something to send for this destination? */
+ if (m) {
+ /*
+ * No matter if we fail or succeed we should start a
+ * timer. A failure is like a lost IP packet :-)
+ */
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /*
+ * no timer running on this destination;
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ tmr_started = 1;
+ }
+ SCTP_BUF_PREPEND(m, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ return (ENOBUFS);
+ }
+ shdr = mtod(m, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ /* Now let's send it, if there is anything to send :> */
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr, m, auth_offset,
+ auth, no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ /* error, we could not output */
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /* For HB's */
+ /*
+ * We don't want to mark the net->sent time here
+ * since we use this for HB and retransmissions cannot
+ * measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */
+
+ /* For auto-close */
+ cnt_thru++;
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now = asoc->time_last_sent;
+ *now_filled = 1;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ *cnt_out += bundle_at;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC4, bundle_at);
+#endif
+ if (bundle_at) {
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ }
+ for (i = 0; i < bundle_at; i++) {
+ SCTP_STAT_INCR(sctps_sendretransdata);
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ /*
+ * When we have revoked data and we
+ * retransmit it, we clear the revoked
+ * flag since this flag dictates whether
+ * we subtracted it from the flight size
+ */
+ if (data_list[i]->rec.data.chunk_was_revoked) {
+ /* Deflate the cwnd */
+ data_list[i]->whoTo->cwnd -= data_list[i]->book_size;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ }
+ data_list[i]->snd_count++;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ /* record the time */
+ data_list[i]->sent_rcv_time = asoc->time_last_sent;
+ if (data_list[i]->book_size_scale) {
+ /*
+ * need to double the book size on
+ * this one
+ */
+ data_list[i]->book_size_scale = 0;
+ /*
+ * Since we double the booksize, we
+ * must also double the output queue
+ * size, since this gets shrunk by
+ * this amount when we free.
+ */
+ atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size);
+ data_list[i]->book_size *= 2;
+
+
+ } else {
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, sctp_peer_chunk_oh);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size +
+ sctp_peer_chunk_oh));
+ }
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if ((i == 0) &&
+ (data_list[i]->rec.data.doing_fast_retransmit)) {
+ SCTP_STAT_INCR(sctps_sendfastretrans);
+ if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) &&
+ (tmr_started == 0)) {
+ /*-
+ * ok we just fast-retrans'd
+ * the lowest TSN, i.e. the
+ * first on the list. In
+ * this case we want to give
+ * some more time to get a
+ * SACK back without a
+ * t3-expiring.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(21, inp, stcb, NULL);
+#endif
+ } else {
+ /* None will fit */
+ return (1);
+ }
+ if (asoc->sent_queue_retran_cnt <= 0) {
+ /* all done we have no more to retran */
+ asoc->sent_queue_retran_cnt = 0;
+ break;
+ }
+ if (one_chunk) {
+ /* No more room in rwnd */
+ return (1);
+ }
+ /* stop the for loop here. we sent out a packet */
+ break;
+ }
+ return (0);
+}
+
+
+static int
+sctp_timer_validation(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int ret)
+{
+ struct sctp_nets *net;
+
+ /* Validate that a timer is running somewhere */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* Here is a timer */
+ return (ret);
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Gak, we did not have a timer somewhere */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n");
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination);
+ return (ret);
+}
+
+void
+sctp_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ int from_where,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * Ok this is the generic chunk service queue. We must do the
+ * following:
+ * - See if there are retransmits pending, if so we must
+ * do these first.
+ * - Service the stream queue that is next, moving any
+ * message (note I must get a complete message i.e.
+ * FIRST/MIDDLE and LAST to the out queue in one pass) and assigning
+ * TSN's
+ * - Check to see if the cwnd/rwnd allows any output, if so we
+ * go ahead and formulate and send the low level chunks. Making sure
+ * to combine any control in the control chunk queue also.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+ int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0,
+ burst_cnt = 0, burst_limit = 0;
+ struct timeval now;
+ int now_filled = 0;
+ int cwnd_full = 0;
+ int nagle_on = 0;
+ int frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ int un_sent = 0;
+ int fr_done, tot_frs = 0;
+
+ asoc = &stcb->asoc;
+ if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
+ nagle_on = 0;
+ } else {
+ nagle_on = 1;
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
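+ /* un_sent is the byte count queued for the peer but not yet in flight */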
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if ((un_sent <= 0) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0)) {
+ /* Nothing to do unless there is something left to be sent */
+ return;
+ }
+ /*
+ * If we have something to send, data or control, AND a sack timer
+ * is running, piggy-back the sack.
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_send_sack(stcb);
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ }
+ while (asoc->sent_queue_retran_cnt) {
+ /*-
+ * Ok, it is retransmission time only, we send out only ONE
+ * packet with a single call off to the retran code.
+ */
+ if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) {
+ /*-
+ * Special hook for handling cookies discarded
+ * by the peer that carried data. Send cookie-ack only
+ * and then the next call will get the retransmissions.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ } else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) {
+ /* if it's not from a HB then do it */
+ fr_done = 0;
+ ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked);
+ if (fr_done) {
+ tot_frs++;
+ }
+ } else {
+ /*
+ * it's from any other place, we don't allow retran
+ * output (only control)
+ */
+ ret = 1;
+ }
+ if (ret > 0) {
+ /* Can't send anymore */
+ /*-
+ * now let's push out control by calling med-level
+ * output once. This assures that we WILL send HB's
+ * if queued too.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(8, inp, stcb, NULL);
+#endif
+ (void)sctp_timer_validation(inp, stcb, asoc, ret);
+ return;
+ }
+ if (ret < 0) {
+ /*-
+ * The count was off; retran is not happening, so fall
+ * through to the normal transmission path.
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(9, inp, stcb, NULL);
+#endif
+ if (ret == SCTP_RETRAN_EXIT) {
+ return;
+ }
+ break;
+ }
+ if (from_where == SCTP_OUTPUT_FROM_T3) {
+ /* Only one transmission allowed out of a timeout */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(10, inp, stcb, NULL);
+#endif
+ /* Push out any control */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ }
+ if (tot_frs > asoc->max_burst) {
+ /* Hit FR burst limit */
+ return;
+ }
+ if ((num_out == 0) && (ret == 0)) {
+
+ /* No more retrans to send */
+ break;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(12, inp, stcb, NULL);
+#endif
+ /* Check for bad destinations, if they exist move chunks around. */
+ burst_limit = asoc->max_burst;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /*-
+ * If possible, move things off of this address. We
+ * may still send below due to the dormant state, but
+ * we try to find an alternate address to send to,
+ * and if we have one we move all queued data on the
+ * out wheel to this alternate address.
+ */
+ if (net->ref_count > 1)
+ sctp_move_to_an_alt(stcb, asoc, net);
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the current
+ * destination is in PF state, move all queued data
+ * to an alternate destination.
+ */
+ if (net->ref_count > 1)
+ sctp_move_to_an_alt(stcb, asoc, net);
+ } else {
+ /*-
+ * if ((asoc->sat_network) || (net->addr_is_local))
+ * { burst_limit = asoc->max_burst *
+ * SCTP_SAT_NETWORK_BURST_INCR; }
+ */
+ if (sctp_use_cwnd_based_maxburst) {
+ if ((net->flight_size + (burst_limit * net->mtu)) < net->cwnd) {
+ /*
+ * JRS - Use the congestion control
+ * given in the congestion control
+ * module
+ */
+ asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, burst_limit);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, 0, burst_limit, SCTP_MAX_BURST_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ }
+ net->fast_retran_ip = 0;
+ } else {
+ if (net->flight_size == 0) {
+ /* Should be decaying the cwnd here */
+ ;
+ }
+ }
+ }
+
+ }
+ burst_cnt = 0;
+ cwnd_full = 0;
+ do {
+ error = sctp_med_chunk_output(inp, stcb, asoc, &num_out,
+ &reason_code, 0, &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ if (error) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES);
+ sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES);
+ }
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out);
+
+ tot_out += num_out;
+ burst_cnt++;
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES);
+ if (num_out == 0) {
+ sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES);
+ }
+ }
+ if (nagle_on) {
+ /*-
+ * When nagle is on, we look at how much is un_sent, then
+ * if it's smaller than an MTU and we have data in
+ * flight we stop.
+ */
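+ /*
+ * un_sent here is the queued-but-not-in-flight bytes plus the data
+ * chunk header overhead for each chunk not yet in flight.
+ */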
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count)
+ * sizeof(struct sctp_data_chunk)));
+ if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
+ (stcb->asoc.total_flight > 0)) {
+ break;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Nothing left to send */
+ break;
+ }
+ if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) {
+ /* Nothing left to send */
+ break;
+ }
+ } while (num_out && (sctp_use_cwnd_based_maxburst ||
+ (burst_cnt < burst_limit)));
+
+ if (sctp_use_cwnd_based_maxburst == 0) {
+ if (burst_cnt >= burst_limit) {
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ asoc->burst_limit_applied = 1;
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED);
+ }
+ } else {
+ asoc->burst_limit_applied = 0;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n",
+ tot_out);
+
+ /*-
+ * Now we need to clean up the control chunk chain if an ECNE is on
+ * it. It must be marked as UNSENT again so the next call will continue
+ * to send it until we get a CWR to remove it.
+ */
+ if (stcb->asoc.ecn_echo_cnt_onq)
+ sctp_fix_ecn_echo(asoc);
+ return;
+}
+
+
+int
+sctp_output(inp, m, addr, control, p, flags)
+ struct sctp_inpcb *inp;
+ struct mbuf *m;
+ struct sockaddr *addr;
+ struct mbuf *control;
+ struct thread *p;
+ int flags;
+{
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_socket == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ return (sctp_sosend(inp->sctp_socket,
+ addr,
+ (struct uio *)NULL,
+ m,
+ control,
+ flags, p
+ ));
+}
+
+void
+send_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_forward_tsn_chunk *fwdtsn;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ /* mark it as unsent */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ /* Do we correct its output location? */
+ if (chk->whoTo != asoc->primary_destination) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ }
+ goto sctp_fill_in_rest;
+ }
+ }
+ /* Ok if we reach here we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = asoc;
+ chk->whoTo = NULL;
+
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+sctp_fill_in_rest:
+ /*-
+ * Here we go through and fill out the part that deals with
+ * stream/seq of the ones we skip.
+ */
+ SCTP_BUF_LEN(chk->data) = 0;
+ {
+ struct sctp_tmit_chunk *at, *tp1, *last;
+ struct sctp_strseq *strseq;
+ unsigned int cnt_of_space, i, ovh;
+ unsigned int space_needed;
+ unsigned int cnt_of_skipped = 0;
+
+ TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
+ if (at->sent != SCTP_FORWARD_TSN_SKIP) {
+ /* no more to look at */
+ break;
+ }
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ continue;
+ }
+ cnt_of_skipped++;
+ }
+ space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq)));
+
+ cnt_of_space = M_TRAILINGSPACE(chk->data);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
+ /* trim to a mtu size */
+ cnt_of_space = asoc->smallest_mtu - ovh;
+ }
+ if (cnt_of_space < space_needed) {
+ /*-
+ * ok we must trim down the chunk by lowering the
+ * advanced peer ack point.
+ */
+ cnt_of_skipped = (cnt_of_space -
+ ((sizeof(struct sctp_forward_tsn_chunk)) /
+ sizeof(struct sctp_strseq)));
+ /*-
+ * Go through and find the TSN that will be the one
+ * we report.
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ at = tp1;
+ }
+ last = at;
+ /*-
+ * last now points to last one I can report, update
+ * peer ack point
+ */
+ asoc->advanced_peer_ack_point = last->rec.data.TSN_seq;
+ space_needed -= (cnt_of_skipped * sizeof(struct sctp_strseq));
+ }
+ chk->send_size = space_needed;
+ /* Setup the chunk */
+ fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
+ fwdtsn->ch.chunk_length = htons(chk->send_size);
+ fwdtsn->ch.chunk_flags = 0;
+ fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn->new_cumulative_tsn = htonl(asoc->advanced_peer_ack_point);
+ chk->send_size = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq)));
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ fwdtsn++;
+ /*-
+ * Move pointer to after the fwdtsn and transfer to the
+ * strseq pointer.
+ */
+ strseq = (struct sctp_strseq *)fwdtsn;
+ /*-
+ * Now populate the strseq list. This is done blindly
+ * without pulling out duplicate stream info. This is
+ * inefficient but won't harm the process since the peer will
+ * look at these in sequence and will thus release anything.
+ * It could mean we exceed the PMTU and chop off some that
+ * we could have included, but this is unlikely (aka 1432/4
+ * would mean 300+ stream seq's would have to be reported in
+ * one FWD-TSN). With a bit of work we can later FIX this to
+ * optimize and pull out duplicates, but it does add more
+ * overhead. So for now... not!
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ i--;
+ at = tp1;
+ continue;
+ }
+ strseq->stream = ntohs(at->rec.data.stream_number);
+ strseq->sequence = ntohs(at->rec.data.stream_seq);
+ strseq++;
+ at = tp1;
+ }
+ }
+ return;
+
+}
+
+void
+sctp_send_sack(struct sctp_tcb *stcb)
+{
+ /*-
+ * Queue up a SACK in the control queue. We must first check to see
+ * if a SACK is somehow on the control queue. If so, we will take
+ * and remove the old one.
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *a_chk;
+ struct sctp_sack_chunk *sack;
+ struct sctp_gap_ack_block *gap_descriptor;
+ struct sack_track *selector;
+ int mergeable = 0;
+ int offset;
+ caddr_t limit;
+ uint32_t *dup;
+ int limit_reached = 0;
+ unsigned int i, jstart, siz, j;
+ unsigned int num_gap_blocks = 0, space;
+ int num_dups = 0;
+ int space_req;
+
+ a_chk = NULL;
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->last_data_chunk_from == NULL) {
+ /* Hmm we never received anything */
+ return;
+ }
+ sctp_set_rwnd(stcb, asoc);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) {
+ /* Hmm, found a sack already on queue, remove it */
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt--;
+ a_chk = chk;
+ if (a_chk->data) {
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_remote_addr(a_chk->whoTo);
+ a_chk->whoTo = NULL;
+ break;
+ }
+ }
+ if (a_chk == NULL) {
+ sctp_alloc_a_chunk(stcb, a_chk);
+ if (a_chk == NULL) {
+ /* No memory so we drop the idea, and set a timer */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ a_chk->copy_by_ref = 0;
+ /* a_chk->rec.chunk_id.id = SCTP_SELECTIVE_ACK; */
+ a_chk->rec.chunk_id.id = SCTP_SELECTIVE_ACK;
+ a_chk->rec.chunk_id.can_take_data = 1;
+ }
+ /* Clear our pkt counts */
+ asoc->data_pkts_seen = 0;
+
+ a_chk->asoc = asoc;
+ a_chk->snd_count = 0;
+ a_chk->send_size = 0; /* fill in later */
+ a_chk->sent = SCTP_DATAGRAM_UNSENT;
+ a_chk->whoTo = NULL;
+
+ if ((asoc->numduptsns) ||
+ (asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)
+ ) {
+ /*-
+ * Ok, we have some duplicates or the destination for the
+ * sack is unreachable, let's see if we can select an
+ * alternate to asoc->last_data_chunk_from
+ */
+ if ((!(asoc->last_data_chunk_from->dest_state &
+ SCTP_ADDR_NOT_REACHABLE)) &&
+ (asoc->used_alt_onsack > asoc->numnets)) {
+ /* We used an alt last time, don't use one this time */
+ a_chk->whoTo = NULL;
+ } else {
+ asoc->used_alt_onsack++;
+ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
+ }
+ if (a_chk->whoTo == NULL) {
+ /* Nope, no alternate */
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ asoc->used_alt_onsack = 0;
+ }
+ } else {
+ /*
+ * No duplicates so we use the last place we received data
+ * from.
+ */
+ asoc->used_alt_onsack = 0;
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ }
+ if (a_chk->whoTo) {
+ atomic_add_int(&a_chk->whoTo->ref_count, 1);
+ }
+ if (asoc->highest_tsn_inside_map == asoc->cumulative_tsn) {
+ /* no gaps */
+ space_req = sizeof(struct sctp_sack_chunk);
+ } else {
+ /* gaps get a cluster */
+ space_req = MCLBYTES;
+ }
+ /* Ok now let's formulate an MBUF with our sack */
+ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+ if ((a_chk->data == NULL) ||
+ (a_chk->whoTo == NULL)) {
+ /* rats, no mbuf memory */
+ if (a_chk->data) {
+ /* was a problem with the destination */
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, a_chk);
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ /* ok, let's go through and fill it in */
+ SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
+ space = M_TRAILINGSPACE(a_chk->data);
+ if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
+ space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
+ }
+ limit = mtod(a_chk->data, caddr_t);
+ limit += space;
+
+ sack = mtod(a_chk->data, struct sctp_sack_chunk *);
+ sack->ch.chunk_type = SCTP_SELECTIVE_ACK;
+ /* 0x01 is used by nonce for ecn */
+ if ((sctp_ecn_enable) &&
+ (sctp_ecn_nonce) &&
+ (asoc->peer_supports_ecn_nonce))
+ sack->ch.chunk_flags = (asoc->receiver_nonce_sum & SCTP_SACK_NONCE_SUM);
+ else
+ sack->ch.chunk_flags = 0;
+
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*-
+ * CMT DAC algorithm: If 2 (i.e., binary 10) packets have been
+ * received, then set high bit to 1, else 0. Reset
+ * pkts_rcvd.
+ */
+ sack->ch.chunk_flags |= (asoc->cmt_dac_pkts_rcvd << 6);
+ asoc->cmt_dac_pkts_rcvd = 0;
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn;
+ stcb->asoc.cumack_log_atsnt++;
+ if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_atsnt = 0;
+ }
+#endif
+ sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ sack->sack.a_rwnd = htonl(asoc->my_rwnd);
+ asoc->my_last_reported_rwnd = asoc->my_rwnd;
+
+ /* reset the reader's interpretation */
+ stcb->freed_by_sorcv_sincelast = 0;
+
+ gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk));
+
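+ /*
+ * siz is the number of mapping array bytes that cover the TSNs from
+ * the array base up to the highest TSN seen (8 TSNs per byte,
+ * rounded up).
+ */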
+ siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
+ if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) {
+ offset = 1;
+ /*-
+ * cum-ack behind the mapping array, so we start and use all
+ * entries.
+ */
+ jstart = 0;
+ } else {
+ offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
+ /*-
+ * we skip the first one when the cum-ack is at or above the
+ * mapping array base. Note this only works if
+ */
+ jstart = 1;
+ }
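+ /*
+ * Walk the mapping array a byte (8 TSNs) at a time; the sack_array
+ * lookup table turns each byte value into its runs of received TSNs,
+ * which are emitted as gap-ack blocks offset from the cumulative TSN.
+ */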
+ if (compare_with_wrap(asoc->highest_tsn_inside_map, asoc->cumulative_tsn, MAX_TSN)) {
+ /* we have a gap .. maybe */
+ for (i = 0; i < siz; i++) {
+ selector = &sack_array[asoc->mapping_array[i]];
+ if (mergeable && selector->right_edge) {
+ /*
+ * Backup, left and right edges were ok to
+ * merge.
+ */
+ num_gap_blocks--;
+ gap_descriptor--;
+ }
+ if (selector->num_entries == 0)
+ mergeable = 0;
+ else {
+ for (j = jstart; j < selector->num_entries; j++) {
+ if (mergeable && selector->right_edge) {
+ /*
+ * do a merge by NOT setting
+ * the left side
+ */
+ mergeable = 0;
+ } else {
+ /*
+ * no merge, set the left
+ * side
+ */
+ mergeable = 0;
+ gap_descriptor->start = htons((selector->gaps[j].start + offset));
+ }
+ gap_descriptor->end = htons((selector->gaps[j].end + offset));
+ num_gap_blocks++;
+ gap_descriptor++;
+ if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
+ /* no more room */
+ limit_reached = 1;
+ break;
+ }
+ }
+ if (selector->left_edge) {
+ mergeable = 1;
+ }
+ }
+ if (limit_reached) {
+ /* Reached the limit stop */
+ break;
+ }
+ jstart = 0;
+ offset += 8;
+ }
+ if (num_gap_blocks == 0) {
+ /*
+ * slide not yet happened, and somehow we got called
+ * to send a sack. Cumack needs to move up.
+ */
+ int abort_flag = 0;
+
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
+ sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ sctp_sack_check(stcb, 0, 0, &abort_flag);
+ }
+ }
+ /* now we must add any dups we are going to report. */
+ if ((limit_reached == 0) && (asoc->numduptsns)) {
+ dup = (uint32_t *) gap_descriptor;
+ for (i = 0; i < asoc->numduptsns; i++) {
+ *dup = htonl(asoc->dup_tsns[i]);
+ dup++;
+ num_dups++;
+ if (((caddr_t)dup + sizeof(uint32_t)) > limit) {
+ /* no more room */
+ break;
+ }
+ }
+ asoc->numduptsns = 0;
+ }
+ /*
+ * now that the chunk is prepared queue it to the control chunk
+ * queue.
+ */
+ a_chk->send_size = (sizeof(struct sctp_sack_chunk) +
+ (num_gap_blocks * sizeof(struct sctp_gap_ack_block)) +
+ (num_dups * sizeof(int32_t)));
+ SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
+ sack->sack.num_gap_ack_blks = htons(num_gap_blocks);
+ sack->sack.num_dup_tsns = htons(num_dups);
+ sack->ch.chunk_length = htons(a_chk->send_size);
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+ asoc->send_sack = 0;
+ SCTP_STAT_INCR(sctps_sendsacks);
+ return;
+}
+
+
+void
+sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_abort;
+ struct mbuf *m_out = NULL, *m_end = NULL;
+ struct sctp_abort_chunk *abort = NULL;
+ int sz;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+ struct sctphdr *shdr;
+
+ /*-
+ * Add an AUTH chunk if the chunk requires it, and save the offset into
+ * the chain for AUTH
+ */
+ if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
+ stcb->asoc.peer_auth_chunks)) {
+ m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset,
+ stcb, SCTP_ABORT_ASSOCIATION);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_abort == NULL) {
+ /* no mbuf's */
+ if (m_out)
+ sctp_m_freem(m_out);
+ return;
+ }
+ /* link in any error */
+ SCTP_BUF_NEXT(m_abort) = operr;
+ sz = 0;
+ if (operr) {
+ struct mbuf *n;
+
+ n = operr;
+ while (n) {
+ sz += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ }
+ SCTP_BUF_LEN(m_abort) = sizeof(*abort);
+ if (m_out == NULL) {
+ /* NO Auth chunk prepended, so reserve space in front */
+ SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD);
+ m_out = m_abort;
+ } else {
+ /* Put AUTH chunk at the front of the chain */
+ SCTP_BUF_NEXT(m_end) = m_abort;
+ }
+
+ /* fill in the ABORT chunk */
+ abort = mtod(m_abort, struct sctp_abort_chunk *);
+ abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+ abort->ch.chunk_flags = 0;
+ abort->ch.chunk_length = htons(sizeof(*abort) + sz);
+
+ /* prepend and fill in the SCTP header */
+ SCTP_BUF_PREPEND(m_out, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m_out == NULL) {
+ /* TSNH: no memory */
+ return;
+ }
+ shdr = mtod(m_out, struct sctphdr *);
+ shdr->src_port = stcb->sctp_ep->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination,
+ (struct sockaddr *)&stcb->asoc.primary_destination->ro._l_addr,
+ m_out, auth_offset, auth, 1, 0, NULL, 0, so_locked);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+void
+sctp_send_shutdown_complete(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *m_shutdown_comp;
+ struct sctp_shutdown_complete_msg *comp_cp;
+
+ m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_complete_msg), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_comp == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ comp_cp = mtod(m_shutdown_comp, struct sctp_shutdown_complete_msg *);
+ comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ comp_cp->shut_cmp.ch.chunk_flags = 0;
+ comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+ comp_cp->sh.src_port = stcb->sctp_ep->sctp_lport;
+ comp_cp->sh.dest_port = stcb->rport;
+ comp_cp->sh.v_tag = htonl(stcb->asoc.peer_vtag);
+ comp_cp->sh.checksum = 0;
+
+ SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_msg);
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m_shutdown_comp, 0, NULL, 1, 0, NULL, 0, SCTP_SO_NOT_LOCKED);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+}
+
+void
+sctp_send_shutdown_complete2(struct mbuf *m, int iphlen, struct sctphdr *sh,
+ uint32_t vrf_id)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct ip *iph, *iph_out;
+ struct ip6_hdr *ip6, *ip6_out;
+ int offset_out, len, mlen;
+ struct sctp_shutdown_complete_msg *comp_cp;
+
+ /* Get room for the largest message */
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_shutdown_complete_msg));
+ mout = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ return;
+ }
+ SCTP_BUF_LEN(mout) = len;
+ iph = mtod(m, struct ip *);
+ iph_out = NULL;
+ ip6_out = NULL;
+ offset_out = 0;
+ if (iph->ip_v == IPVERSION) {
+ SCTP_BUF_LEN(mout) = sizeof(struct ip) +
+ sizeof(struct sctp_shutdown_complete_msg);
+ SCTP_BUF_NEXT(mout) = NULL;
+ iph_out = mtod(mout, struct ip *);
+
+ /* Fill in the IP header for the SHUTDOWN-COMPLETE */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ iph_out->ip_p = IPPROTO_SCTP;
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+ offset_out += sizeof(*iph_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)iph_out + offset_out);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ ip6 = (struct ip6_hdr *)iph;
+ SCTP_BUF_LEN(mout) = sizeof(struct ip6_hdr) +
+ sizeof(struct sctp_shutdown_complete_msg);
+ SCTP_BUF_NEXT(mout) = NULL;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+
+ /* Fill in the IPv6 header for the SHUTDOWN-COMPLETE */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = ip6_defhlim;
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+ /*
+ * ?? The old code had both the iph len + payload, I think
+ * this is wrong and would never have worked
+ */
+ ip6_out->ip6_plen = sizeof(struct sctp_shutdown_complete_msg);
+ offset_out += sizeof(*ip6_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)ip6_out + offset_out);
+ } else {
+ /* Currently not supported. */
+ return;
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ /* Now copy in and fill in the SHUTDOWN-COMPLETE ports, tag, etc. */
+ comp_cp->sh.src_port = sh->dest_port;
+ comp_cp->sh.dest_port = sh->src_port;
+ comp_cp->sh.checksum = 0;
+ comp_cp->sh.v_tag = sh->v_tag;
+ comp_cp->shut_cmp.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+
+ /* add checksum */
+ comp_cp->sh.checksum = sctp_calculate_sum(mout, NULL, offset_out);
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+
+ mlen = SCTP_BUF_LEN(mout);
+ bzero(&ro, sizeof ro);
+ /* set IPv4 length */
+ iph_out->ip_len = mlen;
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+
+ /* out it goes */
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+
+ bzero(&ro, sizeof(ro));
+ mlen = SCTP_BUF_LEN(mout);
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+
+}
+
+static struct sctp_nets *
+sctp_select_hb_destination(struct sctp_tcb *stcb, struct timeval *now)
+{
+ struct sctp_nets *net, *hnet;
+ int ms_goneby, highest_ms, state_overide = 0;
+
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ highest_ms = 0;
+ hnet = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (
+ ((net->dest_state & SCTP_ADDR_NOHB) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) ||
+ (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)
+ ) {
+ /*
+ * Skip this guy from consideration if HB is off AND
+ * it's confirmed
+ */
+ continue;
+ }
+ if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr) == 0) {
+ /* skip this dest net from consideration */
+ continue;
+ }
+ if (net->last_sent_time.tv_sec) {
+ /* We have sent to it, so we subtract */
+ ms_goneby = (now->tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else
+ /* Never been sent to */
+ ms_goneby = 0x7fffffff;
+ /*-
+ * When the address state is unconfirmed but still
+ * considered reachable, we HB at a higher rate. Once it
+ * goes confirmed OR reaches the "unreachable" state, then
+ * we cut back to HB at a more normal pace.
+ */
+ if ((net->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if ((((unsigned int)ms_goneby >= net->RTO) || (state_overide)) &&
+ (ms_goneby > highest_ms)) {
+ highest_ms = ms_goneby;
+ hnet = net;
+ }
+ }
+ if (hnet &&
+ ((hnet->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED)) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if (hnet && highest_ms && (((unsigned int)highest_ms >= hnet->RTO) || state_overide)) {
+ /*-
+ * Found the one with longest delay bounds OR it is
+ * unconfirmed and still not marked unreachable.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "net:%p is the hb winner -", hnet);
+#ifdef SCTP_DEBUG
+ if (hnet) {
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT4,
+ (struct sockaddr *)&hnet->ro._l_addr);
+ } else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, " none\n");
+ }
+#endif
+ /* update the timer now */
+ hnet->last_sent_time = *now;
+ return (hnet);
+ }
+ /* Nothing to HB */
+ return (NULL);
+}
+
+int
+sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_nets *net;
+ struct sctp_heartbeat_chunk *hb;
+ struct timeval now;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (user_req == 0) {
+ net = sctp_select_hb_destination(stcb, &now);
+ if (net == NULL) {
+ /*-
+ * All are busy, none to send to; just start the
+ * timer again.
+ */
+ if (stcb->asoc.state == 0) {
+ return (0);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep,
+ stcb,
+ net);
+ return (0);
+ }
+ } else {
+ net = u_net;
+ if (net == NULL) {
+ return (0);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin->sin_family != AF_INET) {
+ if (sin->sin_family != AF_INET6) {
+ /* huh */
+ return (0);
+ }
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
+ return (0);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_heartbeat_chunk);
+
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return (0);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ /* Now we have a mbuf that we can fill in with the details */
+ hb = mtod(chk->data, struct sctp_heartbeat_chunk *);
+ memset(hb, 0, sizeof(struct sctp_heartbeat_chunk));
+ /* fill out chunk header */
+ hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST;
+ hb->ch.chunk_flags = 0;
+ hb->ch.chunk_length = htons(chk->send_size);
+ /* Fill out hb parameter */
+ hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO);
+ hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param));
+ hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
+ hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
+ /* Did our user request this one? Put it in */
+ hb->heartbeat.hb_info.user_req = user_req;
+ hb->heartbeat.hb_info.addr_family = sin->sin_family;
+ hb->heartbeat.hb_info.addr_len = sin->sin_len;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * we only take from the entropy pool if the address is not
+ * confirmed.
+ */
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ } else {
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0;
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0;
+ }
+ if (sin->sin_family == AF_INET) {
+ memcpy(hb->heartbeat.hb_info.address, &sin->sin_addr, sizeof(sin->sin_addr));
+ } else if (sin->sin_family == AF_INET6) {
+ /* We leave the scope the way it is in our lookup table. */
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ memcpy(hb->heartbeat.hb_info.address, &sin6->sin6_addr, sizeof(sin6->sin6_addr));
+ } else {
+ /* huh compiler bug */
+ return (0);
+ }
+
+ /*
+ * JRS 5/14/07 - In CMT PF, the T3 timer is used to track
+ * PF-heartbeats. Because of this, threshold management is done by
+ * the t3 timer handler, and does not need to be done upon the send
+ * of a PF-heartbeat. If CMT PF is on and the destination to which a
+ * heartbeat is being sent is in PF state, do NOT do threshold
+ * management.
+ */
+ if ((sctp_cmt_pf == 0) || ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF)) {
+ /* ok we have a destination that needs a beat */
+ /* let's do the threshold management Qiaobing style */
+ if (sctp_threshold_management(stcb->sctp_ep, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /*-
+ * we have lost the association, in a way this is
+ * quite bad since we really are one less time since
+ * we really did not send yet. This is the down side
+ * to the Q's style as defined in the RFC and not my
+ * alternate style defined in the RFC.
+ */
+ if (chk->data != NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /*
+ * Here we do NOT use the macro since the
+ * association is now gone.
+ */
+ if (chk->whoTo) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = NULL;
+ }
+ sctp_free_a_chunk((struct sctp_tcb *)NULL, chk);
+ return (-1);
+ }
+ }
+ net->hb_responded = 0;
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ SCTP_STAT_INCR(sctps_sendheartbeat);
+ /*-
+ * Call directly med level routine to put out the chunk. It will
+ * always tumble out control chunks aka HB but it may even tumble
+ * out data too.
+ */
+ return (1);
+}
+
+void
+sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_ecne_chunk *ecne;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ /* found a previous ECN_ECHO, update it if needed */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->tsn = htonl(high_tsn);
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_STAT_INCR(sctps_sendecne);
+ chk->rec.chunk_id.id = SCTP_ECN_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_ecne_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ stcb->asoc.ecn_echo_cnt_onq++;
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->ch.chunk_type = SCTP_ECN_ECHO;
+ ecne->ch.chunk_flags = 0;
+ ecne->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk));
+ ecne->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct mbuf *m, int iphlen, int bad_crc)
+{
+ struct sctp_association *asoc;
+ struct sctp_pktdrop_chunk *drp;
+ struct sctp_tmit_chunk *chk;
+ uint8_t *datap;
+ int len;
+ int was_trunc = 0;
+ struct ip *iph;
+ int fullsz = 0, extra = 0;
+ long spc;
+ int offset;
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ if (!stcb) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->peer_supports_pktdrop == 0) {
+ /*-
+ * peer must declare support before I send one.
+ */
+ return;
+ }
+ if (stcb->sctp_socket == NULL) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ iph = mtod(m, struct ip *);
+ if (iph == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ if (iph->ip_v == IPVERSION) {
+ /* IPv4 */
+ len = chk->send_size = iph->ip_len;
+ } else {
+ struct ip6_hdr *ip6h;
+
+ /* IPv6 */
+ ip6h = mtod(m, struct ip6_hdr *);
+ len = chk->send_size = htons(ip6h->ip6_plen);
+ }
+ /* Validate that we do not have an ABORT in here. */
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_PACKET_DROPPED:
+ case SCTP_ABORT_ASSOCIATION:
+ /*-
+ * we don't respond with a PKT-DROP to an ABORT
+ * or PKT-DROP
+ */
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+
+ if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) >
+ min(stcb->asoc.smallest_mtu, MCLBYTES)) {
+ /*
+ * only send 1 mtu worth, trim off the excess on the end.
+ */
+ fullsz = len - extra;
+ len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
+ was_trunc = 1;
+ }
+ chk->asoc = &stcb->asoc;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+jump_out:
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ drp = mtod(chk->data, struct sctp_pktdrop_chunk *);
+ if (drp == NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ goto jump_out;
+ }
+ chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) +
+ sizeof(struct sctphdr) + SCTP_MED_OVERHEAD));
+ chk->book_size_scale = 0;
+ if (was_trunc) {
+ drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED;
+ drp->trunc_len = htons(fullsz);
+ /*
+ * Len is already adjusted to size minus overhead above; take
+ * the pkt_drop chunk itself out of it.
+ */
+ chk->send_size = len - sizeof(struct sctp_pktdrop_chunk);
+ len = chk->send_size;
+ } else {
+ /* no truncation needed */
+ drp->ch.chunk_flags = 0;
+ drp->trunc_len = htons(0);
+ }
+ if (bad_crc) {
+ drp->ch.chunk_flags |= SCTP_BADCRC;
+ }
+ chk->send_size += sizeof(struct sctp_pktdrop_chunk);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (net) {
+ /* we should hit here */
+ chk->whoTo = net;
+ } else {
+ chk->whoTo = asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+ chk->rec.chunk_id.can_take_data = 1;
+ drp->ch.chunk_type = SCTP_PACKET_DROPPED;
+ drp->ch.chunk_length = htons(chk->send_size);
+ spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
+ if (spc < 0) {
+ spc = 0;
+ }
+ drp->bottle_bw = htonl(spc);
+ if (asoc->my_rwnd) {
+ drp->current_onq = htonl(asoc->size_on_reasm_queue +
+ asoc->size_on_all_streams +
+ asoc->my_rwnd_control_len +
+ stcb->sctp_socket->so_rcv.sb_cc);
+ } else {
+ /*-
+ * If my rwnd is 0, possibly from mbuf depletion as well as
+ * space used, tell the peer there is NO space aka onq == bw
+ */
+ drp->current_onq = htonl(spc);
+ }
+ drp->reserved = 0;
+ datap = drp->data;
+ m_copydata(m, iphlen, len, (caddr_t)datap);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_cwr_chunk *cwr;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_CWR) {
+ /* found a previous ECN_CWR, update it if needed */
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ if (compare_with_wrap(high_tsn, ntohl(cwr->tsn),
+ MAX_TSN)) {
+ cwr->tsn = htonl(high_tsn);
+ }
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_ECN_CWR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_cwr_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ cwr->ch.chunk_type = SCTP_ECN_CWR;
+ cwr->ch.chunk_flags = 0;
+ cwr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk));
+ cwr->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_out_request *req_out;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
+ req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
+ req_out->ph.param_length = htons(len);
+ req_out->request_seq = htonl(seq);
+ req_out->response_seq = htonl(resp_seq);
+ req_out->send_reset_at_tsn = htonl(last_sent);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_out->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_out->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
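Worked example for the padding logic above, assuming the usual 16-byte fixed part of the out-request (parameter header plus the three 32-bit sequence fields): with three streams listed, len = 16 + 3 * sizeof(uint16_t) = 22. The parameter and chunk lengths advertise the unpadded 22 bytes (SCTP lengths never include padding), the mbuf/send size is rounded up via SCTP_SIZE32() to 24, and the extra list slot is zeroed so the two pad bytes that actually go on the wire are deterministic.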
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_in_request *req_in;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
+ req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
+ req_in->ph.param_length = htons(len);
+ req_in->request_seq = htonl(seq);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_in->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_in->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq)
+{
+ int len, old_len;
+ struct sctp_stream_reset_tsn_request *req_tsn;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_tsn = (struct sctp_stream_reset_tsn_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_tsn_request);
+ req_tsn->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST);
+ req_tsn->ph.param_length = htons(len);
+ req_tsn->request_seq = htonl(seq);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->send_size = len + old_len;
+ chk->book_size = SCTP_SIZE32(chk->send_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ return;
+}
+
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+ resp->result = htonl(result);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+
+}
+
+
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response_tsn *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response_tsn);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+ resp->result = htonl(result);
+ resp->senders_next_tsn = htonl(send_una);
+ resp->receivers_next_tsn = htonl(recv_next);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list,
+ uint8_t send_out_req, uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req)
+{
+
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ uint32_t seq;
+
+ asoc = &stcb->asoc;
+ if (asoc->stream_reset_outstanding) {
+ /*-
+ * Already one pending, must get ACK back to clear the flag.
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
+ return (EBUSY);
+ }
+ if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0)) {
+ /* nothing to do */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (send_tsn_req && (send_out_req || send_in_req)) {
+ /* error, can't do that */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+
+ seq = stcb->asoc.str_reset_seq_out;
+ if (send_out_req) {
+ sctp_add_stream_reset_out(chk, number_entries, list,
+ seq, resp_seq, (stcb->asoc.sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_in_req) {
+ sctp_add_stream_reset_in(chk, number_entries, list, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_tsn_req) {
+ sctp_add_stream_reset_tsn(chk, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ asoc->str_reset = chk;
+
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ return (0);
+}
+
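A hypothetical caller of sctp_send_str_reset_req(), for example a socket-option handler asking the peer to reset two outgoing streams, would look roughly like the sketch below. In the real callers the association (TCB) lock is held and resp_seq echoes the peer's last request sequence; the values here are purely illustrative.

    /* Hypothetical: request a reset of outgoing streams 1 and 4. */
    static int
    example_reset_out_streams(struct sctp_tcb *stcb)
    {
        uint16_t streams[2] = { 1, 4 };

        /* out-request only: no in-request, no TSN reset */
        return (sctp_send_str_reset_req(stcb, 2, streams,
            1 /* send_out_req */, 0 /* resp_seq */,
            0 /* send_in_req */, 0 /* send_tsn_req */));
    }

EBUSY from this call means a previous reset request is still waiting for its response.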
+void
+sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
+ struct mbuf *err_cause, uint32_t vrf_id)
+{
+ /*-
+ * Formulate the abort message, and send it back down.
+ */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct sctp_abort_msg *abm;
+ struct ip *iph, *iph_out;
+ struct ip6_hdr *ip6, *ip6_out;
+ int iphlen_out, len;
+
+ /* don't respond to ABORT with ABORT */
+ if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ return;
+ }
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_abort_msg));
+
+ mout = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ return;
+ }
+ iph = mtod(m, struct ip *);
+ iph_out = NULL;
+ ip6_out = NULL;
+ if (iph->ip_v == IPVERSION) {
+ iph_out = mtod(mout, struct ip *);
+ SCTP_BUF_LEN(mout) = sizeof(*iph_out) + sizeof(*abm);
+ SCTP_BUF_NEXT(mout) = err_cause;
+
+ /* Fill in the IP header for the ABORT */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ iph_out->ip_p = IPPROTO_SCTP;
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+
+ iphlen_out = sizeof(*iph_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)iph_out + iphlen_out);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ ip6 = (struct ip6_hdr *)iph;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+ SCTP_BUF_LEN(mout) = sizeof(*ip6_out) + sizeof(*abm);
+ SCTP_BUF_NEXT(mout) = err_cause;
+
+ /* Fill in the IP6 header for the ABORT */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = ip6_defhlim;
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+
+ iphlen_out = sizeof(*ip6_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)ip6_out + iphlen_out);
+ } else {
+ /* Currently not supported */
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ sctp_m_freem(mout);
+ return;
+ }
+
+ abm->sh.src_port = sh->dest_port;
+ abm->sh.dest_port = sh->src_port;
+ abm->sh.checksum = 0;
+ if (vtag == 0) {
+ abm->sh.v_tag = sh->v_tag;
+ abm->msg.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ } else {
+ abm->sh.v_tag = htonl(vtag);
+ abm->msg.ch.chunk_flags = 0;
+ }
+ abm->msg.ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+
+ if (err_cause) {
+ struct mbuf *m_tmp = err_cause;
+ int err_len = 0;
+
+ /* get length of the err_cause chain */
+ while (m_tmp != NULL) {
+ err_len += SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ len = SCTP_BUF_LEN(mout) + err_len;
+ if (err_len % 4) {
+ /* need pad at end of chunk */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(mout, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch) + err_len);
+ } else {
+ len = SCTP_BUF_LEN(mout);
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch));
+ }
+
+ /* add checksum */
+ abm->sh.checksum = sctp_calculate_sum(mout, NULL, iphlen_out);
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ struct sctp_tcb *stcb = NULL;
+ int ret;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof ro);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, iph_out, &abm->sh);
+ /* set IPv4 length */
+ iph_out->ip_len = len;
+ /* out it goes */
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof(ro));
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip6_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, (struct ip *)ip6_out, &abm->sh);
+ ip6_out->ip6_plen = len - sizeof(*ip6_out);
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+}
+
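The error-cause padding in sctp_send_abort() works because the IP (or IPv6) header plus the SCTP common header and ABORT chunk header already in mout are all multiples of 4 bytes, so len % 4 equals err_len % 4; the pad is then appended from a zeroed word with m_copyback(). The same step, condensed into a stand-alone helper (a sketch, not the in-tree code):

    /* Sketch: append 1-3 zero bytes so a chunk ends on a 32-bit boundary. */
    static void
    pad_chunk_to_32bits(struct mbuf *m, int len)
    {
        uint32_t zero = 0;

        if (len % 4)
            m_copyback(m, len, 4 - (len % 4), (caddr_t)&zero);
    }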
+void
+sctp_send_operr_to(struct mbuf *m, int iphlen, struct mbuf *scm, uint32_t vtag,
+ uint32_t vrf_id)
+{
+ struct mbuf *o_pak;
+ struct sctphdr *ihdr;
+ int retcode;
+ struct sctphdr *ohdr;
+ struct sctp_chunkhdr *ophdr;
+ struct ip *iph;
+ struct mbuf *mout;
+
+#ifdef SCTP_DEBUG
+ struct sockaddr_in6 lsa6, fsa6;
+
+#endif
+ uint32_t val;
+ struct mbuf *at;
+ int len;
+
+ iph = mtod(m, struct ip *);
+ ihdr = (struct sctphdr *)((caddr_t)iph + iphlen);
+
+ SCTP_BUF_PREPEND(scm, (sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)), M_DONTWAIT);
+ if (scm == NULL) {
+ /* can't send because we can't add an mbuf */
+ return;
+ }
+ ohdr = mtod(scm, struct sctphdr *);
+ ohdr->src_port = ihdr->dest_port;
+ ohdr->dest_port = ihdr->src_port;
+ ohdr->v_tag = vtag;
+ ohdr->checksum = 0;
+ ophdr = (struct sctp_chunkhdr *)(ohdr + 1);
+ ophdr->chunk_type = SCTP_OPERATION_ERROR;
+ ophdr->chunk_flags = 0;
+ len = 0;
+ at = scm;
+ while (at) {
+ len += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ ophdr->chunk_length = htons(len - sizeof(struct sctphdr));
+ if (len % 4) {
+ /* need padding */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(scm, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ val = sctp_calculate_sum(scm, NULL, 0);
+ mout = sctp_get_mbuf_for_msg(sizeof(struct ip6_hdr), 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ sctp_m_freem(scm);
+ return;
+ }
+ SCTP_BUF_NEXT(mout) = scm;
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ sctp_m_freem(mout);
+ return;
+ }
+ ohdr->checksum = val;
+ if (iph->ip_v == IPVERSION) {
+ /* V4 */
+ struct ip *out;
+ sctp_route_t ro;
+ struct sctp_tcb *stcb = NULL;
+
+ SCTP_BUF_LEN(mout) = sizeof(struct ip);
+ len += sizeof(struct ip);
+
+ bzero(&ro, sizeof ro);
+ out = mtod(mout, struct ip *);
+ out->ip_v = iph->ip_v;
+ out->ip_hl = (sizeof(struct ip) / 4);
+ out->ip_tos = iph->ip_tos;
+ out->ip_id = iph->ip_id;
+ out->ip_off = 0;
+ out->ip_ttl = MAXTTL;
+ out->ip_p = IPPROTO_SCTP;
+ out->ip_sum = 0;
+ out->ip_src = iph->ip_dst;
+ out->ip_dst = iph->ip_src;
+ out->ip_len = len;
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+
+ SCTP_IP_OUTPUT(retcode, o_pak, &ro, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else {
+ /* V6 */
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+ struct ip6_hdr *out6, *in6;
+
+ SCTP_BUF_LEN(mout) = sizeof(struct ip6_hdr);
+ len += sizeof(struct ip6_hdr);
+ bzero(&ro, sizeof ro);
+ in6 = mtod(m, struct ip6_hdr *);
+ out6 = mtod(mout, struct ip6_hdr *);
+ out6->ip6_flow = in6->ip6_flow;
+ out6->ip6_hlim = ip6_defhlim;
+ out6->ip6_nxt = IPPROTO_SCTP;
+ out6->ip6_src = in6->ip6_dst;
+ out6->ip6_dst = in6->ip6_src;
+ out6->ip6_plen = len - sizeof(struct ip6_hdr);
+
+#ifdef SCTP_DEBUG
+ bzero(&lsa6, sizeof(lsa6));
+ lsa6.sin6_len = sizeof(lsa6);
+ lsa6.sin6_family = AF_INET6;
+ lsa6.sin6_addr = out6->ip6_src;
+ bzero(&fsa6, sizeof(fsa6));
+ fsa6.sin6_len = sizeof(fsa6);
+ fsa6.sin6_family = AF_INET6;
+ fsa6.sin6_addr = out6->ip6_dst;
+#endif
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_operr_to calling ipv6 output:\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "src: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&lsa6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "dst ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&fsa6);
+
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+}
+
+static struct mbuf *
+sctp_copy_resume(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ struct sctp_sndrcvinfo *srcv,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ uint32_t * sndout,
+ struct mbuf **new_tail)
+{
+ struct mbuf *m;
+
+ m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0,
+ (M_PKTHDR | (user_marks_eor ? M_EOR : 0)));
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ } else {
+ *sndout = m_length(m, NULL);
+ *new_tail = m_last(m);
+ }
+ return (m);
+}
+
+static int
+sctp_copy_one(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ int resv_upfront)
+{
+ int left;
+
+ left = sp->length;
+ sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
+ resv_upfront, 0);
+ if (sp->data == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ sp->tail_mbuf = m_last(sp->data);
+ return (0);
+}
+
+
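Both copy helpers above build their mbuf chains with m_uiotombuf(9): sctp_copy_resume() asks for a packet header (and M_EOR when the user marks explicit EOR), while sctp_copy_one() instead uses the align argument to reserve leading space for the DATA chunk header that gets prepended later. A reduced sketch of the second pattern, assuming the kernel mbuf and uio headers are in scope:

    /* Sketch: copy a queued user message into mbufs, leaving room up front
     * for the SCTP DATA chunk header. */
    static struct mbuf *
    copy_user_msg(struct uio *uio, int len)
    {
        return (m_uiotombuf(uio, M_WAITOK, len,
            sizeof(struct sctp_data_chunk), 0));
    }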
+
+static struct sctp_stream_queue_pending *
+sctp_copy_it_in(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ struct uio *uio,
+ struct sctp_nets *net,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ int non_blocking)
+{
+ /*-
+ * This routine must be very careful in its work. Protocol
+ * processing is up and running so care must be taken to spl...()
+ * when you need to do something that may affect the stcb/asoc. The
+ * sb is locked however. When data is copied the protocol processing
+ * should be enabled since this is a slower operation...
+ */
+ struct sctp_stream_queue_pending *sp = NULL;
+ int resv_in_first;
+
+ *error = 0;
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ *error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ goto out_now;
+ }
+ sp->act_flags = 0;
+ sp->sender_all_done = 0;
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+
+ sp->stream = srcv->sinfo_stream;
+ sp->length = min(uio->uio_resid, max_send_len);
+ if ((sp->length == (uint32_t) uio->uio_resid) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ sp->sender_all_done = 0;
+ sp->some_taken = 0;
+ sp->put_last_out = 0;
+ resv_in_first = sizeof(struct sctp_data_chunk);
+ sp->data = sp->tail_mbuf = NULL;
+ *error = sctp_copy_one(sp, uio, resv_in_first);
+ if (*error) {
+ sctp_free_a_strmoq(stcb, sp);
+ sp = NULL;
+ } else {
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ sp->addr_over = 1;
+ } else {
+ sp->net = asoc->primary_destination;
+ sp->addr_over = 0;
+ }
+ atomic_add_int(&sp->net->ref_count, 1);
+ sctp_set_prsctp_policy(stcb, sp);
+ }
+out_now:
+ return (sp);
+}
+
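Restating the completeness test in sctp_copy_it_in() above: the queued block is marked as a whole message only when this copy consumed the entire user buffer and either explicit-EOR mode is off or the caller flagged SCTP_EOF/SCTP_EOR; otherwise later sends on the same stream are expected to finish it. A condensed predicate (names local to this sketch):

    static int
    msg_is_complete(size_t copied, size_t resid, int explicit_eor, int sinfo_flags)
    {
        return (copied == resid &&
            (explicit_eor == 0 || (sinfo_flags & (SCTP_EOF | SCTP_EOR))));
    }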
+
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+)
+{
+ struct sctp_inpcb *inp;
+ int error, use_rcvinfo = 0;
+ struct sctp_sndrcvinfo srcv;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (control) {
+ /* process cmsg snd/rcv info (maybe an assoc-id) */
+ if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control,
+ sizeof(srcv))) {
+ /* got one */
+ use_rcvinfo = 1;
+ }
+ }
+ error = sctp_lower_sosend(so, addr, uio, top,
+ control,
+ flags,
+ use_rcvinfo, &srcv
+ ,p
+ );
+ return (error);
+}
+
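For context, the control chain that sctp_sosend() hands to sctp_find_cmsg() is built by the application as SCTP_SNDRCV ancillary data. An illustrative userland counterpart (a one-to-one style socket already connected, error handling omitted):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>
    #include <string.h>

    ssize_t
    send_on_stream(int sd, void *buf, size_t len, uint16_t stream)
    {
        struct sctp_sndrcvinfo info;
        char cbuf[CMSG_SPACE(sizeof(info))];
        struct iovec iov;
        struct msghdr msg;
        struct cmsghdr *cmsg;

        memset(&info, 0, sizeof(info));
        info.sinfo_stream = stream;     /* everything else left at defaults */

        iov.iov_base = buf;
        iov.iov_len = len;

        memset(&msg, 0, sizeof(msg));
        memset(cbuf, 0, sizeof(cbuf));
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = IPPROTO_SCTP;
        cmsg->cmsg_type = SCTP_SNDRCV;
        cmsg->cmsg_len = CMSG_LEN(sizeof(info));
        memcpy(CMSG_DATA(cmsg), &info, sizeof(info));

        return (sendmsg(sd, &msg, 0));
    }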
+
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ int use_rcvinfo,
+ struct sctp_sndrcvinfo *srcv
+ ,
+ struct thread *p
+)
+{
+ unsigned int sndlen = 0, max_len;
+ int error, len;
+ struct mbuf *top = NULL;
+
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ int s;
+
+#endif
+ int queue_only = 0, queue_only_for_init = 0;
+ int free_cnt_applied = 0;
+ int un_sent = 0;
+ int now_filled = 0;
+ unsigned int inqueue_bytes = 0;
+ struct sctp_block_entry be;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct timeval now;
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_inpcb *t_inp;
+ int user_marks_eor;
+ int create_lock_applied = 0;
+ int nagle_applies = 0;
+ int some_on_control = 0;
+ int got_all_of_the_send = 0;
+ int hold_tcblock = 0;
+ int non_blocking = 0;
+ int temp_flags = 0;
+ uint32_t local_add_more, local_soresv = 0;
+
+ error = 0;
+ net = NULL;
+ stcb = NULL;
+ asoc = NULL;
+
+ t_inp = inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ if (i_pak) {
+ SCTP_RELEASE_PKT(i_pak);
+ }
+ return (error);
+ }
+ if ((uio == NULL) && (i_pak == NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ atomic_add_int(&inp->total_sends, 1);
+ if (uio) {
+ if (uio->uio_resid < 0) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sndlen = uio->uio_resid;
+ } else {
+ top = SCTP_HEADER_TO_CHAIN(i_pak);
+ sndlen = SCTP_HEADER_LEN(i_pak);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
+ addr,
+ sndlen);
+ /*-
+ * Pre-screen address, if one is given the sin-len
+ * must be set correctly!
+ */
+ if (addr) {
+ if ((addr->sa_family == AF_INET) &&
+ (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ } else if ((addr->sa_family == AF_INET6) &&
+ (addr->sa_len != sizeof(struct sockaddr_in6))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ }
+ hold_tcblock = 0;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /* The listener can NOT send */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+ }
+ if ((use_rcvinfo) && srcv) {
+ if (INVALID_SINFO_FLAG(srcv->sinfo_flags) ||
+ PR_SCTP_INVALID_POLICY(srcv->sinfo_flags)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (srcv->sinfo_flags)
+ SCTP_STAT_INCR(sctps_sends_with_flags);
+
+ if (srcv->sinfo_flags & SCTP_SENDALL) {
+ /* it's a sendall */
+ error = sctp_sendall(inp, uio, top, srcv);
+ top = NULL;
+ goto out_unlocked;
+ }
+ }
+ /* now we must find the assoc */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ hold_tcblock = 0;
+ SCTP_INP_RUNLOCK(inp);
+ if (addr) {
+ /* Must locate the net structure if addr given */
+ net = sctp_findnet(stcb, addr);
+ if (net) {
+ /* validate port was 0 or correct */
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ if ((sin->sin_port != 0) &&
+ (sin->sin_port != stcb->rport)) {
+ net = NULL;
+ }
+ }
+ temp_flags |= SCTP_ADDR_OVER;
+ } else
+ net = stcb->asoc.primary_destination;
+ if (addr && (net == NULL)) {
+ /* Could not find address, was it legal */
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr == 0) {
+ if ((sin->sin_port == 0) ||
+ (sin->sin_port == stcb->rport)) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ if ((sin6->sin6_port == 0) ||
+ (sin6->sin6_port == stcb->rport)) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ }
+ }
+ if (net == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ } else if (use_rcvinfo && srcv && srcv->sinfo_assoc_id) {
+ stcb = sctp_findassociation_ep_asocid(inp, srcv->sinfo_assoc_id, 0);
+ if (stcb) {
+ if (addr)
+ /*
+ * Must locate the net structure if addr
+ * given
+ */
+ net = sctp_findnet(stcb, addr);
+ else
+ net = stcb->asoc.primary_destination;
+ if ((srcv->sinfo_flags & SCTP_ADDR_OVER) &&
+ ((net == NULL) || (addr == NULL))) {
+ struct sockaddr_in *sin;
+
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ sin = (struct sockaddr_in *)addr;
+ /* Validate port is 0 or correct */
+ if ((sin->sin_port != 0) &&
+ (sin->sin_port != stcb->rport)) {
+ net = NULL;
+ }
+ }
+ }
+ hold_tcblock = 0;
+ } else if (addr) {
+ /*-
+ * Since we did not use findep we must
+ * increment it, and if we don't find a tcb
+ * decrement it.
+ */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ }
+ if ((stcb == NULL) && (addr)) {
+ /* Possible implicit send? */
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_applied = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+
+ }
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ /* With the lock applied look again */
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ if (t_inp != inp) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ }
+ if (stcb == NULL) {
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ } else {
+ /*
+ * UDP style, we must go ahead and start the INIT
+ * process
+ */
+ uint32_t vrf_id;
+
+ if ((use_rcvinfo) && (srcv) &&
+ ((srcv->sinfo_flags & SCTP_ABORT) ||
+ ((srcv->sinfo_flags & SCTP_EOF) &&
+ (sndlen == 0)))) {
+ /*-
+ * User asks to abort a non-existent assoc,
+ * or EOF a non-existent assoc with no data
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ }
+ /* get an asoc/stcb struct */
+ vrf_id = inp->def_vrf_id;
+#ifdef INVARIANTS
+ if (create_lock_applied == 0) {
+ panic("Error, should hold create lock and I don't?");
+ }
+#endif
+ stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id,
+ p
+ );
+ if (stcb == NULL) {
+ /* Error is setup for us in the call */
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ } else {
+ SCTP_PRINTF("Huh-3? create lock should have been on??\n");
+ }
+ /*
+ * Turn on queue only flag to prevent data from
+ * being sent
+ */
+ queue_only = 1;
+ asoc = &stcb->asoc;
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+
+ /* initialize authentication params for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (control) {
+ /*
+ * see if a init structure exists in cmsg
+ * headers
+ */
+ struct sctp_initmsg initm;
+ int i;
+
+ if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control,
+ sizeof(initm))) {
+ /*
+ * we have an INIT override of the
+ * default
+ */
+ if (initm.sinit_max_attempts)
+ asoc->max_init_times = initm.sinit_max_attempts;
+ if (initm.sinit_num_ostreams)
+ asoc->pre_open_streams = initm.sinit_num_ostreams;
+ if (initm.sinit_max_instreams)
+ asoc->max_inbound_streams = initm.sinit_max_instreams;
+ if (initm.sinit_max_init_timeo)
+ asoc->initial_init_rto_max = initm.sinit_max_init_timeo;
+ if (asoc->streamoutcnt < asoc->pre_open_streams) {
+ /* Default is NOT correct */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n",
+ asoc->streamoutcnt, asoc->pre_open_streams);
+ /*
+ * What happens if this
+ * fails? we panic ...
+ */
+ {
+ struct sctp_stream_out *tmp_str;
+ int had_lock = 0;
+
+ if (hold_tcblock) {
+ had_lock = 1;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_MALLOC(tmp_str,
+ struct sctp_stream_out *,
+ (asoc->pre_open_streams *
+ sizeof(struct sctp_stream_out)),
+ SCTP_M_STRMO);
+ if (had_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (tmp_str != NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = tmp_str;
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ } else {
+ asoc->pre_open_streams = asoc->streamoutcnt;
+ }
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*-
+ * inbound side must be set
+ * to 0xffff, also NOTE when
+ * we get the INIT-ACK back
+ * (for INIT sender) we MUST
+ * reduce the count
+ * (streamoutcnt) but first
+ * check if we sent to any
+ * of the upper streams that
+ * were dropped (if some
+ * were). Those that were
+ * dropped must be notified
+ * to the upper layer as
+ * failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ }
+ }
+ }
+ hold_tcblock = 1;
+ /* out with the INIT */
+ queue_only_for_init = 1;
+ /*-
+ * we may want to dig in after this call and adjust the MTU
+ * value. It defaulted to 1500 (constant) but the ro
+ * structure may now have an update and thus we may need to
+ * change it BEFORE we append the message.
+ */
+ net = stcb->asoc.primary_destination;
+ asoc = &stcb->asoc;
+ }
+ }
+ if ((SCTP_SO_IS_NBIO(so)
+ || (flags & MSG_NBIO)
+ )) {
+ non_blocking = 1;
+ }
+ asoc = &stcb->asoc;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
+ if (sndlen > asoc->smallest_mtu) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ }
+ /* would we block? */
+ if (non_blocking) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if ((SCTP_SB_LIMIT_SND(so) <
+ (sndlen + inqueue_bytes + stcb->asoc.sb_send_resv)) ||
+ (stcb->asoc.chunks_on_out_queue >
+ sctp_max_chunks_on_queue)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK);
+ if (sndlen > SCTP_SB_LIMIT_SND(so))
+ error = EMSGSIZE;
+ else
+ error = EWOULDBLOCK;
+ goto out_unlocked;
+ }
+ stcb->asoc.sb_send_resv += sndlen;
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ } else {
+ atomic_add_int(&stcb->asoc.sb_send_resv, sndlen);
+ }
+ local_soresv = sndlen;
+ /* Keep the stcb from being freed under our feet */
+ if (free_cnt_applied) {
+#ifdef INVARIANTS
+ panic("refcnt already incremented");
+#else
+ printf("refcnt:1 already incremented?\n");
+#endif
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ free_cnt_applied = 1;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if (asoc->stream_reset_outstanding) {
+ /*
+ * Can't queue any data while stream reset is underway.
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
+ error = EAGAIN;
+ goto out_unlocked;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ queue_only = 1;
+ }
+ if ((use_rcvinfo == 0) || (srcv == NULL)) {
+ /* Grab the default stuff from the asoc */
+ srcv = (struct sctp_sndrcvinfo *)&stcb->asoc.def_send;
+ }
+ /* we are now done with all control */
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ if ((use_rcvinfo) &&
+ (srcv->sinfo_flags & SCTP_ABORT)) {
+ ;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ }
+ /* Ok, we will attempt a msgsnd :> */
+ if (p) {
+ p->td_ru.ru_msgsnd++;
+ }
+ if (stcb) {
+ if (((srcv->sinfo_flags | temp_flags) & SCTP_ADDR_OVER) == 0) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ if (net == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if ((net->flight_size > net->cwnd) && (sctp_cmt_on_off == 0)) {
+ /*-
+ * CMT: Added check for CMT above. net above is the primary
+ * dest. If CMT is ON, sender should always attempt to send
+ * with the output routine sctp_fill_outqueue() that loops
+ * through all destination addresses. Therefore, if CMT is
+ * ON, queue_only is NOT set to 1 here, so that
+ * sctp_chunk_output() can be called below.
+ */
+ queue_only = 1;
+
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2))
+ queue_only = 1;
+ asoc->ifp_had_enobuf = 0;
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) * sizeof(struct sctp_data_chunk)));
+ }
+ /* Are we aborting? */
+ if (srcv->sinfo_flags & SCTP_ABORT) {
+ struct mbuf *mm;
+ int tot_demand, tot_out = 0, max_out;
+
+ SCTP_STAT_INCR(sctps_sends_with_abort);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* It has to be up before we abort */
+ /* how big is the user initiated abort? */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ if (top) {
+ struct mbuf *cntm = NULL;
+
+ mm = sctp_get_mbuf_for_msg(1, 0, M_WAIT, 1, MT_DATA);
+ if (sndlen != 0) {
+ cntm = top;
+ while (cntm) {
+ tot_out += SCTP_BUF_LEN(cntm);
+ cntm = SCTP_BUF_NEXT(cntm);
+ }
+ }
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ } else {
+ /* Must fit in a MTU */
+ tot_out = sndlen;
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ if (tot_demand > SCTP_DEFAULT_ADD_MORE) {
+ /* Too big */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out;
+ }
+ mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+ }
+ if (mm == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out;
+ }
+ max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr);
+ max_out -= sizeof(struct sctp_abort_msg);
+ if (tot_out > max_out) {
+ tot_out = max_out;
+ }
+ if (mm) {
+ struct sctp_paramhdr *ph;
+
+ /* now move forward the data pointer */
+ ph = mtod(mm, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out));
+ ph++;
+ SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
+ if (top == NULL) {
+ error = uiomove((caddr_t)ph, (int)tot_out, uio);
+ if (error) {
+ /*-
+ * Here if we can't get the user's data we
+ * still abort; we just don't get to
+ * send the user's note :-0
+ */
+ sctp_m_freem(mm);
+ mm = NULL;
+ }
+ } else {
+ if (sndlen != 0) {
+ SCTP_BUF_NEXT(mm) = top;
+ }
+ }
+ }
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ /* release this lock, otherwise we hang on ourselves */
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ mm, SCTP_SO_LOCKED);
+ /* now relock the stcb so everything is sane */
+ hold_tcblock = 0;
+ stcb = NULL;
+ /*
+ * In this case top is already chained to mm; avoid a double
+ * free, since we free it below if top != NULL and the driver
+ * would free it after sending the packet out
+ */
+ if (sndlen != 0) {
+ top = NULL;
+ }
+ goto out_unlocked;
+ }
+ /* Calculate the maximum we can send */
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ if (non_blocking) {
+ /* we already checked for non-blocking above. */
+ max_len = sndlen;
+ } else {
+ max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
+ }
+ } else {
+ max_len = 0;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* Is the stream no. valid? */
+ if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (asoc->strmout == NULL) {
+ /* huh? software error */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+ }
+ /* Unless E_EOR mode is on, we must make a send FIT in one call. */
+ if ((user_marks_eor == 0) &&
+ (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
+ /* It will NEVER fit */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ if ((uio == NULL) && user_marks_eor) {
+ /*-
+ * We do not support eeor mode for
+ * sending with mbuf chains (like sendfile).
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (user_marks_eor) {
+ local_add_more = sctp_add_more_threshold;
+ } else {
+ /*-
+ * For non-eeor the whole message must fit in
+ * the socket send buffer.
+ */
+ local_add_more = sndlen;
+ }
+ len = 0;
+ if (non_blocking) {
+ goto skip_preblock;
+ }
+ if (((max_len <= local_add_more) &&
+ (SCTP_SB_LIMIT_SND(so) > local_add_more)) ||
+ ((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) > sctp_max_chunks_on_queue)) { /* if */
+ /* No room right now! */
+ SOCKBUF_LOCK(&so->so_snd);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + sctp_add_more_threshold)) ||
+ ((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) > sctp_max_chunks_on_queue /* while */ )) {
+
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA,
+ so, asoc, sndlen);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ } else {
+ max_len = 0;
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
+skip_preblock:
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ atomic_add_int(&stcb->total_sends, 1);
+ /*
+ * sndlen covers the mbuf case; uio_resid covers the non-mbuf case.
+ * NOTE: uio will be NULL when top/mbuf is passed
+ */
+ if (sndlen == 0) {
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ got_all_of_the_send = 1;
+ goto dataless_eof;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ }
+ if (top == NULL) {
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strm;
+ uint32_t sndout, initial_out;
+
+ initial_out = uio->uio_resid;
+
+ SCTP_TCB_SEND_LOCK(stcb);
+ if ((asoc->stream_locked) &&
+ (asoc->stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ if (strm->last_msg_incomplete == 0) {
+ do_a_copy_in:
+ sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error, non_blocking);
+ if ((sp == NULL) || (error)) {
+ goto out;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp->msg_is_complete) {
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ } else {
+ /*
+ * Just got locked to this guy in case of an
+ * interrupt.
+ */
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ sp->sender_all_done = 0;
+ }
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&asoc->stream_queue_cnt, 1);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ if (sctp_logging_level & SCTP_LOG_AT_SEND_2_SCTP) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0);
+ }
+ strm->next_sequence_sent++;
+ } else {
+ SCTP_STAT_INCR(sctps_sends_with_unord);
+ }
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, asoc, strm, 1);
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ } else {
+ SCTP_TCB_SEND_LOCK(stcb);
+ sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead);
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (sp == NULL) {
+ /* ???? Huh ??? last msg is gone */
+#ifdef INVARIANTS
+ panic("Warning: Last msg marked incomplete, yet nothing left?");
+#else
+ SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n");
+ strm->last_msg_incomplete = 0;
+#endif
+ goto do_a_copy_in;
+
+ }
+ }
+ while (uio->uio_resid > 0) {
+ /* How much room do we have? */
+ struct mbuf *new_tail, *mm;
+
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
+ else
+ max_len = 0;
+
+ if ((max_len > sctp_add_more_threshold) ||
+ (max_len && (SCTP_SB_LIMIT_SND(so) < sctp_add_more_threshold)) ||
+ (uio->uio_resid &&
+ (uio->uio_resid <= (int)max_len))) {
+ sndout = 0;
+ new_tail = NULL;
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ mm = sctp_copy_resume(sp, uio, srcv, max_len, user_marks_eor, &error, &sndout, &new_tail);
+ if ((mm == NULL) || error) {
+ if (mm) {
+ sctp_m_freem(mm);
+ }
+ goto out;
+ }
+ /* Update the mbuf and count */
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+ * we need to get out. Peer probably
+ * aborted.
+ */
+ sctp_m_freem(mm);
+ if (stcb->asoc.state & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto out;
+ }
+ if (sp->tail_mbuf) {
+ /* tack it to the end */
+ SCTP_BUF_NEXT(sp->tail_mbuf) = mm;
+ sp->tail_mbuf = new_tail;
+ } else {
+ /* A stolen mbuf */
+ sp->data = mm;
+ sp->tail_mbuf = new_tail;
+ }
+ sctp_snd_sb_alloc(stcb, sndout);
+ atomic_add_int(&sp->length, sndout);
+ len += sndout;
+
+ /* Did we reach EOR? */
+ if ((uio->uio_resid == 0) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))
+ ) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+ if (uio->uio_resid == 0) {
+ /* got it all? */
+ continue;
+ }
+ /* PR-SCTP? */
+ if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+ /*
+ * This is ugly but we must assure locking
+ * order
+ */
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ sctp_prune_prsctp(stcb, asoc, srcv, sndlen);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ else
+ max_len = 0;
+ if (max_len > 0) {
+ continue;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* wait for space now */
+ if (non_blocking) {
+ /* Non-blocking io in place out */
+ goto skip_out_eof;
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (sctp_cmt_on_off == 0)) {
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2)) {
+ queue_only = 1;
+ } else {
+ queue_only = 0;
+ }
+ asoc->ifp_had_enobuf = 0;
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ if (net->flight_size > (net->mtu * stcb->asoc.max_burst)) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_burst_avoid);
+ } else if (net->flight_size > net->cwnd) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ } else {
+ queue_only = 0;
+ }
+ }
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ /* What about the INIT, send it maybe */
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
+ nagle_applies, un_sent);
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
+ stcb->asoc.total_flight,
+ stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
+ }
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only_for_init = 0;
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only_for_init = 0;
+ queue_only = 1;
+ }
+ }
+ if ((queue_only == 0) && (nagle_applies == 0)
+ ) {
+ /*-
+ * need to start chunk output
+ * before blocking.. note that if
+ * a lock is already applied, then
+ * the input via the net is happening
+ * and I don't need to start output :-D
+ */
+ if (hold_tcblock == 0) {
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ hold_tcblock = 1;
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else {
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ if (hold_tcblock == 1) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ }
+ SOCKBUF_LOCK(&so->so_snd);
+ /*-
+ * This is a bit strange, but I think it will
+ * work. The total_output_queue_size is locked and
+ * protected by the TCB_LOCK, which we just released.
+ * There is a race that can occur between releasing it
+ * above, and me getting the socket lock, where sacks
+ * come in but we have not put the SB_WAIT on the
+ * so_snd buffer to get the wakeup. After the LOCK
+ * is applied the sack_processing will also need to
+ * LOCK the so->so_snd to do the actual sowwakeup(). So
+ * once we have the socket buffer lock if we recheck the
+ * size we KNOW we will get to sleep safely with the
+ * wakeup flag in place.
+ */
+ if (SCTP_SB_LIMIT_SND(so) <= (stcb->asoc.total_output_queue_size +
+ min(sctp_add_more_threshold, SCTP_SB_LIMIT_SND(so)))
+ ) {
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
+ so, asoc, uio->uio_resid);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp) {
+ if (sp->msg_is_complete == 0) {
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ } else {
+ sp->sender_all_done = 1;
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ } else {
+ SCTP_PRINTF("Huh no sp TSNH?\n");
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (uio->uio_resid == 0) {
+ got_all_of_the_send = 1;
+ }
+ } else if (top) {
+ /* We send in a 0, since we do NOT have any locks */
+ error = sctp_msg_append(stcb, net, top, srcv, 0);
+ top = NULL;
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ /*
+ * This should only happen for Panda for the mbuf
+ * send case, which does NOT yet support EEOR mode.
+ * Thus, we can just set this flag to do the proper
+ * EOF handling.
+ */
+ got_all_of_the_send = 1;
+ }
+ }
+ if (error) {
+ goto out;
+ }
+dataless_eof:
+ /* EOF thing ? */
+ if ((srcv->sinfo_flags & SCTP_EOF) &&
+ (got_all_of_the_send == 1) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)
+ ) {
+ int cnt;
+
+ SCTP_STAT_INCR(sctps_sends_with_eof);
+ error = 0;
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ cnt = sctp_is_there_unsent_data(stcb);
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN the first time through */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*-
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ /*-
+ * XXX sockets draft says that SCTP_EOF should be
+ * sent with no data. currently, we will allow user
+ * data to be sent first and move to
+ * SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ if (free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ }
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_LOCKED);
+ /*
+ * now relock the stcb so everything
+ * is sane
+ */
+ hold_tcblock = 0;
+ stcb = NULL;
+ goto out;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY);
+ }
+ }
+ }
+skip_out_eof:
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ some_on_control = 1;
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (sctp_cmt_on_off == 0)) {
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2)) {
+ queue_only = 1;
+ } else {
+ queue_only = 0;
+ }
+ asoc->ifp_had_enobuf = 0;
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ if (net->flight_size > (net->mtu * stcb->asoc.max_burst)) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_burst_avoid);
+ } else if (net->flight_size > net->cwnd) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ } else {
+ queue_only = 0;
+ }
+ }
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only_for_init = 0;
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only_for_init = 0;
+ queue_only = 1;
+ }
+ }
+ if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) {
+ /* we can attempt to send too. */
+ if (hold_tcblock == 0) {
+ /*
+ * If there is activity recv'ing sacks no need to
+ * send
+ */
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ hold_tcblock = 1;
+ }
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else if ((queue_only == 0) &&
+ (stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight == 0)) {
+ /* We get to have a probe outstanding */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ } else if (some_on_control) {
+ int num_out, reason, cwnd_full, frag_point;
+
+ /* Here we do control only */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, &cwnd_full, 1, &now, &now_filled, frag_point, SCTP_SO_LOCKED);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d",
+ queue_only, stcb->asoc.peers_rwnd, un_sent,
+ stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue,
+ stcb->asoc.total_output_queue_size, error);
+
+out:
+out_unlocked:
+
+ if (local_soresv && stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen);
+ local_soresv = 0;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if ((stcb) && hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if (stcb && free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+#ifdef INVARIANTS
+ if (stcb) {
+ if (mtx_owned(&stcb->tcb_mtx)) {
+ panic("Leaving with tcb mtx owned?");
+ }
+ if (mtx_owned(&stcb->tcb_send_mtx)) {
+ panic("Leaving with tcb send mtx owned?");
+ }
+ }
+#endif
+ if (top) {
+ sctp_m_freem(top);
+ }
+ if (control) {
+ sctp_m_freem(control);
+ }
+ return (error);
+}
+
+
+/*
+ * generate an AUTHentication chunk, if required
+ */
+struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret, uint32_t * offset,
+ struct sctp_tcb *stcb, uint8_t chunk)
+{
+ struct mbuf *m_auth;
+ struct sctp_auth_chunk *auth;
+ int chunk_len;
+
+ if ((m_end == NULL) || (auth_ret == NULL) || (offset == NULL) ||
+ (stcb == NULL))
+ return (m);
+
+ /* sysctl disabled auth? */
+ if (sctp_auth_disable)
+ return (m);
+
+ /* peer doesn't do auth... */
+ if (!stcb->asoc.peer_supports_auth) {
+ return (m);
+ }
+ /* does the requested chunk require auth? */
+ if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
+ return (m);
+ }
+ m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_auth == NULL) {
+ /* no mbuf's */
+ return (m);
+ }
+ /* reserve some space if this will be the first mbuf */
+ if (m == NULL)
+ SCTP_BUF_RESV_UF(m_auth, SCTP_MIN_OVERHEAD);
+ /* fill in the AUTH chunk details */
+ auth = mtod(m_auth, struct sctp_auth_chunk *);
+ bzero(auth, sizeof(*auth));
+ auth->ch.chunk_type = SCTP_AUTHENTICATION;
+ auth->ch.chunk_flags = 0;
+ chunk_len = sizeof(*auth) +
+ sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ auth->ch.chunk_length = htons(chunk_len);
+ auth->hmac_id = htons(stcb->asoc.peer_hmac_id);
+ /* key id and hmac digest will be computed and filled in upon send */
+
+ /* save the offset where the auth was inserted into the chain */
+ if (m != NULL) {
+ struct mbuf *cn;
+
+ *offset = 0;
+ cn = m;
+ while (cn) {
+ *offset += SCTP_BUF_LEN(cn);
+ cn = SCTP_BUF_NEXT(cn);
+ }
+ } else
+ *offset = 0;
+
+ /* update length and return pointer to the auth chunk */
+ SCTP_BUF_LEN(m_auth) = chunk_len;
+ m = sctp_copy_mbufchain(m_auth, m, m_end, 1, chunk_len, 0);
+ if (auth_ret != NULL)
+ *auth_ret = auth;
+
+ return (m);
+}
+
+int
+sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro)
+{
+ struct nd_prefix *pfx = NULL;
+ struct nd_pfxrouter *pfxrtr = NULL;
+ struct sockaddr_in6 gw6;
+
+ if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6)
+ return (0);
+
+ /* get prefix entry of address */
+ LIST_FOREACH(pfx, &nd_prefix, ndpr_entry) {
+ if (pfx->ndpr_stateflags & NDPRF_DETACHED)
+ continue;
+ if (IN6_ARE_MASKED_ADDR_EQUAL(&pfx->ndpr_prefix.sin6_addr,
+ &src6->sin6_addr, &pfx->ndpr_mask))
+ break;
+ }
+ /* no prefix entry in the prefix list */
+ if (pfx == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+ return (0);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+
+ /* search installed gateway from prefix entry */
+ for (pfxrtr = pfx->ndpr_advrtrs.lh_first; pfxrtr; pfxrtr =
+ pfxrtr->pfr_next) {
+ memset(&gw6, 0, sizeof(struct sockaddr_in6));
+ gw6.sin6_family = AF_INET6;
+ gw6.sin6_len = sizeof(struct sockaddr_in6);
+ memcpy(&gw6.sin6_addr, &pfxrtr->router->rtaddr,
+ sizeof(struct in6_addr));
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ if (sctp_cmpaddr((struct sockaddr *)&gw6,
+ ro->ro_rt->rt_gateway)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
+ return (1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
+ return (0);
+}
+int
+sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro)
+{
+ struct sockaddr_in *sin, *mask;
+ struct ifaddr *ifa;
+ struct in_addr srcnetaddr, gwnetaddr;
+
+ if (ro == NULL || ro->ro_rt == NULL ||
+ sifa->address.sa.sa_family != AF_INET) {
+ return (0);
+ }
+ ifa = (struct ifaddr *)sifa->ifa;
+ mask = (struct sockaddr_in *)(ifa->ifa_netmask);
+ sin = (struct sockaddr_in *)&sifa->address.sin;
+ srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr);
+
+ sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr);
+ if (srcnetaddr.s_addr == gwnetaddr.s_addr) {
+ return (1);
+ }
+ return (0);
+}
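
Both *_match_nexthop() helpers above reduce to the same test: mask the candidate source address and the route's gateway with the relevant netmask and compare the results. A minimal user-space sketch of the IPv4 case follows; the helper name and example addresses are illustrative only and are not part of this commit.

#include <netinet/in.h>		/* struct in_addr */

/*
 * Stand-alone sketch of the check performed by sctp_v4src_match_nexthop():
 * the source and the gateway "match" when they fall in the same network
 * under the source interface's netmask.
 */
static int
same_subnet(struct in_addr src, struct in_addr gw, struct in_addr mask)
{
	return ((src.s_addr & mask.s_addr) == (gw.s_addr & mask.s_addr));
}

For example, 192.0.2.10 and a gateway of 192.0.2.1 match under a 255.255.255.0 mask, so that source address is considered usable for that next hop.
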
Index: ip_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -L sys/netinet/ip_output.c -L sys/netinet/ip_output.c -u -r1.6 -r1.7
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -27,10 +27,11 @@
* SUCH DAMAGE.
*
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/netinet/ip_output.c,v 1.242.2.8 2006/01/31 16:06:05 andre Exp $
- * $MidnightBSD$
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_output.c,v 1.276 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipfw.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
@@ -39,9 +40,9 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -58,26 +59,16 @@
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
-
-#include <machine/in_cksum.h>
-
-static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
+#include <netinet/ip_options.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#ifdef IPSEC_DEBUG
-#include <netkey/key_debug.h>
-#else
-#define KEYDEBUG(lev,arg)
-#endif
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
+#include <netinet/ip_ipsec.h>
#include <netipsec/ipsec.h>
-#include <netipsec/xform.h>
-#include <netipsec/key.h>
-#endif /*FAST_IPSEC*/
+#endif /* IPSEC */
+
+#include <machine/in_cksum.h>
+
+#include <security/mac/mac_framework.h>
#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
x, (ntohl(a.s_addr)>>24)&0xFF,\
@@ -93,16 +84,8 @@
&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif
-static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
-static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback
(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
-static int ip_getmoptions(struct inpcb *, struct sockopt *);
-static int ip_pcbopts(struct inpcb *, int, struct mbuf *);
-static int ip_setmoptions(struct inpcb *, struct sockopt *);
-static struct ip_moptions *ip_findmoptions(struct inpcb *inp);
-
-int ip_optcopy(struct ip *, struct ip *);
extern struct protosw inetsw[];
@@ -116,13 +99,14 @@
* inserted, so must have a NULL opt pointer.
*/
int
-ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
- int flags, struct ip_moptions *imo, struct inpcb *inp)
+ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
+ struct ip_moptions *imo, struct inpcb *inp)
{
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m0;
int hlen = sizeof (struct ip);
+ int mtu;
int len, error = 0;
struct sockaddr_in *dst = NULL; /* keep compiler happy */
struct in_ifaddr *ia = NULL;
@@ -132,18 +116,8 @@
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag = NULL;
#endif
-#ifdef IPSEC
- struct secpolicy *sp = NULL;
-#endif
-#ifdef FAST_IPSEC
- struct secpolicy *sp = NULL;
- struct tdb_ident *tdbi;
- struct m_tag *mtag;
- int s;
-#endif /* FAST_IPSEC */
-
M_ASSERTPKTHDR(m);
-
+
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
@@ -193,7 +167,7 @@
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
RTFREE(ro->ro_rt);
- ro->ro_rt = (struct rtentry *)0;
+ ro->ro_rt = (struct rtentry *)NULL;
}
#ifdef IPFIREWALL_FORWARD
if (ro->ro_rt == NULL && fwd_tag == NULL) {
@@ -206,10 +180,24 @@
dst->sin_addr = ip->ip_dst;
}
/*
- * If routing to interface only,
- * short circuit routing lookup.
+ * If routing to interface only, short circuit routing lookup.
+ * The use of an all-ones broadcast address implies this; an
+ * interface is specified by the broadcast address of an interface,
+ * or the destination address of a ptp interface.
*/
- if (flags & IP_ROUTETOIF) {
+ if (flags & IP_SENDONES) {
+ if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
+ ipstat.ips_noroute++;
+ error = ENETUNREACH;
+ goto bad;
+ }
+ ip->ip_dst.s_addr = INADDR_BROADCAST;
+ dst->sin_addr = ip->ip_dst;
+ ifp = ia->ia_ifp;
+ ip->ip_ttl = 1;
+ isbroadcast = 1;
+ } else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
(ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
ipstat.ips_noroute++;
@@ -251,6 +239,24 @@
else
isbroadcast = in_broadcast(dst->sin_addr, ifp);
}
+ /*
+ * Calculate MTU. If we have a route that is up, use that,
+ * otherwise use the interface's MTU.
+ */
+ if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
+ /*
+ * This case can happen if the user changed the MTU
+ * of an interface after enabling IP on it. Because
+ * most netifs don't keep track of routes pointing to
+ * them, there is no way for one to update all its
+ * routes when the MTU is changed.
+ */
+ if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
+ ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ mtu = ro->ro_rt->rt_rmx.rmx_mtu;
+ } else {
+ mtu = ifp->if_mtu;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct in_multi *inm;
@@ -351,7 +357,7 @@
goto sendit;
}
-#ifndef notdef
+
/*
* If the source address is not specified yet, use the address
	 * of the outgoing interface.
@@ -362,7 +368,7 @@
ip->ip_src = IA_SIN(ia)->sin_addr;
}
}
-#endif /* notdef */
+
/*
* Verify that we have any chance at all of being able to queue the
* packet or packet fragments, unless ALTQ is enabled on the given
@@ -370,10 +376,10 @@
*/
#ifdef ALTQ
if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
- ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen))
#else
- if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen)
#endif /* ALTQ */
{
@@ -398,12 +404,10 @@
goto bad;
}
/* don't allow broadcast messages to be fragmented */
- if (ip->ip_len > ifp->if_mtu) {
+ if (ip->ip_len > mtu) {
error = EMSGSIZE;
goto bad;
}
- if (flags & IP_SENDONES)
- ip->ip_dst.s_addr = INADDR_BROADCAST;
m->m_flags |= M_BCAST;
} else {
m->m_flags &= ~M_BCAST;
@@ -411,256 +415,22 @@
sendit:
#ifdef IPSEC
- /* get SP for this packet */
- if (inp == NULL)
- sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
- flags, &error);
- else
- sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- ipsecstat.out_inval++;
- goto bad;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- ipsecstat.out_polvio++;
+ switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
+ case 1:
goto bad;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- case IPSEC_POLICY_TCP:
- /* no need to do IPsec. */
- goto skip_ipsec;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto bad;
- }
- break;
-
- case IPSEC_POLICY_ENTRUST:
+ case -1:
+ goto done;
+ case 0:
default:
- printf("ip_output: Invalid policy found. %d\n", sp->policy);
- }
- {
- struct ipsec_output_state state;
- bzero(&state, sizeof(state));
- state.m = m;
- if (flags & IP_ROUTETOIF) {
- state.ro = &iproute;
- bzero(&iproute, sizeof(iproute));
- } else
- state.ro = ro;
- state.dst = (struct sockaddr *)dst;
-
- ip->ip_sum = 0;
-
- /*
- * XXX
- * delayed checksums are not currently compatible with IPsec
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
-
- error = ipsec4_output(&state, sp, flags);
-
- m = state.m;
- if (flags & IP_ROUTETOIF) {
- /*
- * if we have tunnel mode SA, we may need to ignore
- * IP_ROUTETOIF.
- */
- if (state.ro != &iproute || state.ro->ro_rt != NULL) {
- flags &= ~IP_ROUTETOIF;
- ro = state.ro;
- }
- } else
- ro = state.ro;
- dst = (struct sockaddr_in *)state.dst;
- if (error) {
- /* mbuf is already reclaimed in ipsec4_output. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("ip4_output (ipsec): error code %d\n", error);
- /*fall through*/
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
+ break; /* Continue with packet processing. */
}
-
- /* be sure to update variables that are affected by ipsec4_output() */
+ /* Update variables that are affected by ipsec4_output(). */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- if (ro->ro_rt == NULL) {
- if ((flags & IP_ROUTETOIF) == 0) {
- printf("ip_output: "
- "can't update route after IPsec processing\n");
- error = EHOSTUNREACH; /*XXX*/
- goto bad;
- }
- } else {
- if (state.encap) {
- ia = ifatoia(ro->ro_rt->rt_ifa);
- ifp = ro->ro_rt->rt_ifp;
- }
- }
- }
-
- /* make it flipped, again. */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-skip_ipsec:
-#endif /*IPSEC*/
-#ifdef FAST_IPSEC
- /*
- * Check the security policy (SP) for the packet and, if
- * required, do IPsec-related processing. There are two
- * cases here; the first time a packet is sent through
- * it will be untagged and handled by ipsec4_checkpolicy.
- * If the packet is resubmitted to ip_output (e.g. after
- * AH, ESP, etc. processing), there will be a tag to bypass
- * the lookup and related policy checking.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
- if (sp == NULL)
- error = -EINVAL; /* force silent drop */
- m_tag_delete(m, mtag);
- } else {
- sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
- &error, inp);
- }
- /*
- * There are four return cases:
- * sp != NULL apply IPsec policy
- * sp == NULL, error == 0 no IPsec handling needed
- * sp == NULL, error == -EINVAL discard packet w/o error
- * sp == NULL, error != 0 discard packet, report error
- */
- if (sp != NULL) {
- /* Loop detection, check if ipsec processing already done */
- KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
- for (mtag = m_tag_first(m); mtag != NULL;
- mtag = m_tag_next(m, mtag)) {
- if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
- continue;
- if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
- mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
- continue;
- /*
- * Check if policy has an SA associated with it.
- * This can happen when an SP has yet to acquire
- * an SA; e.g. on first reference. If it occurs,
- * then we let ipsec4_process_packet do its thing.
- */
- if (sp->req->sav == NULL)
- break;
- tdbi = (struct tdb_ident *)(mtag + 1);
- if (tdbi->spi == sp->req->sav->spi &&
- tdbi->proto == sp->req->sav->sah->saidx.proto &&
- bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
- sizeof (union sockaddr_union)) == 0) {
- /*
- * No IPsec processing is needed, free
- * reference to SP.
- *
- * NB: null pointer to avoid free at
- * done: below.
- */
- KEY_FREESP(&sp), sp = NULL;
- splx(s);
- goto spd_done;
- }
- }
-
- /*
- * Do delayed checksums now because we send before
- * this is done in the normal processing path.
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
-
- /* NB: callee frees mbuf */
- error = ipsec4_process_packet(m, sp->req, flags, 0);
- /*
- * Preserve KAME behaviour: ENOENT can be returned
- * when an SA acquire is in progress. Don't propagate
- * this to user-level; it confuses applications.
- *
- * XXX this will go away when the SADB is redone.
- */
- if (error == ENOENT)
- error = 0;
- splx(s);
- goto done;
- } else {
- splx(s);
-
- if (error != 0) {
- /*
- * Hack: -EINVAL is used to signal that a packet
- * should be silently discarded. This is typically
- * because we asked key management for an SA and
- * it was delayed (e.g. kicked up to IKE).
- */
- if (error == -EINVAL)
- error = 0;
- goto bad;
- } else {
- /* No IPsec processing for this packet. */
- }
-#ifdef notyet
- /*
- * If deferred crypto processing is needed, check that
- * the interface supports it.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
- if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
- /* notify IPsec to do its own crypto */
- ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
- error = EHOSTUNREACH;
- goto bad;
- }
-#endif
- }
-spd_done:
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/* Jump over all PFIL processing if hooks are not active. */
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passout;
/* Run through list of hooks for output packets. */
@@ -712,20 +482,11 @@
/* Or forward to some other address? */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag) {
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst)) {
-#endif
- dst = (struct sockaddr_in *)&ro->ro_dst;
- bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
- m->m_flags |= M_SKIP_FIREWALL;
- m_tag_delete(m, fwd_tag);
- goto again;
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- } else {
- m_tag_delete(m, fwd_tag);
- /* Continue. */
- }
-#endif
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m_tag_delete(m, fwd_tag);
+ goto again;
}
#endif /* IPFIREWALL_FORWARD */
@@ -750,50 +511,49 @@
/*
* If small enough for interface, or the interface will take
- * care of the fragmentation for us, can just send directly.
+ * care of the fragmentation for us, we can just send directly.
*/
- if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
- ((ip->ip_off & IP_DF) == 0))) {
+ if (ip->ip_len <= mtu ||
+ (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+ ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
- /* Record statistics for this interface address. */
+ /*
+ * Record statistics for this interface address.
+ * With CSUM_TSO the byte/packet count will be slightly
+ * incorrect because we count the IP+TCP headers only
+ * once instead of for every generated packet.
+ */
if (!(flags & IP_FORWARDING) && ia) {
- ia->ia_ifa.if_opackets++;
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ ia->ia_ifa.if_opackets +=
+ m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
+ else
+ ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
-
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
#endif
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing lower layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
goto done;
}
- if (ip->ip_off & IP_DF) {
+ /* Balk when DF bit is set or the interface didn't support TSO. */
+ if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
- /*
- * This case can happen if the user changed the MTU
- * of an interface after enabling IP on it. Because
- * most netifs don't keep track of routes pointing to
- * them, there is no way for one to update all its
- * routes when the MTU is changed.
- */
- if (ro != NULL &&
- (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
- (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
- ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
- }
ipstat.ips_cantfrag++;
goto bad;
}
@@ -802,23 +562,24 @@
* Too large for interface; fragment if possible. If successful,
* on return, m will point to a list of packets to be sent.
*/
- error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
+ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
if (error)
goto bad;
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing upper layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
} else
@@ -832,17 +593,6 @@
if (ro == &iproute && ro->ro_rt) {
RTFREE(ro->ro_rt);
}
-#ifdef IPSEC
- if (sp != NULL) {
- KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
- printf("DP ip_output call free SP:%p\n", sp));
- key_freesp(sp);
- }
-#endif
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif
return (error);
bad:
m_freem(m);
@@ -860,7 +610,7 @@
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum)
+ u_long if_hwassist_flags, int sw_csum)
{
int error = 0;
int hlen = ip->ip_hl << 2;
@@ -944,7 +694,7 @@
struct mbuf *m;
int mhlen = sizeof (struct ip);
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
ipstat.ips_odropped++;
@@ -1045,141 +795,19 @@
}
/*
- * Insert IP options into preformed packet.
- * Adjust IP destination as required for IP source routing,
- * as indicated by a non-zero in_addr at the start of the options.
- *
- * XXX This routine assumes that the packet has no options in place.
- */
-static struct mbuf *
-ip_insertoptions(m, opt, phlen)
- register struct mbuf *m;
- struct mbuf *opt;
- int *phlen;
-{
- register struct ipoption *p = mtod(opt, struct ipoption *);
- struct mbuf *n;
- register struct ip *ip = mtod(m, struct ip *);
- unsigned optlen;
-
- optlen = opt->m_len - sizeof(p->ipopt_dst);
- if (optlen + ip->ip_len > IP_MAXPACKET) {
- *phlen = 0;
- return (m); /* XXX should fail */
- }
- if (p->ipopt_dst.s_addr)
- ip->ip_dst = p->ipopt_dst;
- if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
- MGETHDR(n, M_DONTWAIT, MT_HEADER);
- if (n == NULL) {
- *phlen = 0;
- return (m);
- }
- M_MOVE_PKTHDR(n, m);
- n->m_pkthdr.rcvif = NULL;
-#ifdef MAC
- mac_copy_mbuf(m, n);
-#endif
- n->m_pkthdr.len += optlen;
- m->m_len -= sizeof(struct ip);
- m->m_data += sizeof(struct ip);
- n->m_next = m;
- m = n;
- m->m_len = optlen + sizeof(struct ip);
- m->m_data += max_linkhdr;
- bcopy(ip, mtod(m, void *), sizeof(struct ip));
- } else {
- m->m_data -= optlen;
- m->m_len += optlen;
- m->m_pkthdr.len += optlen;
- bcopy(ip, mtod(m, void *), sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- bcopy(p->ipopt_list, ip + 1, optlen);
- *phlen = sizeof(struct ip) + optlen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = *phlen >> 2;
- ip->ip_len += optlen;
- return (m);
-}
-
-/*
- * Copy options from ip to jp,
- * omitting those not copied during fragmentation.
- */
-int
-ip_optcopy(ip, jp)
- struct ip *ip, *jp;
-{
- register u_char *cp, *dp;
- int opt, optlen, cnt;
-
- cp = (u_char *)(ip + 1);
- dp = (u_char *)(jp + 1);
- cnt = (ip->ip_hl << 2) - sizeof (struct ip);
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[0];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP) {
- /* Preserve for IP mcast tunnel's LSRR alignment. */
- *dp++ = IPOPT_NOP;
- optlen = 1;
- continue;
- }
-
- KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
- ("ip_optcopy: malformed ipv4 option"));
- optlen = cp[IPOPT_OLEN];
- KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
- ("ip_optcopy: malformed ipv4 option"));
-
- /* bogus lengths should have been caught by ip_dooptions */
- if (optlen > cnt)
- optlen = cnt;
- if (IPOPT_COPIED(opt)) {
- bcopy(cp, dp, optlen);
- dp += optlen;
- }
- }
- for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
- *dp++ = IPOPT_EOL;
- return (optlen);
-}
-
-/*
* IP socket option processing.
- *
- * There are two versions of this call in order to work around a race
- * condition in TCP in FreeBSD 6.x. In the TCP implementation, so->so_pcb
- * can become NULL if the pcb or pcbinfo lock isn't held. However, when
- * entering ip_ctloutput(), neither lock is held, and finding the pointer to
- * either lock requires follow so->so_pcb, which may be NULL.
- * ip_ctloutput_pcbinfo() accepts the pcbinfo pointer so that the lock can be
- * safely acquired. This is not required in FreeBSD 7.x because the
- * invariants on so->so_pcb are much stronger, so it cannot become NULL
- * while the socket is in use.
*/
int
-ip_ctloutput_pcbinfo(so, sopt, pcbinfo)
- struct socket *so;
- struct sockopt *sopt;
- struct inpcbinfo *pcbinfo;
+ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
- if (pcbinfo == NULL)
- pcbinfo = inp->inp_pcbinfo;
-
error = optval = 0;
if (sopt->sopt_level != IPPROTO_IP) {
return (EINVAL);
}
- if (inp == NULL)
- return (EINVAL);
-
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
@@ -1205,15 +833,7 @@
m_free(m);
break;
}
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- m_free(m);
- error = EINVAL;
- break;
- }
INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
error = ip_pcbopts(inp, sopt->sopt_name, m);
INP_UNLOCK(inp);
return (error);
@@ -1234,14 +854,7 @@
sizeof optval);
if (error)
break;
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- error = EINVAL;
- break;
- }
- INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
+
switch (sopt->sopt_name) {
case IP_TOS:
inp->inp_ip_tos = optval;
@@ -1298,17 +911,31 @@
OPTSET(INP_DONTFRAG);
break;
}
- INP_UNLOCK(inp);
break;
#undef OPTSET
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_ADD_MEMBERSHIP:
case IP_DROP_MEMBERSHIP:
- error = ip_setmoptions(inp, sopt);
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_MSFILTER:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_setmoptions(inp, sopt);
break;
case IP_PORTRANGE:
@@ -1317,14 +944,7 @@
if (error)
break;
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- error = EINVAL;
- break;
- }
INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
switch (optval) {
case IP_PORTRANGE_DEFAULT:
inp->inp_flags &= ~(INP_LOWPORT);
@@ -1348,7 +968,7 @@
INP_UNLOCK(inp);
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IP_IPSEC_POLICY:
{
caddr_t req;
@@ -1361,21 +981,28 @@
break;
if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
break;
- priv = (sopt->sopt_td != NULL &&
- suser(sopt->sopt_td) != 0) ? 0 : 1;
+ if (sopt->sopt_td != NULL) {
+ /*
+ * XXXRW: Would be more desirable to do this
+ * one layer down so that we only exercise
+ * privilege if it is needed.
+ */
+ error = priv_check(sopt->sopt_td,
+ PRIV_NETINET_IPSEC);
+ if (error)
+ priv = 0;
+ else
+ priv = 1;
+ } else
+ priv = 1;
req = mtod(m, caddr_t);
len = m->m_len;
optname = sopt->sopt_name;
- if (so->so_pcb == NULL) {
- m_free(m);
- error = EINVAL;
- break;
- }
error = ipsec4_set_policy(inp, optname, req, len, priv);
m_freem(m);
break;
}
-#endif /*IPSEC*/
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -1467,16 +1094,19 @@
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- error = ip_getmoptions(inp, sopt);
+ case IP_MSFILTER:
+ error = inp_getmoptions(inp, sopt);
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IP_IPSEC_POLICY:
{
struct mbuf *m = NULL;
@@ -1494,7 +1124,7 @@
m_freem(m);
break;
}
-#endif /*IPSEC*/
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -1505,562 +1135,6 @@
return (error);
}
-int
-ip_ctloutput(struct socket *so, struct sockopt *sopt)
-{
- return (ip_ctloutput_pcbinfo(so, sopt, NULL));
-}
-
-/*
- * Set up IP options in pcb for insertion in output packets.
- * Store in mbuf with pointer in pcbopt, adding pseudo-option
- * with destination address if source routed.
- */
-static int
-ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
-{
- register int cnt, optlen;
- register u_char *cp;
- struct mbuf **pcbopt;
- u_char opt;
-
- INP_LOCK_ASSERT(inp);
-
- pcbopt = &inp->inp_options;
-
- /* turn off any old options */
- if (*pcbopt)
- (void)m_free(*pcbopt);
- *pcbopt = 0;
- if (m == NULL || m->m_len == 0) {
- /*
- * Only turning off any previous options.
- */
- if (m != NULL)
- (void)m_free(m);
- return (0);
- }
-
- if (m->m_len % sizeof(int32_t))
- goto bad;
- /*
- * IP first-hop destination address will be stored before
- * actual options; move other options back
- * and clear it when none present.
- */
- if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
- goto bad;
- cnt = m->m_len;
- m->m_len += sizeof(struct in_addr);
- cp = mtod(m, u_char *) + sizeof(struct in_addr);
- bcopy(mtod(m, void *), cp, (unsigned)cnt);
- bzero(mtod(m, void *), sizeof(struct in_addr));
-
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[IPOPT_OPTVAL];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP)
- optlen = 1;
- else {
- if (cnt < IPOPT_OLEN + sizeof(*cp))
- goto bad;
- optlen = cp[IPOPT_OLEN];
- if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
- goto bad;
- }
- switch (opt) {
-
- default:
- break;
-
- case IPOPT_LSRR:
- case IPOPT_SSRR:
- /*
- * user process specifies route as:
- * ->A->B->C->D
- * D must be our final destination (but we can't
- * check that since we may not have connected yet).
- * A is first hop destination, which doesn't appear in
- * actual IP option, but is stored before the options.
- */
- if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
- goto bad;
- m->m_len -= sizeof(struct in_addr);
- cnt -= sizeof(struct in_addr);
- optlen -= sizeof(struct in_addr);
- cp[IPOPT_OLEN] = optlen;
- /*
- * Move first hop before start of options.
- */
- bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
- sizeof(struct in_addr));
- /*
- * Then copy rest of options back
- * to close up the deleted entry.
- */
- bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
- &cp[IPOPT_OFFSET+1],
- (unsigned)cnt - (IPOPT_MINOFF - 1));
- break;
- }
- }
- if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
- goto bad;
- *pcbopt = m;
- return (0);
-
-bad:
- (void)m_free(m);
- return (EINVAL);
-}
-
-/*
- * XXX
- * The whole multicast option thing needs to be re-thought.
- * Several of these options are equally applicable to non-multicast
- * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
- * standard option (IP_TTL).
- */
-
-/*
- * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
- */
-static struct ifnet *
-ip_multicast_if(a, ifindexp)
- struct in_addr *a;
- int *ifindexp;
-{
- int ifindex;
- struct ifnet *ifp;
-
- if (ifindexp)
- *ifindexp = 0;
- if (ntohl(a->s_addr) >> 24 == 0) {
- ifindex = ntohl(a->s_addr) & 0xffffff;
- if (ifindex < 0 || if_index < ifindex)
- return NULL;
- ifp = ifnet_byindex(ifindex);
- if (ifindexp)
- *ifindexp = ifindex;
- } else {
- INADDR_TO_IFP(*a, ifp);
- }
- return ifp;
-}
-
-/*
- * Given an inpcb, return its multicast options structure pointer. Accepts
- * an unlocked inpcb pointer, but will return it locked. May sleep.
- */
-static struct ip_moptions *
-ip_findmoptions(struct inpcb *inp)
-{
- struct ip_moptions *imo;
-
- INP_LOCK(inp);
- if (inp->inp_moptions != NULL)
- return (inp->inp_moptions);
-
- INP_UNLOCK(inp);
-
- imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
-
- imo->imo_multicast_ifp = NULL;
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- imo->imo_multicast_vif = -1;
- imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
- imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
- imo->imo_num_memberships = 0;
-
- INP_LOCK(inp);
- if (inp->inp_moptions != NULL) {
- free(imo, M_IPMOPTS);
- return (inp->inp_moptions);
- }
- inp->inp_moptions = imo;
- return (imo);
-}
-
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-static int
-ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
-{
- int error = 0;
- int i;
- struct in_addr addr;
- struct ip_mreq mreq;
- struct ifnet *ifp;
- struct ip_moptions *imo;
- struct route ro;
- struct sockaddr_in *dst;
- int ifindex;
- int s;
-
- switch (sopt->sopt_name) {
- /* store an index number for the vif you wanna use in the send */
- case IP_MULTICAST_VIF:
- if (legal_vif_num == 0) {
- error = EOPNOTSUPP;
- break;
- }
- error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
- if (error)
- break;
- if (!legal_vif_num(i) && (i != -1)) {
- error = EINVAL;
- break;
- }
- imo = ip_findmoptions(inp);
- imo->imo_multicast_vif = i;
- INP_UNLOCK(inp);
- break;
-
- case IP_MULTICAST_IF:
- /*
- * Select the interface for outgoing multicast packets.
- */
- error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
- if (error)
- break;
- /*
- * INADDR_ANY is used to remove a previous selection.
- * When no interface is selected, a default one is
- * chosen every time a multicast packet is sent.
- */
- imo = ip_findmoptions(inp);
- if (addr.s_addr == INADDR_ANY) {
- imo->imo_multicast_ifp = NULL;
- INP_UNLOCK(inp);
- break;
- }
- /*
- * The selected interface is identified by its local
- * IP address. Find the interface and confirm that
- * it supports multicasting.
- */
- s = splimp();
- ifp = ip_multicast_if(&addr, &ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- INP_UNLOCK(inp);
- splx(s);
- error = EADDRNOTAVAIL;
- break;
- }
- imo->imo_multicast_ifp = ifp;
- if (ifindex)
- imo->imo_multicast_addr = addr;
- else
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- case IP_MULTICAST_TTL:
- /*
- * Set the IP time-to-live for outgoing multicast packets.
- * The original multicast API required a char argument,
- * which is inconsistent with the rest of the socket API.
- * We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char ttl;
- error = sooptcopyin(sopt, &ttl, 1, 1);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_ttl = ttl;
- INP_UNLOCK(inp);
- } else {
- u_int ttl;
- error = sooptcopyin(sopt, &ttl, sizeof ttl,
- sizeof ttl);
- if (error)
- break;
- if (ttl > 255)
- error = EINVAL;
- else {
- imo = ip_findmoptions(inp);
- imo->imo_multicast_ttl = ttl;
- INP_UNLOCK(inp);
- }
- }
- break;
-
- case IP_MULTICAST_LOOP:
- /*
- * Set the loopback flag for outgoing multicast packets.
- * Must be zero or one. The original multicast API required a
- * char argument, which is inconsistent with the rest
- * of the socket API. We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char loop;
- error = sooptcopyin(sopt, &loop, 1, 1);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_loop = !!loop;
- INP_UNLOCK(inp);
- } else {
- u_int loop;
- error = sooptcopyin(sopt, &loop, sizeof loop,
- sizeof loop);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_loop = !!loop;
- INP_UNLOCK(inp);
- }
- break;
-
- case IP_ADD_MEMBERSHIP:
- /*
- * Add a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
- error = EINVAL;
- break;
- }
- s = splimp();
- /*
- * If no interface address was provided, use the interface of
- * the route to the given multicast address.
- */
- if (mreq.imr_interface.s_addr == INADDR_ANY) {
- bzero((caddr_t)&ro, sizeof(ro));
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_len = sizeof(*dst);
- dst->sin_family = AF_INET;
- dst->sin_addr = mreq.imr_multiaddr;
- rtalloc_ign(&ro, RTF_CLONING);
- if (ro.ro_rt == NULL) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- ifp = ro.ro_rt->rt_ifp;
- RTFREE(ro.ro_rt);
- }
- else {
- ifp = ip_multicast_if(&mreq.imr_interface, NULL);
- }
-
- /*
- * See if we found an interface, and confirm that it
- * supports multicast.
- */
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- /*
- * See if the membership already exists or if all the
- * membership slots are full.
- */
- imo = ip_findmoptions(inp);
- for (i = 0; i < imo->imo_num_memberships; ++i) {
- if (imo->imo_membership[i]->inm_ifp == ifp &&
- imo->imo_membership[i]->inm_addr.s_addr
- == mreq.imr_multiaddr.s_addr)
- break;
- }
- if (i < imo->imo_num_memberships) {
- INP_UNLOCK(inp);
- error = EADDRINUSE;
- splx(s);
- break;
- }
- if (i == IP_MAX_MEMBERSHIPS) {
- INP_UNLOCK(inp);
- error = ETOOMANYREFS;
- splx(s);
- break;
- }
- /*
- * Everything looks good; add a new record to the multicast
- * address list for the given interface.
- */
- if ((imo->imo_membership[i] =
- in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
- INP_UNLOCK(inp);
- error = ENOBUFS;
- splx(s);
- break;
- }
- ++imo->imo_num_memberships;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- case IP_DROP_MEMBERSHIP:
- /*
- * Drop a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
- error = EINVAL;
- break;
- }
-
- s = splimp();
- /*
- * If an interface address was specified, get a pointer
- * to its ifnet structure.
- */
- if (mreq.imr_interface.s_addr == INADDR_ANY)
- ifp = NULL;
- else {
- ifp = ip_multicast_if(&mreq.imr_interface, NULL);
- if (ifp == NULL) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- }
- /*
- * Find the membership in the membership array.
- */
- imo = ip_findmoptions(inp);
- for (i = 0; i < imo->imo_num_memberships; ++i) {
- if ((ifp == NULL ||
- imo->imo_membership[i]->inm_ifp == ifp) &&
- imo->imo_membership[i]->inm_addr.s_addr ==
- mreq.imr_multiaddr.s_addr)
- break;
- }
- if (i == imo->imo_num_memberships) {
- INP_UNLOCK(inp);
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- /*
- * Give up the multicast address record to which the
- * membership points.
- */
- in_delmulti(imo->imo_membership[i]);
- /*
- * Remove the gap in the membership array.
- */
- for (++i; i < imo->imo_num_memberships; ++i)
- imo->imo_membership[i-1] = imo->imo_membership[i];
- --imo->imo_num_memberships;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- default:
- error = EOPNOTSUPP;
- break;
- }
-
- return (error);
-}
-
-/*
- * Return the IP multicast options in response to user getsockopt().
- */
-static int
-ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
-{
- struct ip_moptions *imo;
- struct in_addr addr;
- struct in_ifaddr *ia;
- int error, optval;
- u_char coptval;
-
- INP_LOCK(inp);
- imo = inp->inp_moptions;
-
- error = 0;
- switch (sopt->sopt_name) {
- case IP_MULTICAST_VIF:
- if (imo != NULL)
- optval = imo->imo_multicast_vif;
- else
- optval = -1;
- INP_UNLOCK(inp);
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- case IP_MULTICAST_IF:
- if (imo == NULL || imo->imo_multicast_ifp == NULL)
- addr.s_addr = INADDR_ANY;
- else if (imo->imo_multicast_addr.s_addr) {
- /* return the value user has set */
- addr = imo->imo_multicast_addr;
- } else {
- IFP_TO_IA(imo->imo_multicast_ifp, ia);
- addr.s_addr = (ia == NULL) ? INADDR_ANY
- : IA_SIN(ia)->sin_addr.s_addr;
- }
- INP_UNLOCK(inp);
- error = sooptcopyout(sopt, &addr, sizeof addr);
- break;
-
- case IP_MULTICAST_TTL:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_TTL;
- else
- optval = coptval = imo->imo_multicast_ttl;
- INP_UNLOCK(inp);
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- case IP_MULTICAST_LOOP:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
- else
- optval = coptval = imo->imo_multicast_loop;
- INP_UNLOCK(inp);
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- default:
- INP_UNLOCK(inp);
- error = ENOPROTOOPT;
- break;
- }
- INP_UNLOCK_ASSERT(inp);
-
- return (error);
-}
-
-/*
- * Discard the IP multicast options.
- */
-void
-ip_freemoptions(imo)
- register struct ip_moptions *imo;
-{
- register int i;
-
- if (imo != NULL) {
- for (i = 0; i < imo->imo_num_memberships; ++i)
- in_delmulti(imo->imo_membership[i]);
- free(imo, M_IPMOPTS);
- }
-}
-
/*
* Routine called from ip_output() to loop back a copy of an IP multicast
* packet to the input queue of a specified interface. Note that this
@@ -2069,11 +1143,8 @@
* replicating that code here.
*/
static void
-ip_mloopback(ifp, m, dst, hlen)
- struct ifnet *ifp;
- register struct mbuf *m;
- register struct sockaddr_in *dst;
- int hlen;
+ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
+ int hlen)
{
register struct ip *ip;
struct mbuf *copym;
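
The MTU hunk added near the top of ip_output() above selects the path MTU once, instead of reading ifp->if_mtu at each use: the route's cached MTU is preferred when the route is usable, but it is clamped to the interface MTU because the cache can go stale after an interface MTU change. A hedged sketch of just that selection rule, with plain integers standing in for rt_rmx.rmx_mtu and ifp->if_mtu and an illustrative helper name:

/*
 * Sketch of the MTU selection added to ip_output(): use the route's
 * cached MTU when the route is usable, clamped to the interface MTU.
 */
static unsigned long
select_mtu(int route_usable, unsigned long *route_mtu, unsigned long if_mtu)
{
	if (route_usable) {
		if (*route_mtu > if_mtu)
			*route_mtu = if_mtu;	/* repair a stale cache entry */
		return (*route_mtu);
	}
	return (if_mtu);
}

The same mtu value then drives the queue-length estimate, the EMSGSIZE check for oversized broadcasts, and the ip_fragment() call later in the function.
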
Index: tcp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet/tcp_var.h -L sys/netinet/tcp_var.h -u -r1.3 -r1.4
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.126.2.1 2006/03/01 21:13:29 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.157.2.1 2007/12/07 05:46:09 kmacy Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -75,6 +75,18 @@
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+ if ((tp) && (tp)->t_inpcb && \
+ ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+ nd6_nud_hint(NULL, NULL, 0); \
+} while (0)
+#else
+#define ND6_HINT(tp)
+#endif
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
@@ -84,11 +96,7 @@
int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
- struct callout *tt_rexmt; /* retransmit timer */
- struct callout *tt_persist; /* retransmit persistence */
- struct callout *tt_keep; /* keepalive */
- struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */
- struct callout *tt_delack; /* delayed ACK timer */
+ struct tcp_timer *t_timers; /* All the TCP timers in one struct */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
int t_state; /* state of this connection */
@@ -114,6 +122,7 @@
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
#define TF_FORCEDATA 0x800000 /* force out a byte */
+#define TF_TSO 0x1000000 /* TSO enabled on this connection */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -173,10 +182,10 @@
u_char snd_scale; /* window scaling for send window */
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
- u_char requested_s_scale;
- u_long ts_recent; /* timestamp echo data */
-
+ u_int32_t ts_recent; /* timestamp echo data */
u_long ts_recent_age; /* when last updated */
+ u_int32_t ts_offset; /* our timestamp offset */
+
tcp_seq last_ack_sent;
/* experimental */
u_long snd_cwnd_prev; /* cwnd prior to retransmit */
@@ -184,12 +193,7 @@
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
u_long t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
-/* anti DoS counters */
- u_long rcv_second; /* start of interval second */
- u_long rcv_pps; /* received packets per second */
- u_long rcv_byps; /* received bytes per second */
/* SACK related state */
- int sack_enable; /* enable SACK for this connection */
int snd_numholes; /* number of holes seen by sender */
TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
/* SACK scoreboard (sorted) */
@@ -202,6 +206,7 @@
	int	t_rttlow;		/* smallest observed RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
+ void *t_pspare[5]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
@@ -227,58 +232,32 @@
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
- * to tcp_dooptions.
+ * to tcp_dooptions and tcp_addoptions.
+ * The binary order of the to_flags is relevant for packing of the
+ * options in tcp_addoptions.
*/
struct tcpopt {
u_long to_flags; /* which options are present */
-#define TOF_TS 0x0001 /* timestamp */
-#define TOF_MSS 0x0010
-#define TOF_SCALE 0x0020
-#define TOF_SIGNATURE 0x0040 /* signature option present */
-#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
-#define TOF_SACK 0x0100 /* Peer sent SACK option */
- u_int32_t to_tsval;
- u_int32_t to_tsecr;
- u_int16_t to_mss;
- u_int8_t to_requested_s_scale;
+#define TOF_MSS 0x0001 /* maximum segment size */
+#define TOF_SCALE 0x0002 /* window scaling */
+#define TOF_SACKPERM 0x0004 /* SACK permitted */
+#define TOF_TS 0x0010 /* timestamp */
+#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x0080 /* Peer sent SACK option */
+#define TOF_MAXOPT 0x0100
+ u_int32_t to_tsval; /* new timestamp */
+ u_int32_t to_tsecr; /* reflected timestamp */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_char *to_sacks; /* pointer to the first SACK blocks */
+ u_char *to_signature; /* pointer to the TCP-MD5 signature */
};
-#ifdef _NETINET_IN_PCB_H_
-struct syncache {
- inp_gen_t sc_inp_gencnt; /* pointer check */
- struct tcpcb *sc_tp; /* tcb for listening socket */
- struct mbuf *sc_ipopts; /* source route */
- struct in_conninfo sc_inc; /* addresses */
- u_int32_t sc_tsrecent;
- u_int32_t sc_flowlabel; /* IPv6 flowlabel */
- tcp_seq sc_irs; /* seq from peer */
- tcp_seq sc_iss; /* our ISS */
- u_long sc_rxttime; /* retransmit time */
- u_int16_t sc_rxtslot; /* retransmit counter */
- u_int16_t sc_peer_mss; /* peer's MSS */
- u_int16_t sc_wnd; /* advertised window */
- u_int8_t sc_requested_s_scale:4,
- sc_request_r_scale:4;
- u_int8_t sc_flags;
-#define SCF_NOOPT 0x01 /* no TCP options */
-#define SCF_WINSCALE 0x02 /* negotiated window scaling */
-#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
-#define SCF_UNREACH 0x10 /* icmp unreachable received */
-#define SCF_SIGNATURE 0x20 /* send MD5 digests */
-#define SCF_SACK 0x80 /* send SACK option */
- TAILQ_ENTRY(syncache) sc_hash;
- TAILQ_ENTRY(syncache) sc_timerq;
-};
-
-struct syncache_head {
- TAILQ_HEAD(, syncache) sch_bucket;
- u_int sch_length;
-};
-#else
-struct in_conninfo;
-#endif /* _NETINET_IN_PCB_H_ */
+/*
+ * Flags for tcp_dooptions.
+ */
+#define TO_SYN 0x01 /* parse SYN-only options */
struct hc_metrics_lite { /* must stay in sync with hc_metrics */
u_long rmx_mtu; /* MTU for this path */
@@ -291,6 +270,10 @@
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
};
+#ifndef _NETINET_IN_PCB_H_
+struct in_conninfo;
+#endif /* _NETINET_IN_PCB_H_ */
+
struct tcptw {
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
tcp_seq snd_nxt;
@@ -301,9 +284,10 @@
u_short tw_so_options; /* copy of so_options */
struct ucred *tw_cred; /* user credentials */
u_long t_recent;
+ u_int32_t ts_offset; /* our timestamp offset */
u_long t_starttime;
int tw_time;
- LIST_ENTRY(tcptw) tw_2msl;
+ TAILQ_ENTRY(tcptw) tw_2msl;
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
@@ -437,6 +421,8 @@
u_long tcps_hc_added; /* entry added to hostcache */
u_long tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
+ u_long tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
+
/* SACK related stats */
u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
u_long tcps_sack_rexmits; /* SACK rexmit segments */
@@ -479,6 +465,7 @@
#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
#define TCPCTL_DROP 15 /* drop tcp connection */
#define TCPCTL_MAXID 16
+#define TCPCTL_FINWAIT2_TIMEOUT 17
#define TCPCTL_NAMES { \
{ 0, 0 }, \
@@ -502,28 +489,33 @@
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
SYSCTL_DECL(_net_inet_tcp_sack);
+MALLOC_DECLARE(M_TCPLOG);
#endif
extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
extern struct inpcbinfo tcbinfo;
extern struct tcpstat tcpstat; /* tcp statistics */
+extern int tcp_log_in_vain;
extern int tcp_mssdflt; /* XXX */
extern int tcp_minmss;
-extern int tcp_minmssoverload;
extern int tcp_delack_enabled;
extern int tcp_do_newreno;
extern int path_mtu_discovery;
extern int ss_fltsz;
extern int ss_fltsz_local;
-extern int tcp_do_sack; /* SACK enabled/disabled */
+extern int tcp_do_sack; /* SACK enabled/disabled */
+extern int tcp_sc_rst_sock_fail; /* RST on sock alloc failure */
+int tcp_addoptions(struct tcpopt *, u_char *);
struct tcpcb *
tcp_close(struct tcpcb *);
+void tcp_discardcb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
+#if 0
int tcp_twrecycleable(struct tcptw *tw);
-struct tcptw *
- tcp_twclose(struct tcptw *_tw, int _reuse);
+#endif
+void tcp_twclose(struct tcptw *_tw, int _reuse);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
@@ -532,10 +524,13 @@
void tcp_fasttimo(void);
void tcp_init(void);
void tcp_fini(void *);
+char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
+ const void *);
+int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
-u_long tcp_maxmtu(struct in_conninfo *);
-u_long tcp_maxmtu6(struct in_conninfo *);
+u_long tcp_maxmtu(struct in_conninfo *, int *);
+u_long tcp_maxmtu6(struct in_conninfo *, int *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
@@ -547,6 +542,10 @@
int tcp_output(struct tcpcb *);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+void tcp_tw_init(void);
+void tcp_tw_zone_change(void);
+int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
+ struct mbuf *, int);
int tcp_twrespond(struct tcptw *, int);
void tcp_setpersist(struct tcpcb *);
#ifdef TCP_SIGNATURE
@@ -556,18 +555,10 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-struct tcpcb *
- tcp_timers(struct tcpcb *, int);
-void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
+void tcp_timer_activate(struct tcpcb *, int, u_int);
+int tcp_timer_active(struct tcpcb *, int);
+void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
-void syncache_init(void);
-void syncache_unreach(struct in_conninfo *, struct tcphdr *);
-int syncache_expand(struct in_conninfo *, struct tcphdr *,
- struct socket **, struct mbuf *);
-int syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
-void syncache_badack(struct in_conninfo *);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
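
The reworked struct tcpopt above keeps every parsed option behind a TOF_* bit in to_flags, so a consumer is expected to test the bit before trusting the corresponding field. A small sketch of that pattern, assuming struct tcpopt is visible (e.g. in a kernel source file that includes <netinet/tcp_var.h>); the helper is illustrative and not part of this diff.

/*
 * Illustrative reader for the reworked struct tcpopt: each field is
 * only meaningful when its TOF_* bit is set in to_flags.
 */
static u_int32_t
echoed_timestamp(const struct tcpopt *to)
{
	if (to->to_flags & TOF_TS)	/* timestamp option was present */
		return (to->to_tsecr);	/* reflected timestamp value */
	return (0);			/* no timestamp negotiated */
}

The binary order of the TOF_* values also matters to tcp_addoptions(), as noted in the comment above, since options are packed into the header in flag order.
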
--- /dev/null
+++ sys/netinet/ip_ipsec.c
@@ -0,0 +1,387 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_ipsec.c,v 1.8 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipsec.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip_ipsec.h>
+
+#include <machine/in_cksum.h>
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/xform.h>
+#include <netipsec/key.h>
+#endif /*IPSEC*/
+
+extern struct protosw inetsw[];
+
+/*
+ * Check if we have to jump over firewall processing for this packet.
+ * Called from ip_input().
+ * 1 = jump over firewall, 0 = packet goes through firewall.
+ */
+int
+ip_ipsec_filtertunnel(struct mbuf *m)
+{
+#if defined(IPSEC) && !defined(IPSEC_FILTERTUNNEL)
+ /*
+ * Bypass packet filtering for packets from a tunnel.
+ */
+ if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * Check if this packet has an active SA and needs to be dropped instead
+ * of forwarded.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = forward packet.
+ */
+int
+ip_ipsec_fwd(struct mbuf *m)
+{
+#ifdef IPSEC
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp == NULL) { /* NB: can happen if error */
+ splx(s);
+ /*XXX error stat???*/
+ DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
+ return 1;
+ }
+
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ splx(s);
+ if (error) {
+ ipstat.ips_cantforward++;
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
+
+/*
+ * Check whether the protocol type has no further header and, if so, do IPSEC
+ * decryption or rejection right now. Protocols with further headers get
+ * their IPSEC treatment within the protocol-specific processing.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = continue processing packet.
+ */
+int
+ip_ipsec_input(struct mbuf *m)
+{
+ struct ip *ip = mtod(m, struct ip *);
+#ifdef IPSEC
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+ /*
+ * Enforce IPsec policy checking if we are seeing the last header.
+ * Note that we do not visit this for protocols with pcb-layer
+ * code, like UDP/TCP/raw IP.
+ */
+ if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
+ /*
+ * Check if the packet has already had IPsec processing
+ * done. If so, then just pass it along. This tag gets
+ * set during AH, ESP, etc. input handling, before the
+ * packet is returned to the ip input queue for delivery.
+ */
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp != NULL) {
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ } else {
+ /* XXX error stat??? */
+ error = EINVAL;
+ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
+ return 1;
+ }
+ splx(s);
+ if (error)
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
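
The three ip_input()-side helpers above (ip_ipsec_filtertunnel(), ip_ipsec_fwd() and ip_ipsec_input()) share one convention: return 1 to make the caller drop the packet or skip a stage, return 0 to continue normal processing. A tiny compilable model of a caller honouring that convention follows; the stub merely stands in for any of the three helpers.

#include <stdio.h>

/* Stand-in for an ip_ipsec_*() check: 1 = drop/skip, 0 = continue. */
static int
ipsec_check_stub(int policy_rejects)
{
	return (policy_rejects ? 1 : 0);
}

static void
forward_packet(int policy_rejects)
{
	if (ipsec_check_stub(policy_rejects)) {
		printf("dropped by IPsec policy\n");	/* real code would m_freem() */
		return;
	}
	printf("forwarded\n");
}

int
main(void)
{
	forward_packet(0);
	forward_packet(1);
	return (0);
}
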
+
+/*
+ * Compute the MTU for a forwarded packet that gets IPSEC encapsulated.
+ * Called from ip_forward().
+ * Returns MTU suggestion for ICMP needfrag reply.
+ */
+int
+ip_ipsec_mtu(struct mbuf *m)
+{
+ int mtu = 0;
+ /*
+ * If the packet is routed over an IPsec tunnel, tell the
+ * originator the tunnel MTU.
+ * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
+ * XXX quickhack!!!
+ */
+ struct secpolicy *sp = NULL;
+ int ipsecerror;
+ int ipsechdr;
+ struct route *ro;
+ sp = ipsec_getpolicybyaddr(m,
+ IPSEC_DIR_OUTBOUND,
+ IP_FORWARDING,
+ &ipsecerror);
+ if (sp != NULL) {
+ /* count IPsec header size */
+ ipsechdr = ipsec4_hdrsiz(m,
+ IPSEC_DIR_OUTBOUND,
+ NULL);
+
+ /*
+ * find the correct route for outer IPv4
+ * header, compute tunnel MTU.
+ */
+ if (sp->req != NULL &&
+ sp->req->sav != NULL &&
+ sp->req->sav->sah != NULL) {
+ ro = &sp->req->sav->sah->sa_route;
+ if (ro->ro_rt && ro->ro_rt->rt_ifp) {
+ mtu =
+ ro->ro_rt->rt_rmx.rmx_mtu ?
+ ro->ro_rt->rt_rmx.rmx_mtu :
+ ro->ro_rt->rt_ifp->if_mtu;
+ mtu -= ipsechdr;
+ }
+ }
+ KEY_FREESP(&sp);
+ }
+ return mtu;
+}
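
The suggestion returned above is just the outer path MTU minus the IPsec header overhead that ipsec4_hdrsiz() reports, preferring the route's cached MTU (rmx_mtu) over the interface MTU when one is set. A standalone illustration of that arithmetic follows; the 73-byte overhead is only an example figure, not a value taken from the stack.

#include <stdio.h>

/* Mirrors the selection in ip_ipsec_mtu(): prefer the cached route MTU,
 * fall back to the interface MTU, then subtract the IPsec overhead. */
static int
tunnel_mtu(unsigned int rmx_mtu, unsigned int if_mtu, unsigned int ipsec_hdrsiz)
{
	unsigned int mtu = rmx_mtu ? rmx_mtu : if_mtu;

	return ((int)(mtu - ipsec_hdrsiz));
}

int
main(void)
{
	/* 1500-byte Ethernet, no cached route MTU, 73 bytes of example
	 * tunnel-mode overhead -> suggest 1427 in the ICMP needfrag reply. */
	printf("suggested MTU: %d\n", tunnel_mtu(0, 1500, 73));
	return (0);
}
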
+
+/*
+ * Check the outbound security policy and do any required IPsec processing.
+ * Called from ip_output().
+ * 1 = drop packet, 0 = continue processing packet,
+ * -1 = packet was reinjected and stop processing packet
+ */
+int
+ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
+ struct route **ro, struct route *iproute, struct sockaddr_in **dst,
+ struct in_ifaddr **ia, struct ifnet **ifp)
+{
+#ifdef IPSEC
+ struct secpolicy *sp = NULL;
+ struct ip *ip = mtod(*m, struct ip *);
+ struct tdb_ident *tdbi;
+ struct m_tag *mtag;
+ int s;
+ /*
+ * Check the security policy (SP) for the packet and, if
+ * required, do IPsec-related processing. There are two
+ * cases here; the first time a packet is sent through,
+ * it will be untagged and handled by ipsec4_checkpolicy.
+ * If the packet is resubmitted to ip_output (e.g. after
+ * AH, ESP, etc. processing), there will be a tag to bypass
+ * the lookup and related policy checking.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
+ if (sp == NULL)
+ *error = -EINVAL; /* force silent drop */
+ m_tag_delete(*m, mtag);
+ } else {
+ sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
+ error, inp);
+ }
+ /*
+ * There are four return cases:
+ * sp != NULL apply IPsec policy
+ * sp == NULL, error == 0 no IPsec handling needed
+ * sp == NULL, error == -EINVAL discard packet w/o error
+ * sp == NULL, error != 0 discard packet, report error
+ */
+ if (sp != NULL) {
+ /* Loop detection, check if ipsec processing already done */
+ KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
+ for (mtag = m_tag_first(*m); mtag != NULL;
+ mtag = m_tag_next(*m, mtag)) {
+ if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
+ continue;
+ if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
+ mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
+ continue;
+ /*
+ * Check if policy has an SA associated with it.
+ * This can happen when an SP has yet to acquire
+ * an SA; e.g. on first reference. If it occurs,
+ * then we let ipsec4_process_packet do its thing.
+ */
+ if (sp->req->sav == NULL)
+ break;
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ if (tdbi->spi == sp->req->sav->spi &&
+ tdbi->proto == sp->req->sav->sah->saidx.proto &&
+ bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
+ sizeof (union sockaddr_union)) == 0) {
+ /*
+ * No IPsec processing is needed, free
+ * reference to SP.
+ *
+ * NB: null pointer to avoid free at
+ * done: below.
+ */
+ KEY_FREESP(&sp), sp = NULL;
+ splx(s);
+ goto done;
+ }
+ }
+
+ /*
+ * Do delayed checksums now because we send before
+ * this is done in the normal processing path.
+ */
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(*m);
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+
+ /* NB: callee frees mbuf */
+ *error = ipsec4_process_packet(*m, sp->req, *flags, 0);
+ /*
+ * Preserve KAME behaviour: ENOENT can be returned
+ * when an SA acquire is in progress. Don't propagate
+ * this to user-level; it confuses applications.
+ *
+ * XXX this will go away when the SADB is redone.
+ */
+ if (*error == ENOENT)
+ *error = 0;
+ splx(s);
+ goto reinjected;
+ } else { /* sp == NULL */
+ splx(s);
+
+ if (*error != 0) {
+ /*
+ * Hack: -EINVAL is used to signal that a packet
+ * should be silently discarded. This is typically
+ * because we asked key management for an SA and
+ * it was delayed (e.g. kicked up to IKE).
+ */
+ if (*error == -EINVAL)
+ *error = 0;
+ goto bad;
+ } else {
+ /* No IPsec processing for this packet. */
+ }
+#ifdef notyet
+ /*
+ * If deferred crypto processing is needed, check that
+ * the interface supports it.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
+ if (mtag != NULL && ((*ifp)->if_capenable & IFCAP_IPSEC) == 0) {
+ /* notify IPsec to do its own crypto */
+ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
+ *error = EHOSTUNREACH;
+ goto bad;
+ }
+#endif
+ }
+done:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 0;
+reinjected:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return -1;
+bad:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 1;
+#endif /* IPSEC */
+ return 0;
+}
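
Unlike the ip_input()-side helpers, ip_ipsec_output() has a three-way contract: 1 tells ip_output() to drop the packet, 0 to continue building and sending it, and -1 that the packet was handed to ipsec4_process_packet() and ip_output() should just return *error. A compilable sketch of a caller dispatching on that contract follows; the stub and the error value are illustrative only, and the real ip_output() integration obviously does more.

#include <stdio.h>

/* Stand-in producing one of ip_ipsec_output()'s three results. */
static int
ipsec_output_stub(int scenario, int *error)
{
	if (scenario == 1) {
		*error = 0;
		return (-1);	/* packet reinjected through IPsec */
	}
	if (scenario == 2) {
		*error = 1;	/* arbitrary example errno */
		return (1);	/* drop */
	}
	*error = 0;
	return (0);		/* continue normal output processing */
}

static int
output_packet(int scenario)
{
	int error;

	switch (ipsec_output_stub(scenario, &error)) {
	case -1:
		return (error);		/* IPsec consumed the packet */
	case 1:
		printf("drop\n");	/* 'goto bad' in the real code */
		return (error);
	default:
		printf("continue with the normal send path\n");
		return (0);
	}
}

int
main(void)
{
	output_packet(0);
	output_packet(1);
	output_packet(2);
	return (0);
}
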
--- /dev/null
+++ sys/netinet/sctp_pcb.h
@@ -0,0 +1,589 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.h,v 1.21 2005/07/16 01:18:47 suz Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_pcb.h,v 1.31.2.1 2007/11/06 02:48:03 rrs Exp $");
+
+#ifndef __sctp_pcb_h__
+#define __sctp_pcb_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_constants.h>
+
+LIST_HEAD(sctppcbhead, sctp_inpcb);
+LIST_HEAD(sctpasochead, sctp_tcb);
+LIST_HEAD(sctpladdr, sctp_laddr);
+LIST_HEAD(sctpvtaghead, sctp_tagblock);
+LIST_HEAD(sctp_vrflist, sctp_vrf);
+LIST_HEAD(sctp_ifnlist, sctp_ifn);
+LIST_HEAD(sctp_ifalist, sctp_ifa);
+TAILQ_HEAD(sctp_readhead, sctp_queued_to_read);
+TAILQ_HEAD(sctp_streamhead, sctp_stream_queue_pending);
+
+#include <netinet/sctp_structs.h>
+#include <netinet/sctp_auth.h>
+
+#define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask)
+#define SCTP_PCBHASH_ASOC(tag, mask) (tag & mask)
+
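
The two hash macros above rely on the usual hashinit()-style convention: the table size is a power of two and the stored mark is size - 1, so the AND is a cheap modulo. A standalone illustration follows; the table size of 1024 is an arbitrary example, not SCTP's actual tuning.

#include <stdio.h>
#include <stdint.h>

#define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask)

int
main(void)
{
	uint32_t hashsize = 1024;		/* must be a power of two */
	uint32_t hashmark = hashsize - 1;	/* 0x3ff */
	uint16_t lport = 36412;

	printf("port %u hashes to bucket %u of %u\n", (unsigned int)lport,
	    (unsigned int)SCTP_PCBHASH_ALLADDR(lport, hashmark),
	    (unsigned int)hashsize);
	return (0);
}
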
+struct sctp_vrf {
+ LIST_ENTRY(sctp_vrf) next_vrf;
+ struct sctp_ifalist *vrf_addr_hash;
+ struct sctp_ifnlist ifnlist;
+ uint32_t vrf_id;
+ uint32_t tbl_id_v4; /* default v4 table id */
+ uint32_t tbl_id_v6; /* default v6 table id */
+ uint32_t total_ifa_count;
+ u_long vrf_addr_hashmark;
+ uint32_t refcount;
+};
+
+struct sctp_ifn {
+ struct sctp_ifalist ifalist;
+ struct sctp_vrf *vrf;
+ LIST_ENTRY(sctp_ifn) next_ifn;
+ LIST_ENTRY(sctp_ifn) next_bucket;
+ void *ifn_p; /* never access without appropriate lock */
+ uint32_t ifn_mtu;
+ uint32_t ifn_type;
+ uint32_t ifn_index; /* shorthand way to look at ifn for reference */
+ uint32_t refcount; /* number of references held; should be >=
+ * ifa_count */
+ uint32_t ifa_count; /* IFA's we hold (in our list - ifalist) */
+ uint32_t num_v6; /* number of v6 addresses */
+ uint32_t num_v4; /* number of v4 addresses */
+ uint32_t registered_af; /* registered address family for i/f events */
+ char ifn_name[SCTP_IFNAMSIZ];
+};
+
+/* SCTP local IFA flags */
+#define SCTP_ADDR_VALID 0x00000001 /* it's up and active */
+#define SCTP_BEING_DELETED 0x00000002 /* being deleted, when
+ * refcount = 0. Note that it
+ * is pulled from the ifn list
+ * and ifa_p is nulled right
+ * away but it cannot be freed
+ * until the last *net
+ * pointing to it is deleted. */
+#define SCTP_ADDR_DEFER_USE 0x00000004 /* Hold off using this one */
+#define SCTP_ADDR_IFA_UNUSEABLE 0x00000008
+
+struct sctp_ifa {
+ LIST_ENTRY(sctp_ifa) next_ifa;
+ LIST_ENTRY(sctp_ifa) next_bucket;
+ struct sctp_ifn *ifn_p; /* back pointer to parent ifn */
+ void *ifa; /* pointer to ifa; for flag updates on it
+ * we MUST hold the appropriate locks. This
+ * is for V6. */
+ union sctp_sockstore address;
+ uint32_t refcount; /* number of folks referring to this */
+ uint32_t flags;
+ uint32_t localifa_flags;
+ uint32_t vrf_id; /* vrf_id of this addr (for deleting) */
+ uint8_t src_is_loop;
+ uint8_t src_is_priv;
+ uint8_t src_is_glob;
+ uint8_t resv;
+};
+
+struct sctp_laddr {
+ LIST_ENTRY(sctp_laddr) sctp_nxt_addr; /* next in list */
+ struct sctp_ifa *ifa;
+ uint32_t action; /* Used during asconf and adding; if non-zero,
+ * src-addr selection will not consider this
+ * address. */
+ struct timeval start_time; /* time when this address was created */
+};
+
+struct sctp_block_entry {
+ int error;
+};
+
+struct sctp_timewait {
+ uint32_t tv_sec_at_expire; /* the seconds from boot to expire */
+ uint32_t v_tag; /* the vtag that can not be reused */
+};
+
+struct sctp_tagblock {
+ LIST_ENTRY(sctp_tagblock) sctp_nxt_tagblock;
+ struct sctp_timewait vtag_block[SCTP_NUMBER_IN_VTAG_BLOCK];
+};
+
+struct sctp_epinfo {
+ struct sctpasochead *sctp_asochash;
+ u_long hashasocmark;
+
+ struct sctppcbhead *sctp_ephash;
+ u_long hashmark;
+
+ struct sctpasochead *sctp_restarthash;
+ u_long hashrestartmark;
+ /*-
+ * The TCP model represents a substantial overhead in that we get an
+ * additional hash table to keep explicit connections in. The
+ * listening TCP endpoint will exist in the usual ephash above and
+ * accept only INIT's. It will be incapable of sending off an INIT.
+ * When a dg arrives we must look in the normal ephash. If we find a
+ * TCP endpoint, that will tell us to go to the specific endpoint
+ * hash and re-hash to find the right assoc/socket. If we find a UDP
+ * model socket, we then must complete the lookup. If this fails,
+ * i.e. no association can be found, then we must continue to see if
+ * a sctp_peeloff()'d socket is in the tcpephash (a spun-off socket
+ * acts like a TCP-model connected socket).
+ */
+ struct sctppcbhead *sctp_tcpephash;
+ u_long hashtcpmark;
+ uint32_t hashtblsize;
+
+ struct sctp_vrflist *sctp_vrfhash;
+ u_long hashvrfmark;
+
+ struct sctp_ifnlist *vrf_ifn_hash;
+ u_long vrf_ifn_hashmark;
+
+ struct sctppcbhead listhead;
+ struct sctpladdr addr_wq;
+
+ struct sctpiterators iteratorhead;
+
+ /* ep zone info */
+ sctp_zone_t ipi_zone_ep;
+ sctp_zone_t ipi_zone_asoc;
+ sctp_zone_t ipi_zone_laddr;
+ sctp_zone_t ipi_zone_net;
+ sctp_zone_t ipi_zone_chunk;
+ sctp_zone_t ipi_zone_readq;
+ sctp_zone_t ipi_zone_strmoq;
+ sctp_zone_t ipi_zone_asconf_ack;
+
+ struct rwlock ipi_ep_mtx;
+ struct mtx it_mtx;
+ struct mtx ipi_iterator_wq_mtx;
+ struct rwlock ipi_addr_mtx;
+ struct mtx ipi_pktlog_mtx;
+ uint32_t ipi_count_ep;
+
+ /* assoc/tcb zone info */
+ uint32_t ipi_count_asoc;
+
+ /* local addrlist zone info */
+ uint32_t ipi_count_laddr;
+
+ /* remote addrlist zone info */
+ uint32_t ipi_count_raddr;
+
+ /* chunk structure list for output */
+ uint32_t ipi_count_chunk;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_readq;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_strmoq;
+
+ /* Number of vrfs */
+ uint32_t ipi_count_vrfs;
+
+ /* Number of ifns */
+ uint32_t ipi_count_ifns;
+
+ /* Number of ifas */
+ uint32_t ipi_count_ifas;
+
+ /* system wide number of free chunks hanging around */
+ uint32_t ipi_free_chunks;
+ uint32_t ipi_free_strmoq;
+
+
+ struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE_A];
+
+ /* address work queue handling */
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ uint32_t iterator_running;
+ SCTP_PROCESS_STRUCT thread_proc;
+#endif
+ struct sctp_timer addr_wq_timer;
+
+};
+
+/*-
+ * Here we have all the relevant information for each SCTP entity created. We
+ * will need to modify this as appropriate. We also need to figure out how to
+ * access /dev/random.
+ */
+struct sctp_pcb {
+ unsigned int time_of_secret_change; /* number of seconds from
+ * timeval.tv_sec */
+ uint32_t secret_key[SCTP_HOW_MANY_SECRETS][SCTP_NUMBER_OF_SECRETS];
+ unsigned int size_of_a_cookie;
+
+ unsigned int sctp_timeoutticks[SCTP_NUM_TMRS];
+ unsigned int sctp_minrto;
+ unsigned int sctp_maxrto;
+ unsigned int initial_rto;
+ int initial_init_rto_max;
+
+ unsigned int sctp_sack_freq;
+ uint32_t sctp_sws_sender;
+ uint32_t sctp_sws_receiver;
+
+ uint32_t sctp_default_cc_module;
+ /* authentication related fields */
+ struct sctp_keyhead shared_keys;
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_hmaclist_t *local_hmacs;
+ uint16_t default_keyid;
+
+ /* various thresholds */
+ /* Max times I will init at a guy */
+ uint16_t max_init_times;
+
+ /* Max times I will send before we consider someone dead */
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+ /* number of streams to pre-open on an association */
+ uint16_t pre_open_stream_count;
+ uint16_t max_open_streams_intome;
+
+ /* random number generator */
+ uint32_t random_counter;
+ uint8_t random_numbers[SCTP_SIGNATURE_ALOC_SIZE];
+ uint8_t random_store[SCTP_SIGNATURE_ALOC_SIZE];
+
+ /*
+ * This timer is kept running per endpoint. When it fires it will
+ * change the secret key. The default is once an hour.
+ */
+ struct sctp_timer signature_change;
+
+ /* Zero copy full buffer timer */
+ struct sctp_timer zero_copy_timer;
+ /* Zero copy app to transport (sendq) read repulse timer */
+ struct sctp_timer zero_copy_sendq_timer;
+ uint32_t def_cookie_life;
+ /* defaults to 0 */
+ int auto_close_time;
+ uint32_t initial_sequence_debug;
+ uint32_t adaptation_layer_indicator;
+ uint32_t store_at;
+ uint8_t max_burst;
+ char current_secret_number;
+ char last_secret_number;
+};
+
+#ifndef SCTP_ALIGNMENT
+#define SCTP_ALIGNMENT 32
+#endif
+
+#ifndef SCTP_ALIGNM1
+#define SCTP_ALIGNM1 (SCTP_ALIGNMENT-1)
+#endif
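
SCTP_ALIGNMENT and SCTP_ALIGNM1 feed the classic round-up idiom used in the ip_inp union of struct sctp_inpcb below: (size + SCTP_ALIGNM1) & ~SCTP_ALIGNM1 rounds a size up to the next multiple of SCTP_ALIGNMENT, which works because the alignment is a power of two. A quick standalone check of the idiom:

#include <stdio.h>
#include <stddef.h>

#define SCTP_ALIGNMENT	32
#define SCTP_ALIGNM1	(SCTP_ALIGNMENT - 1)
#define ROUNDUP(x)	(((x) + SCTP_ALIGNM1) & ~SCTP_ALIGNM1)

int
main(void)
{
	/* A 41-byte object pads out to 64; an exact multiple stays put. */
	printf("%zu -> %zu\n", (size_t)41, (size_t)ROUNDUP(41));
	printf("%zu -> %zu\n", (size_t)64, (size_t)ROUNDUP(64));
	return (0);
}
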
+
+#define sctp_lport ip_inp.inp.inp_lport
+
+struct sctp_pcbtsn_rlog {
+ uint32_t vtag;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+};
+
+#define SCTP_READ_LOG_SIZE 135 /* we choose the number to make a pcb a page */
+
+
+struct sctp_inpcb {
+ /*-
+ * put an inpcb in front of it all; kind of a waste, but we need it
+ * for compatibility with all the other stuff.
+ */
+ union {
+ struct inpcb inp;
+ char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
+ ~SCTP_ALIGNM1];
+ } ip_inp;
+
+
+ /* Socket buffer lock protects read_queue and of course sb_cc */
+ struct sctp_readhead read_queue;
+
+ LIST_ENTRY(sctp_inpcb) sctp_list; /* lists all endpoints */
+ /* hash of all endpoints for model */
+ LIST_ENTRY(sctp_inpcb) sctp_hash;
+ /* count of local addresses bound, 0 if bound all */
+ int laddr_count;
+
+ /* list of addrs in use by the EP, NULL if bound-all */
+ struct sctpladdr sctp_addr_list;
+ /*
+ * used for source address selection rotation when we are subset
+ * bound
+ */
+ struct sctp_laddr *next_addr_touse;
+
+ /* back pointer to our socket */
+ struct socket *sctp_socket;
+ uint32_t sctp_flags; /* INP state flag set */
+ uint32_t sctp_features; /* Feature flags */
+ uint32_t sctp_mobility_features; /* Mobility Feature flags */
+ struct sctp_pcb sctp_ep;/* SCTP ep data */
+ /* head of the hash of all associations */
+ struct sctpasochead *sctp_tcbhash;
+ u_long sctp_hashmark;
+ /* head of the list of all associations */
+ struct sctpasochead sctp_asoc_list;
+#ifdef SCTP_TRACK_FREED_ASOCS
+ struct sctpasochead sctp_asoc_free_list;
+#endif
+ struct sctp_iterator *inp_starting_point_for_iterator;
+ uint32_t sctp_frag_point;
+ uint32_t partial_delivery_point;
+ uint32_t sctp_context;
+ struct sctp_nonpad_sndrcvinfo def_send;
+ /*-
+ * These three (pkt, pkt_last and control) are here
+ * for the sosend_dgram routine. However, I don't
+ * think anyone in the current FreeBSD kernel calls
+ * this, so they are candidates, along with sctp_sendm,
+ * for de-supporting.
+ */
+ struct mbuf *pkt, *pkt_last;
+ struct mbuf *control;
+ struct mtx inp_mtx;
+ struct mtx inp_create_mtx;
+ struct mtx inp_rdata_mtx;
+ int32_t refcount;
+ uint32_t def_vrf_id;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t last_abort_code;
+ uint32_t total_nospaces;
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ struct sctp_pcbtsn_rlog readlog[SCTP_READ_LOG_SIZE];
+ uint32_t readlog_index;
+#endif
+};
+
+struct sctp_tcb {
+ struct socket *sctp_socket; /* back pointer to socket */
+ struct sctp_inpcb *sctp_ep; /* back pointer to ep */
+ LIST_ENTRY(sctp_tcb) sctp_tcbhash; /* next link in hash
+ * table */
+ LIST_ENTRY(sctp_tcb) sctp_tcblist; /* list of all of the
+ * TCB's */
+ LIST_ENTRY(sctp_tcb) sctp_tcbrestarhash; /* next link in restart
+ * hash table */
+ LIST_ENTRY(sctp_tcb) sctp_asocs; /* vtag hash list */
+ struct sctp_block_entry *block_entry; /* pointer locked by socket
+ * send buffer */
+ struct sctp_association asoc;
+ /*-
+ * freed_by_sorcv_sincelast is protected by the sockbuf_lock NOT the
+ * tcb_lock. It's special in this way to help avoid extra mutex calls
+ * in the reading of data.
+ */
+ uint32_t freed_by_sorcv_sincelast;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ int freed_from_where;
+ uint16_t rport; /* remote port in network format */
+ uint16_t resv;
+ struct mtx tcb_mtx;
+ struct mtx tcb_send_mtx;
+};
+
+
+
+#include <netinet/sctp_lock_bsd.h>
+
+
+#if defined(_KERNEL)
+
+extern struct sctp_epinfo sctppcbinfo;
+
+int SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b);
+
+void sctp_fill_pcbinfo(struct sctp_pcbinfo *);
+
+struct sctp_ifn *
+ sctp_find_ifn(void *ifn, uint32_t ifn_index);
+
+struct sctp_vrf *sctp_allocate_vrf(int vrfid);
+struct sctp_vrf *sctp_find_vrf(uint32_t vrfid);
+void sctp_free_vrf(struct sctp_vrf *vrf);
+
+/*-
+ * Change address state, can be used if
+ * O/S supports telling transports about
+ * changes to IFA/IFN's (link layer triggers).
+ * If an ifn goes down, we will do src-addr-selection
+ * and NOT use that as a source address. This does
+ * not stop the routing system from routing out
+ * that interface, but we won't put it as a source.
+ */
+void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrfid,
+ void *ifn, uint32_t ifn_index, uint32_t ifn_type,
+ const char *if_name,
+ void *ifa, struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add);
+
+void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu);
+
+void sctp_free_ifn(struct sctp_ifn *sctp_ifnp);
+void sctp_free_ifa(struct sctp_ifa *sctp_ifap);
+
+
+void
+sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name);
+
+
+
+struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *);
+
+struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t);
+
+int
+sctp_inpcb_bind(struct socket *, struct sockaddr *,
+ struct sctp_ifa *, struct thread *);
+
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
+ struct sctp_nets **, uint32_t vrf_id);
+
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *,
+ struct sockaddr *, struct sctp_inpcb **, struct sctp_nets **, int, uint32_t);
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *, struct sctp_inpcb *,
+ struct sctp_tcb *);
+
+/*-
+ * For this call (ep_addr), 'to' is the destination endpoint address of the
+ * peer (relative to outbound). The 'from' field is only used if the TCP model
+ * is enabled and helps distinguish amongst the subset bound (non-boundall).
+ * The TCP model MAY change the actual ep field; this is why it is passed.
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **,
+ struct sockaddr *, struct sctp_nets **, struct sockaddr *,
+ struct sctp_tcb *);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *,
+ sctp_assoc_t, int);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **);
+
+int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
+
+int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id);
+
+void sctp_inpcb_free(struct sctp_inpcb *, int, int);
+
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
+ int, int *, uint32_t, uint32_t, struct thread *);
+
+int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
+
+
+void sctp_delete_from_timewait(uint32_t);
+
+int sctp_is_in_timewait(uint32_t tag);
+
+void
+ sctp_add_vtag_to_timewait(uint32_t, uint32_t);
+
+void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
+
+int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
+
+void sctp_remove_laddr(struct sctp_laddr *);
+
+void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
+
+int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int);
+
+void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *);
+
+int sctp_del_remote_addr(struct sctp_tcb *, struct sockaddr *);
+
+void sctp_pcb_init(void);
+
+
+void sctp_add_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+ int, struct sctphdr *, struct sockaddr *);
+
+int
+sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
+ struct sctp_nets *);
+
+int sctp_is_vtag_good(struct sctp_inpcb *, uint32_t, struct timeval *, int);
+
+/* void sctp_drain(void); */
+
+int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *);
+
+/*-
+ * Null in last arg inpcb indicates run on ALL ep's. Specific inp in last arg
+ * indicates run on ONLY assoc's of the specified endpoint.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t, uint32_t,
+ uint32_t, void *,
+ uint32_t,
+ end_func ef,
+ struct sctp_inpcb *,
+ uint8_t co_off);
+
+#endif /* _KERNEL */
+#endif /* !__sctp_pcb_h__ */
Index: tcp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -L sys/netinet/tcp_usrreq.c -L sys/netinet/tcp_usrreq.c -u -r1.7 -r1.8
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * Copyright (c) 2006-2007 Robert N. M. Watson
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,12 +29,12 @@
* SUCH DAMAGE.
*
* From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.124.2.2 2005/11/04 20:26:14 ume Exp $
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.163.2.1.2.1 2008/01/26 13:57:33 rwatson Exp $");
+#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -52,6 +54,10 @@
#include <sys/proc.h>
#include <sys/jail.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/if.h>
#include <net/route.h>
@@ -83,10 +89,6 @@
/*
* TCP protocol interface to socket abstraction.
*/
-#ifdef TCPDEBUG
-extern char *tcpstates[]; /* XXX ??? */
-#endif
-
static int tcp_attach(struct socket *);
static int tcp_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
@@ -94,10 +96,8 @@
static int tcp6_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
#endif /* INET6 */
-static struct tcpcb *
- tcp_disconnect(struct tcpcb *);
-static struct tcpcb *
- tcp_usrclosed(struct tcpcb *);
+static void tcp_disconnect(struct tcpcb *);
+static void tcp_usrclosed(struct tcpcb *);
static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
#ifdef TCPDEBUG
@@ -118,16 +118,14 @@
static int
tcp_usr_attach(struct socket *so, int proto, struct thread *td)
{
- int error;
struct inpcb *inp;
- struct tcpcb *tp = 0;
+ struct tcpcb *tp = NULL;
+ int error;
+ TCPDEBUG0;
- INP_INFO_WLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- error = EISCONN;
- goto out;
- }
+ KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
+ TCPDEBUG1();
error = tcp_attach(so);
if (error)
@@ -139,80 +137,128 @@
inp = sotoinpcb(so);
tp = intotcpcb(inp);
out:
- INP_INFO_WUNLOCK(&tcbinfo);
+ TCPDEBUG2(PRU_ATTACH);
return error;
}
/*
+ * tcp_detach is called when the socket layer loses its final reference
+ * to the socket, be it a file descriptor reference, a reference from TCP,
+ * etc. At this point, there is only one case in which we will keep around
+ * inpcb state: time wait.
+ *
+ * This function can probably be re-absorbed back into tcp_usr_detach() now
+ * that there is a single detach path.
+ */
+static void
+tcp_detach(struct socket *so, struct inpcb *inp)
+{
+ struct tcpcb *tp;
+#ifdef INET6
+ int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
+#endif
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
+ KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
+
+ tp = intotcpcb(inp);
+
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * There are two cases to handle: one in which the time wait
+ * state is being discarded (INP_DROPPED), and one in which
+ * this connection will remain in timewait. In the former,
+ * it is time to discard all state (except tcptw, which has
+ * already been discarded by the timewait close code, which
+ * should be further up the call stack somewhere). In the
+ * latter case, we detach from the socket, but leave the pcb
+ * present until timewait ends.
+ *
+ * XXXRW: Would it be cleaner to free the tcptw here?
+ */
+ if (inp->inp_vflag & INP_DROPPED) {
+ KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
+ "INP_DROPPED && tp != NULL"));
+#ifdef INET6
+ if (isipv6) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
+#endif
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ } else {
+#ifdef INET6
+ if (isipv6)
+ in6_pcbdetach(inp);
+ else
+#endif
+ in_pcbdetach(inp);
+ INP_UNLOCK(inp);
+ }
+ } else {
+ /*
+ * If the connection is not in timewait, we consider two
+ * conditions: one in which no further processing is
+ * necessary (dropped || embryonic), and one in which TCP is
+ * not yet done, but no longer requires the socket, so the
+ * pcb will persist for the time being.
+ *
+ * XXXRW: Does the second case still occur?
+ */
+ if (inp->inp_vflag & INP_DROPPED ||
+ tp->t_state < TCPS_SYN_SENT) {
+ tcp_discardcb(tp);
+#ifdef INET6
+ if (isipv6) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
+#endif
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ } else {
+#ifdef INET6
+ if (isipv6)
+ in6_pcbdetach(inp);
+ else
+#endif
+ in_pcbdetach(inp);
+ }
+ }
+}
+
+/*
* pru_detach() detaches the TCP protocol from the socket.
* If the protocol state is non-embryonic, then can't
* do this directly: have to initiate a pru_disconnect(),
* which may finish later; embryonic TCB's can just
* be discarded here.
*/
-static int
+static void
tcp_usr_detach(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- TCPDEBUG0;
- INP_INFO_WLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == NULL) {
- INP_INFO_WUNLOCK(&tcbinfo);
- return error;
- }
+ KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
+ INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(inp);
- tp = intotcpcb(inp);
- TCPDEBUG1();
- tp = tcp_disconnect(tp);
-
- TCPDEBUG2(PRU_DETACH);
- if (tp)
- INP_UNLOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_detach: inp_socket == NULL"));
+ tcp_detach(so, inp);
INP_INFO_WUNLOCK(&tcbinfo);
- return error;
}
-#define INI_NOLOCK 0
-#define INI_READ 1
-#define INI_WRITE 2
-
-#define COMMON_START() \
- TCPDEBUG0; \
- do { \
- if (inirw == INI_READ) \
- INP_INFO_RLOCK(&tcbinfo); \
- else if (inirw == INI_WRITE) \
- INP_INFO_WLOCK(&tcbinfo); \
- inp = sotoinpcb(so); \
- if (inp == 0) { \
- if (inirw == INI_READ) \
- INP_INFO_RUNLOCK(&tcbinfo); \
- else if (inirw == INI_WRITE) \
- INP_INFO_WUNLOCK(&tcbinfo); \
- return EINVAL; \
- } \
- INP_LOCK(inp); \
- if (inirw == INI_READ) \
- INP_INFO_RUNLOCK(&tcbinfo); \
- tp = intotcpcb(inp); \
- TCPDEBUG1(); \
-} while(0)
-
-#define COMMON_END(req) \
-out: TCPDEBUG2(req); \
- do { \
- if (tp) \
- INP_UNLOCK(inp); \
- if (inirw == INI_WRITE) \
- INP_INFO_WUNLOCK(&tcbinfo); \
- return error; \
- goto out; \
-} while(0)
-
/*
* Give the socket an address.
*/
@@ -221,9 +267,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in *sinp;
- const int inirw = INI_WRITE;
sinp = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sinp))
@@ -236,11 +281,24 @@
IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
return (EAFNOSUPPORT);
- COMMON_START();
- error = in_pcbbind(inp, nam, td->td_ucred);
- if (error)
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
goto out;
- COMMON_END(PRU_BIND);
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ error = in_pcbbind(inp, nam, td->td_ucred);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
+ return (error);
}
#ifdef INET6
@@ -249,9 +307,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in6 *sin6p;
- const int inirw = INI_WRITE;
sin6p = (struct sockaddr_in6 *)nam;
if (nam->sa_len != sizeof (*sin6p))
@@ -264,7 +321,17 @@
IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -282,9 +349,11 @@
}
}
error = in6_pcbbind(inp, nam, td->td_ucred);
- if (error)
- goto out;
- COMMON_END(PRU_BIND);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -292,36 +361,59 @@
* Prepare to accept connections.
*/
static int
-tcp_usr_listen(struct socket *so, struct thread *td)
+tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
- solisten_proto(so);
+ solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
- COMMON_END(PRU_LISTEN);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#ifdef INET6
static int
-tcp6_usr_listen(struct socket *so, struct thread *td)
+tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0) {
@@ -332,10 +424,15 @@
}
if (error == 0) {
tp->t_state = TCPS_LISTEN;
- solisten_proto(so);
+ solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
- COMMON_END(PRU_LISTEN);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -351,9 +448,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in *sinp;
- const int inirw = INI_WRITE;
sinp = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sinp))
@@ -367,11 +463,25 @@
if (jailed(td->td_ucred))
prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output(tp);
- COMMON_END(PRU_CONNECT);
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#ifdef INET6
@@ -380,9 +490,10 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in6 *sin6p;
- const int inirw = INI_WRITE;
+
+ TCPDEBUG0;
sin6p = (struct sockaddr_in6 *)nam;
if (nam->sa_len != sizeof (*sin6p))
@@ -394,7 +505,16 @@
&& IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- COMMON_START();
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
@@ -417,7 +537,12 @@
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output(tp);
- COMMON_END(PRU_CONNECT);
+
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -435,14 +560,27 @@
static int
tcp_usr_disconnect(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
+ int error = 0;
- COMMON_START();
- tp = tcp_disconnect(tp);
- COMMON_END(PRU_DISCONNECT);
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+out:
+ TCPDEBUG2(PRU_DISCONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
/*
@@ -460,33 +598,32 @@
in_port_t port = 0;
TCPDEBUG0;
- if (so->so_state & SS_ISDISCONNECTED) {
- error = ECONNABORTED;
- goto out;
- }
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (EINVAL);
- }
+ KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&tcbinfo);
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
/*
- * We inline in_setpeeraddr and COMMON_END here, so that we can
+ * We inline in_getpeeraddr and COMMON_END here, so that we can
* copy the data of interest and defer the malloc until after we
* release the lock.
*/
port = inp->inp_fport;
addr = inp->inp_faddr;
-out: TCPDEBUG2(PRU_ACCEPT);
- if (tp)
- INP_UNLOCK(inp);
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&tcbinfo);
if (error == 0)
*nam = in_sockaddr(port, &addr);
return error;
@@ -505,21 +642,19 @@
int v4 = 0;
TCPDEBUG0;
- if (so->so_state & SS_ISDISCONNECTED) {
- error = ECONNABORTED;
- goto out;
- }
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (EINVAL);
- }
+ KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
+
/*
* We inline in6_mapped_peeraddr and COMMON_END here, so that we can
* copy the data of interest and defer the malloc until after we
@@ -534,9 +669,9 @@
addr6 = inp->in6p_faddr;
}
-out: TCPDEBUG2(PRU_ACCEPT);
- if (tp)
- INP_UNLOCK(inp);
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_UNLOCK(inp);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -548,27 +683,6 @@
#endif /* INET6 */
/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
- * here because in_setsockaddr will call malloc and can block.
- */
-static int
-tcp_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &tcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-tcp_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &tcbinfo));
-}
-
-/*
* Mark the connection as being incapable of further output.
*/
static int
@@ -576,15 +690,29 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
socantsendmore(so);
- tp = tcp_usrclosed(tp);
- if (tp)
- error = tcp_output(tp);
- COMMON_END(PRU_SHUTDOWN);
+ tcp_usrclosed(tp);
+ error = tcp_output(tp);
+
+out:
+ TCPDEBUG2(PRU_SHUTDOWN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
+ return (error);
}
/*
@@ -593,14 +721,26 @@
static int
tcp_usr_rcvd(struct socket *so, int flags)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_READ;
+ struct tcpcb *tp = NULL;
+ int error = 0;
- COMMON_START();
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
tcp_output(tp);
- COMMON_END(PRU_RCVD);
+
+out:
+ TCPDEBUG2(PRU_RCVD);
+ INP_UNLOCK(inp);
+ return (error);
}
/*
@@ -612,41 +752,41 @@
*/
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
- struct sockaddr *nam, struct mbuf *control, struct thread *td)
+ struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- int unlocked = 0;
+ struct tcpcb *tp = NULL;
+ int headlocked = 0;
#ifdef INET6
int isipv6;
#endif
TCPDEBUG0;
/*
- * Need write lock here because this function might call
- * tcp_connect or tcp_usrclosed.
- * We really want to have to this function upgrade from read lock
- * to write lock. XXX
+ * We require the pcbinfo lock in two cases:
+ *
+ * (1) An implied connect is taking place, which can result in
+ * binding IPs and ports and hence modification of the pcb hash
+ * chains.
+ *
+ * (2) PRUS_EOF is set, resulting in explicit close on the send.
*/
- INP_INFO_WLOCK(&tcbinfo);
+ if ((nam != NULL) || (flags & PRUS_EOF)) {
+ INP_INFO_WLOCK(&tcbinfo);
+ headlocked = 1;
+ }
inp = sotoinpcb(so);
- if (inp == NULL) {
- /*
- * OOPS! we lost a race, the TCP session got reset after
- * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a
- * network interrupt in the non-splnet() section of sosend().
- */
- if (m)
- m_freem(m);
+ KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
if (control)
m_freem(control);
- error = ECONNRESET; /* XXX EPIPE? */
- tp = NULL;
- TCPDEBUG1();
+ if (m)
+ m_freem(m);
+ error = ECONNRESET;
goto out;
}
- INP_LOCK(inp);
#ifdef INET6
isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
@@ -672,6 +812,7 @@
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -683,17 +824,19 @@
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
}
-
if (flags & PRUS_EOF) {
/*
* Close the send side of the connection after
* the data is sent.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
socantsendmore(so);
- tp = tcp_usrclosed(tp);
+ tcp_usrclosed(tp);
+ }
+ if (headlocked) {
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
}
- INP_INFO_WUNLOCK(&tcbinfo);
- unlocked = 1;
if (tp != NULL) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
@@ -702,6 +845,9 @@
tp->t_flags &= ~TF_MORETOCOME;
}
} else {
+ /*
+ * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
+ */
SOCKBUF_LOCK(&so->so_snd);
if (sbspace(&so->so_snd) < -512) {
SOCKBUF_UNLOCK(&so->so_snd);
@@ -726,6 +872,7 @@
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -736,9 +883,12 @@
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
+ } else if (nam) {
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
}
- INP_INFO_WUNLOCK(&tcbinfo);
- unlocked = 1;
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
error = tcp_output(tp);
@@ -747,27 +897,87 @@
out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
- if (tp)
- INP_UNLOCK(inp);
- if (!unlocked)
+ INP_UNLOCK(inp);
+ if (headlocked)
INP_INFO_WUNLOCK(&tcbinfo);
return (error);
}
/*
- * Abort the TCP.
+ * Abort the TCP. Drop the connection abruptly.
*/
-static int
+static void
tcp_usr_abort(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&tcbinfo);
+ INP_LOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_abort: inp_socket == NULL"));
+
+ /*
+ * If we still have full TCP state, and we're not dropped, drop.
+ */
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_drop(tp, ECONNABORTED);
+ TCPDEBUG2(PRU_ABORT);
+ }
+ if (!(inp->inp_vflag & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_vflag |= INP_SOCKREF;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+}
+
+/*
+ * TCP socket is closed. Start friendly disconnect.
+ */
+static void
+tcp_usr_close(struct socket *so)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&tcbinfo);
+ INP_LOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_close: inp_socket == NULL"));
- COMMON_START();
- tp = tcp_drop(tp, ECONNABORTED);
- COMMON_END(PRU_ABORT);
+ /*
+ * If we still have full TCP state, and we're not dropped, initiate
+ * a disconnect.
+ */
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+ TCPDEBUG2(PRU_CLOSE);
+ }
+ if (!(inp->inp_vflag & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_vflag |= INP_SOCKREF;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
}
/*
@@ -778,10 +988,18 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_READ;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if ((so->so_oobmark == 0 &&
(so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
so->so_options & SO_OOBINLINE ||
@@ -797,7 +1015,11 @@
*mtod(m, caddr_t) = tp->t_iobc;
if ((flags & MSG_PEEK) == 0)
tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
- COMMON_END(PRU_RCVOOB);
+
+out:
+ TCPDEBUG2(PRU_RCVOOB);
+ INP_UNLOCK(inp);
+ return (error);
}
struct pr_usrreqs tcp_usrreqs = {
@@ -810,13 +1032,14 @@
.pru_detach = tcp_usr_detach,
.pru_disconnect = tcp_usr_disconnect,
.pru_listen = tcp_usr_listen,
- .pru_peeraddr = tcp_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_rcvd = tcp_usr_rcvd,
.pru_rcvoob = tcp_usr_rcvoob,
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
- .pru_sockaddr = tcp_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
};
#ifdef INET6
@@ -836,7 +1059,8 @@
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
};
#endif /* INET6 */
@@ -859,6 +1083,9 @@
u_short lport;
int error;
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
@@ -884,16 +1111,16 @@
/*
* Compute window scaling to request:
* Scale to fit into sweet spot. See tcp_syncache.c.
- * XXX: This should be moved to tcp_output().
+ * XXX: This should move to tcp_output().
*/
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << tp->request_r_scale) < sb_max) /* XXX */
+ (TCP_MAXWIN << tp->request_r_scale) < sb_max)
tp->request_r_scale++;
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_state = TCPS_SYN_SENT;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
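
The window-scale loop in the hunk above picks the smallest shift such that TCP_MAXWIN (65535) shifted left covers the socket buffer limit sb_max, capped at TCP_MAX_WINSHIFT (14); those two constants come from netinet/tcp.h. A standalone reproduction of the computation follows; the 2 MB sb_max is just an example input.

#include <stdio.h>

#define TCP_MAXWIN		65535
#define TCP_MAX_WINSHIFT	14

static int
request_r_scale(unsigned long sb_max)
{
	int scale = 0;

	while (scale < TCP_MAX_WINSHIFT &&
	    ((unsigned long)TCP_MAXWIN << scale) < sb_max)
		scale++;
	return (scale);
}

int
main(void)
{
	unsigned long sb_max = 2UL * 1024 * 1024;

	/* 65535 << 5 is still short of 2 MB, so the loop settles on 6. */
	printf("sb_max=%lu -> request_r_scale=%d\n",
	    sb_max, request_r_scale(sb_max));
	return (0);
}
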
@@ -911,6 +1138,9 @@
struct in6_addr *addr6;
int error;
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (inp->inp_lport == 0) {
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
@@ -953,7 +1183,7 @@
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_state = TCPS_SYN_SENT;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
@@ -979,13 +1209,17 @@
ti->tcpi_state = tp->t_state;
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- if (tp->sack_enable)
+ if (tp->t_flags & TF_SACK_PERMIT)
ti->tcpi_options |= TCPI_OPT_SACK;
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
ti->tcpi_options |= TCPI_OPT_WSCALE;
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
+
+ ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+ ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
ti->tcpi_snd_cwnd = tp->snd_cwnd;
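
The new tcpi_rtt/tcpi_rttvar fields convert the scaled averages kept in the tcpcb into microseconds: t_srtt is stored as clock ticks left-shifted by TCP_RTT_SHIFT (5 in tcp_var.h, with TCP_RTTVAR_SHIFT being 4), and tick is the number of microseconds per clock tick, so (t_srtt * tick) >> TCP_RTT_SHIFT yields microseconds. A standalone arithmetic check follows; HZ=1000 and the sample t_srtt are example inputs.

#include <stdio.h>
#include <stdint.h>

#define TCP_RTT_SHIFT	5	/* t_srtt is ticks << TCP_RTT_SHIFT */

int
main(void)
{
	uint64_t hz = 1000;
	uint64_t tick = 1000000 / hz;		/* microseconds per tick */
	uint64_t t_srtt = 10 << TCP_RTT_SHIFT;	/* smoothed RTT of 10 ticks */

	/* Same expression as the tcp_fill_info() hunk above. */
	uint64_t tcpi_rtt = (t_srtt * tick) >> TCP_RTT_SHIFT;

	printf("tcpi_rtt = %llu us (%.1f ms)\n",
	    (unsigned long long)tcpi_rtt, tcpi_rtt / 1000.0);
	return (0);
}
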
@@ -1016,14 +1250,9 @@
struct tcp_info ti;
error = 0;
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == NULL) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (ECONNRESET);
- }
+ KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
if (sopt->sopt_level != IPPROTO_TCP) {
INP_UNLOCK(inp);
#ifdef INET6
@@ -1031,9 +1260,13 @@
error = ip6_ctloutput(so, sopt);
else
#endif /* INET6 */
- error = ip_ctloutput_pcbinfo(so, sopt, &tcbinfo);
+ error = ip_ctloutput(so, sopt);
return (error);
}
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
tp = intotcpcb(inp);
switch (sopt->sopt_dir) {
@@ -1148,6 +1381,7 @@
}
break;
}
+out:
INP_UNLOCK(inp);
return (error);
}
@@ -1158,10 +1392,10 @@
* be set by the route).
*/
u_long tcp_sendspace = 1024*32;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
&tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
u_long tcp_recvspace = 1024*64;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
&tcp_recvspace , 0, "Maximum incoming TCP datagram size");
/*
@@ -1179,8 +1413,6 @@
int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
#endif
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = soreserve(so, tcp_sendspace, tcp_recvspace);
if (error)
@@ -1188,9 +1420,12 @@
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- error = in_pcballoc(so, &tcbinfo, "tcpinp");
- if (error)
+ INP_INFO_WLOCK(&tcbinfo);
+ error = in_pcballoc(so, &tcbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&tcbinfo);
return (error);
+ }
inp = sotoinpcb(so);
#ifdef INET6
if (isipv6) {
@@ -1201,22 +1436,24 @@
#endif
inp->inp_vflag |= INP_IPV4;
tp = tcp_newtcpcb(inp);
- if (tp == 0) {
- int nofd = so->so_state & SS_NOFDREF; /* XXX */
-
- so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
-
- INP_LOCK(inp);
+ if (tp == NULL) {
#ifdef INET6
- if (isipv6)
+ if (isipv6) {
in6_pcbdetach(inp);
- else
+ in6_pcbfree(inp);
+ } else {
#endif
- in_pcbdetach(inp);
- so->so_state |= nofd;
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ INP_INFO_WUNLOCK(&tcbinfo);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
return (0);
}
@@ -1228,7 +1465,7 @@
* current input data; switch states based on user close, and
* send segment to peer (with FIN).
*/
-static struct tcpcb *
+static void
tcp_disconnect(struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
@@ -1237,18 +1474,25 @@
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- if (tp->t_state < TCPS_ESTABLISHED)
+ /*
+ * Neither tcp_close() nor tcp_drop() should return NULL, as the
+ * socket is still open.
+ */
+ if (tp->t_state < TCPS_ESTABLISHED) {
tp = tcp_close(tp);
- else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_close() returned NULL"));
+ } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
tp = tcp_drop(tp, 0);
- else {
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_drop() returned NULL"));
+ } else {
soisdisconnecting(so);
sbflush(&so->so_rcv);
- tp = tcp_usrclosed(tp);
- if (tp)
- (void) tcp_output(tp);
+ tcp_usrclosed(tp);
+ if (!(inp->inp_vflag & INP_DROPPED))
+ tcp_output(tp);
}
- return (tp);
}
/*
@@ -1261,7 +1505,7 @@
* for peer to send FIN or not respond to keep-alives, etc.
* We can let the user exit from the close as soon as the FIN is acked.
*/
-static struct tcpcb *
+static void
tcp_usrclosed(struct tcpcb *tp)
{
@@ -1273,6 +1517,12 @@
case TCPS_LISTEN:
tp->t_state = TCPS_CLOSED;
tp = tcp_close(tp);
+ /*
+ * tcp_close() should never return NULL here as the socket is
+ * still open.
+ */
+ KASSERT(tp != NULL,
+ ("tcp_usrclosed: tcp_close() returned NULL"));
break;
case TCPS_SYN_SENT:
@@ -1291,9 +1541,322 @@
if (tp->t_state >= TCPS_FIN_WAIT_2) {
soisdisconnected(tp->t_inpcb->inp_socket);
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
- if (tp->t_state == TCPS_FIN_WAIT_2)
- callout_reset(tp->tt_2msl, tcp_maxidle,
- tcp_timer_2msl, tp);
+ if (tp->t_state == TCPS_FIN_WAIT_2) {
+ int timeout;
+
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
+ }
+ }
+}
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_tstate(int t_state)
+{
+
+ switch (t_state) {
+ case TCPS_CLOSED:
+ db_printf("TCPS_CLOSED");
+ return;
+
+ case TCPS_LISTEN:
+ db_printf("TCPS_LISTEN");
+ return;
+
+ case TCPS_SYN_SENT:
+ db_printf("TCPS_SYN_SENT");
+ return;
+
+ case TCPS_SYN_RECEIVED:
+ db_printf("TCPS_SYN_RECEIVED");
+ return;
+
+ case TCPS_ESTABLISHED:
+ db_printf("TCPS_ESTABLISHED");
+ return;
+
+ case TCPS_CLOSE_WAIT:
+ db_printf("TCPS_CLOSE_WAIT");
+ return;
+
+ case TCPS_FIN_WAIT_1:
+ db_printf("TCPS_FIN_WAIT_1");
+ return;
+
+ case TCPS_CLOSING:
+ db_printf("TCPS_CLOSING");
+ return;
+
+ case TCPS_LAST_ACK:
+ db_printf("TCPS_LAST_ACK");
+ return;
+
+ case TCPS_FIN_WAIT_2:
+ db_printf("TCPS_FIN_WAIT_2");
+ return;
+
+ case TCPS_TIME_WAIT:
+ db_printf("TCPS_TIME_WAIT");
+ return;
+
+ default:
+ db_printf("unknown");
+ return;
+ }
+}
+
+static void
+db_print_tflags(u_int t_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_flags & TF_ACKNOW) {
+ db_printf("%sTF_ACKNOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_DELACK) {
+ db_printf("%sTF_DELACK", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NODELAY) {
+ db_printf("%sTF_NODELAY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOOPT) {
+ db_printf("%sTF_NOOPT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SENTFIN) {
+ db_printf("%sTF_SENTFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_SCALE) {
+ db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_SCALE) {
+ db_printf("%sTF_RCVD_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_TSTMP) {
+ db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_TSTMP) {
+ db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SACK_PERMIT) {
+ db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDSYN) {
+ db_printf("%sTF_NEEDSYN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDFIN) {
+ db_printf("%sTF_NEEDFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOPUSH) {
+ db_printf("%sTF_NOPUSH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_MORETOCOME) {
+ db_printf("%sTF_MORETOCOME", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LQ_OVERFLOW) {
+ db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LASTIDLE) {
+ db_printf("%sTF_LASTIDLE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RXWIN0SENT) {
+ db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FASTRECOVERY) {
+ db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_WASFRECOVERY) {
+ db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SIGNATURE) {
+ db_printf("%sTF_SIGNATURE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FORCEDATA) {
+ db_printf("%sTF_FORCEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_TSO) {
+ db_printf("%sTF_TSO", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_toobflags(char t_oobflags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_oobflags & TCPOOB_HAVEDATA) {
+ db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_oobflags & TCPOOB_HADDATA) {
+ db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, tp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
+ LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+
+ db_print_indent(indent);
+ db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
+ &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
+
+ db_print_indent(indent);
+ db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
+ &tp->t_timers->tt_delack, tp->t_inpcb);
+
+ db_print_indent(indent);
+ db_printf("t_state: %d (", tp->t_state);
+ db_print_tstate(tp->t_state);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("t_flags: 0x%x (", tp->t_flags);
+ db_print_tflags(tp->t_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
+ tp->snd_una, tp->snd_max, tp->snd_nxt);
+
+ db_print_indent(indent);
+ db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
+ tp->snd_up, tp->snd_wl1, tp->snd_wl2);
+
+ db_print_indent(indent);
+ db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
+ tp->iss, tp->irs, tp->rcv_nxt);
+
+ db_print_indent(indent);
+ db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n",
+ tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
+
+ db_print_indent(indent);
+ db_printf("snd_wnd: %lu snd_cwnd: %lu snd_bwnd: %lu\n",
+ tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh: %lu snd_bandwidth: %lu snd_recover: "
+ "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
+ tp->snd_recover);
+
+ db_print_indent(indent);
+ db_printf("t_maxopd: %u t_rcvtime: %lu t_starttime: %lu\n",
+ tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+
+ db_print_indent(indent);
+ db_printf("t_rtttime: %d t_rtseq: 0x%08x t_bw_rtttime: %d\n",
+ tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
+
+ db_print_indent(indent);
+ db_printf("t_bw_rtseq: 0x%08x t_rxtcur: %d t_maxseg: %u "
+ "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
+ tp->t_srtt);
+
+ db_print_indent(indent);
+ db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
+ "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+ tp->t_rttbest);
+
+ db_print_indent(indent);
+ db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n",
+ tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
+
+ db_print_indent(indent);
+ db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
+ db_print_toobflags(tp->t_oobflags);
+ db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
+
+ db_print_indent(indent);
+ db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
+ tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
+
+ db_print_indent(indent);
+ db_printf("ts_recent: %u ts_recent_age: %lu\n",
+ tp->ts_recent, tp->ts_recent_age);
+
+ db_print_indent(indent);
+ db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: "
+ "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
+ "t_badrxtwin: %lu\n", tp->snd_ssthresh_prev,
+ tp->snd_recover_prev, tp->t_badrxtwin);
+
+ db_print_indent(indent);
+ db_printf("snd_numholes: %d snd_holes first: %p\n",
+ tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
+
+ db_print_indent(indent);
+ db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: "
+ "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
+
+ /* Skip sackblks, sackhint. */
+
+ db_print_indent(indent);
+ db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n",
+ tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
+}
+
+DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
+{
+ struct tcpcb *tp;
+
+ if (!have_addr) {
+ db_printf("usage: show tcpcb <addr>\n");
+ return;
}
- return (tp);
+ tp = (struct tcpcb *)addr;
+
+ db_print_tcpcb(tp, "tcpcb", 0);
}
+#endif
--- /dev/null
+++ sys/netinet/sctp_input.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.h,v 1.6 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_input.h,v 1.7 2007/07/02 19:22:22 rrs Exp $");
+
+#ifndef __sctp_input_h__
+#define __sctp_input_h__
+
+#if defined(_KERNEL)
+void
+sctp_common_input_processing(struct mbuf **, int, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb *,
+ struct sctp_tcb *, struct sctp_nets *, uint8_t, uint32_t);
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
+ struct sctp_tmit_chunk **bchk);
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries,
+ uint16_t * list);
+
+
+int sctp_is_there_unsent_data(struct sctp_tcb *stcb);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/sctp_os_bsd.h
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_os_bsd.h,v 1.33 2007/09/18 15:16:38 rrs Exp $");
+#ifndef __sctp_os_bsd_h__
+#define __sctp_os_bsd_h__
+/*
+ * includes
+ */
+#include "opt_ipsec.h"
+#include "opt_compat.h"
+#include "opt_inet6.h"
+#include "opt_inet.h"
+#include "opt_sctp.h"
+#include <sys/param.h>
+#include <sys/ktr.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/jail.h>
+#include <sys/resourcevar.h>
+#include <sys/uio.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/kthread.h>
+#include <sys/priv.h>
+#include <sys/random.h>
+#include <sys/limits.h>
+#include <sys/queue.h>
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/key.h>
+#endif /* IPSEC */
+
+#ifdef INET6
+#include <sys/domain.h>
+#ifdef IPSEC
+#include <netipsec/ipsec6.h>
+#endif
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip6protosw.h>
+#include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+#endif /* INET6 */
+
+
+#include <netinet/ip_options.h>
+
+#ifndef in6pcb
+#define in6pcb inpcb
+#endif
+/* Declare all the malloc names for all the various mallocs */
+MALLOC_DECLARE(SCTP_M_MAP);
+MALLOC_DECLARE(SCTP_M_STRMI);
+MALLOC_DECLARE(SCTP_M_STRMO);
+MALLOC_DECLARE(SCTP_M_ASC_ADDR);
+MALLOC_DECLARE(SCTP_M_ASC_IT);
+MALLOC_DECLARE(SCTP_M_AUTH_CL);
+MALLOC_DECLARE(SCTP_M_AUTH_KY);
+MALLOC_DECLARE(SCTP_M_AUTH_HL);
+MALLOC_DECLARE(SCTP_M_AUTH_IF);
+MALLOC_DECLARE(SCTP_M_STRESET);
+MALLOC_DECLARE(SCTP_M_CMSG);
+MALLOC_DECLARE(SCTP_M_COPYAL);
+MALLOC_DECLARE(SCTP_M_VRF);
+MALLOC_DECLARE(SCTP_M_IFA);
+MALLOC_DECLARE(SCTP_M_IFN);
+MALLOC_DECLARE(SCTP_M_TIMW);
+MALLOC_DECLARE(SCTP_M_MVRF);
+MALLOC_DECLARE(SCTP_M_ITER);
+MALLOC_DECLARE(SCTP_M_SOCKOPT);
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+#define SCTP_GET_CYCLECOUNT get_cyclecount()
+#define SCTP_CTR6 sctp_log_trace
+
+#else
+#define SCTP_CTR6 CTR6
+#endif
+
+
+/*
+ *
+ */
+#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
+#define SCTP_LIST_EMPTY(list) LIST_EMPTY(list)
+
+#if defined(SCTP_DEBUG)
+#define SCTPDBG(level, params...) \
+{ \
+ do { \
+ if (sctp_debug_on & level ) { \
+ printf(params); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_ADDR(level, addr) \
+{ \
+ do { \
+ if (sctp_debug_on & level ) { \
+ sctp_print_address(addr); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_PKT(level, iph, sh) \
+{ \
+ do { \
+ if (sctp_debug_on & level) { \
+ sctp_print_address_pkt(iph, sh); \
+ } \
+ } while (0); \
+}
+#else
+#define SCTPDBG(level, params...)
+#define SCTPDBG_ADDR(level, addr)
+#define SCTPDBG_PKT(level, iph, sh)
+#endif
+#define SCTP_PRINTF(params...) printf(params)
+
+#ifdef SCTP_LTRACE_CHUNKS
+#define SCTP_LTRACE_CHK(a, b, c, d) if(sctp_logging_level & SCTP_LTRACE_CHUNK_ENABLE) CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d)
+#else
+#define SCTP_LTRACE_CHK(a, b, c, d)
+#endif
+
+#ifdef SCTP_LTRACE_ERRORS
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ m, inp, stcb, net, file, __LINE__, err);
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ inp, stcb, net, file, __LINE__, err);
+#else
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err)
+#endif
+
+
+/*
+ * Local address and interface list handling
+ */
+#define SCTP_MAX_VRF_ID 0
+#define SCTP_SIZE_OF_VRF_HASH 3
+#define SCTP_IFNAMSIZ IFNAMSIZ
+#define SCTP_DEFAULT_VRFID 0
+#define SCTP_VRF_ADDR_HASH_SIZE 16
+#define SCTP_VRF_IFN_HASH_SIZE 3
+#define SCTP_INIT_VRF_TABLEID(vrf)
+
+#define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP)
+
+/*
+ * Access to IFN's to help with src-addr-selection
+ */
+/* This could return VOID if the index works but for BSD we provide both. */
+#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp
+#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index
+#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp)
+
+/*
+ * general memory allocation
+ */
+#define SCTP_MALLOC(var, type, size, name) \
+ do { \
+ MALLOC(var, type, size, name, M_NOWAIT); \
+ } while (0)
+
+#define SCTP_FREE(var, type) FREE(var, type)
+
+#define SCTP_MALLOC_SONAME(var, type, size) \
+ do { \
+ MALLOC(var, type, size, M_SONAME, M_WAITOK | M_ZERO); \
+ } while (0)
+
+#define SCTP_FREE_SONAME(var) FREE(var, M_SONAME)
+
+#define SCTP_PROCESS_STRUCT struct proc *
+
+/*
+ * zone allocation functions
+ */
+#include <vm/uma.h>
+/* SCTP_ZONE_INIT: initialize the zone */
+typedef struct uma_zone *sctp_zone_t;
+
+#define UMA_ZFLAG_FULL 0x0020
+#define SCTP_ZONE_INIT(zone, name, size, number) { \
+ zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\
+ UMA_ZFLAG_FULL); \
+ uma_zone_set_max(zone, number); \
+}
+
+/* SCTP_ZONE_GET: allocate element from the zone */
+#define SCTP_ZONE_GET(zone, type) \
+ (type *)uma_zalloc(zone, M_NOWAIT);
+
+/* SCTP_ZONE_FREE: free element from the zone */
+#define SCTP_ZONE_FREE(zone, element) \
+ uma_zfree(zone, element);
+#define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT)
+#define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark)
+
+#define SCTP_M_COPYM m_copym
+
+/*
+ * timers
+ */
+#include <sys/callout.h>
+typedef struct callout sctp_os_timer_t;
+
+#define SCTP_OS_TIMER_INIT(tmr) callout_init(tmr, 1)
+#define SCTP_OS_TIMER_START callout_reset
+#define SCTP_OS_TIMER_STOP callout_stop
+#define SCTP_OS_TIMER_STOP_DRAIN callout_drain
+#define SCTP_OS_TIMER_PENDING callout_pending
+#define SCTP_OS_TIMER_ACTIVE callout_active
+#define SCTP_OS_TIMER_DEACTIVATE callout_deactivate
+
+#define sctp_get_tick_count() (ticks)
+
+/* The packed define for 64 bit platforms */
+#define SCTP_PACKED __attribute__((packed))
+#define SCTP_UNUSED __attribute__((unused))
+
+/*
+ * Functions
+ */
+/* Mbuf manipulation and access macros */
+#define SCTP_BUF_LEN(m) (m->m_len)
+#define SCTP_BUF_NEXT(m) (m->m_next)
+#define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt)
+#define SCTP_BUF_RESV_UF(m, size) m->m_data += size
+#define SCTP_BUF_AT(m, size) m->m_data + size
+#define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT)
+#define SCTP_BUF_EXTEND_SIZE(m) (m->m_ext.ext_size)
+#define SCTP_BUF_TYPE(m) (m->m_type)
+#define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif)
+#define SCTP_BUF_PREPEND M_PREPEND
+
+#define SCTP_ALIGN_TO_END(m, len) if(m->m_flags & M_PKTHDR) { \
+ MH_ALIGN(m, len); \
+ } else if ((m->m_flags & M_EXT) == 0) { \
+ M_ALIGN(m, len); \
+ }
+
+/* We make it so that if you have up to 4 threads
+ * writing, based on the default packet log size of
+ * 65k, that would be 4 16k packets before we would
+ * hit a problem.
+ */
+#define SCTP_PKTLOG_WRITERS_NEED_LOCK 3
+
+/*************************/
+/* MTU */
+/*************************/
+#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
+#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
+ if (rt != NULL) \
+ rt->rt_rmx.rmx_mtu = mtu; \
+ } while(0)
+
+/* (de-)register interface event notifications */
+#define SCTP_REGISTER_INTERFACE(ifhandle, af)
+#define SCTP_DEREGISTER_INTERFACE(ifhandle, af)
+
+
+/*************************/
+/* These are for logging */
+/*************************/
+/* return the base ext data pointer */
+#define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf)
+ /* return the refcnt of the data pointer */
+#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt)
+/* return any buffer related flags, this is
+ * used beyond logging for apple only.
+ */
+#define SCTP_BUF_GET_FLAGS(m) (m->m_flags)
+
+/* For BSD this just accesses the M_PKTHDR length,
+ * so it operates on an mbuf with the hdr flag. Other
+ * O/S's may have separate packet header and mbuf
+ * chain pointers... thus the macro.
+ */
+#define SCTP_HEADER_TO_CHAIN(m) (m)
+#define SCTP_DETACH_HEADER_FROM_CHAIN(m)
+#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
+#define SCTP_RELEASE_HEADER(m)
+#define SCTP_RELEASE_PKT(m) sctp_m_freem(m)
+
+#define SCTP_GET_PKT_VRFID(m, vrf_id) ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID)
+
+
+
+/* Attach the chain of data into the sendable packet. */
+#define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \
+ pak = m; \
+ pak->m_pkthdr.len = packet_length; \
+ } while(0)
+
+/* Other m_pkthdr type things */
+#define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0)
+#define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP)))
+
+
+/* This converts any input packet header
+ * into the chain of data holders, for BSD
+ * its a NOP.
+ */
+
+/* Macros for getting length from V6/V4 headers */
+#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
+#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
+
+/* get the v6 hop limit */
+#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
+
+/* is the endpoint v6only? */
+#define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
+/* is the socket non-blocking? */
+#define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO)
+#define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO)
+#define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO)
+/* get the socket type */
+#define SCTP_SO_TYPE(so) ((so)->so_type)
+/* reserve sb space for a socket */
+#define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv)
+/* wakeup a socket */
+#define SCTP_SOWAKEUP(so) wakeup(&(so)->so_timeo)
+/* clear the socket buffer state */
+#define SCTP_SB_CLEAR(sb) \
+ (sb).sb_cc = 0; \
+ (sb).sb_mb = NULL; \
+ (sb).sb_mbcnt = 0;
+
+#define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat
+#define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat
+
+/*
+ * routes, output, etc.
+ */
+typedef struct route sctp_route_t;
+typedef struct rtentry sctp_rtentry_t;
+
+#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+
+/* Future zero copy wakeup/send function */
+#define SCTP_ZERO_COPY_EVENT(inp, so)
+/* This is re-pulse ourselves for sendbuf */
+#define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
+
+/*
+ * IP output routines
+ */
+#define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
+{ \
+ int o_flgs = 0; \
+ if (stcb && stcb->sctp_ep && stcb->sctp_ep->sctp_socket) { \
+ o_flgs = IP_RAWOUTPUT | (stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE); \
+ } else { \
+ o_flgs = IP_RAWOUTPUT; \
+ } \
+ result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
+}
+
+#define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
+{ \
+ if (stcb && stcb->sctp_ep) \
+ result = ip6_output(o_pak, \
+ ((struct in6pcb *)(stcb->sctp_ep))->in6p_outputopts, \
+ (ro), 0, 0, ifp, NULL); \
+ else \
+ result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed,
+ int want_header, int how, int allonebuf, int type);
+
+
+/*
+ * SCTP AUTH
+ */
+#define HAVE_SHA2
+
+#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
+
+#ifdef USE_SCTP_SHA1
+#include <netinet/sctp_sha1.h>
+#else
+#include <crypto/sha1.h>
+/* map standard crypto API names */
+#define SHA1_Init SHA1Init
+#define SHA1_Update SHA1Update
+#define SHA1_Final(x,y) SHA1Final((caddr_t)x, y)
+#endif
+
+#if defined(HAVE_SHA2)
+#include <crypto/sha2/sha2.h>
+#endif
+
+#include <sys/md5.h>
+/* map standard crypto API names */
+#define MD5_Init MD5Init
+#define MD5_Update MD5Update
+#define MD5_Final MD5Final
+
+#endif
--- /dev/null
+++ sys/netinet/sctp_usrreq.c
@@ -0,0 +1,4444 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_usrreq.c,v 1.48 2005/03/07 23:26:08 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_usrreq.c,v 1.48.2.3 2007/12/09 20:23:47 rrs Exp $");
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_var.h>
+#if defined(INET6)
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_cc_functions.h>
+
+
+
+
+void
+sctp_init(void)
+{
+ /* Init the SCTP pcb in sctp_pcb.c */
+ u_long sb_max_adj;
+
+ sctp_pcb_init();
+
+
+ if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE)
+ sctp_max_chunks_on_queue = (nmbclusters / 8);
+ /*
+ * Allow a user to take no more than 1/2 the number of clusters or
+ * SB_MAX, whichever is smaller, for the send window.
+ */
+ sb_max_adj = (u_long)((u_quad_t) (SB_MAX) * MCLBYTES / (MSIZE + MCLBYTES));
+ sctp_sendspace = min(sb_max_adj,
+ (((uint32_t) nmbclusters / 2) * SCTP_DEFAULT_MAXSEGMENT));
+ /*
+ * Now for the recv window: should we take the same amount, or
+ * should we use 1/2 of SB_MAX instead in the SB_MAX min above?
+ * For now just copy the send-window value.
+ */
+ sctp_recvspace = sctp_sendspace;
+
+}
+
+
+
+/*
+ * cleanup of the sctppcbinfo structure.
+ * Assumes that the sctppcbinfo lock is held.
+ */
+void
+sctp_pcbinfo_cleanup(void)
+{
+ /* free the hash tables */
+ if (sctppcbinfo.sctp_asochash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_asochash, sctppcbinfo.hashasocmark);
+ if (sctppcbinfo.sctp_ephash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_ephash, sctppcbinfo.hashmark);
+ if (sctppcbinfo.sctp_tcpephash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_tcpephash, sctppcbinfo.hashtcpmark);
+ if (sctppcbinfo.sctp_restarthash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_restarthash, sctppcbinfo.hashrestartmark);
+}
+
+
+static void
+sctp_pathmtu_adjustment(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint16_t nxtsz)
+{
+ struct sctp_tmit_chunk *chk;
+
+ /* Adjust that too */
+ stcb->asoc.smallest_mtu = nxtsz;
+ /* now off to subtract IP_DF flag if needed */
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_pathmtu_adjust called inp:%p stcb:%p net:%p nxtsz:%d\n",
+ inp, stcb, net, nxtsz);
+#endif
+ TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
+ if ((chk->send_size + IP_HDR_SIZE) > nxtsz) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->send_size + IP_HDR_SIZE) > nxtsz) {
+ /*
+ * For this guy we also mark for immediate resend
+ * since we sent too big a chunk.
+ */
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->rec.data.doing_fast_retransmit = 0;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ }
+ }
+}
+
+static void
+sctp_notify_mbuf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct ip *ip,
+ struct sctphdr *sh)
+{
+ struct icmp *icmph;
+ int totsz, tmr_stopped = 0;
+ uint16_t nxtsz;
+
+ /* protection */
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (ip == NULL) || (sh == NULL)) {
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (icmph->icmp_code != ICMP_UNREACH_NEEDFRAG) {
+ /* not an unreachable message due to fragmentation */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ totsz = ip->ip_len;
+
+ nxtsz = ntohs(icmph->icmp_nextmtu);
+ if (nxtsz == 0) {
+ /*
+ * old-type router that does not tell us what the next MTU
+ * size is. Rats, we will have to guess (in an educated
+ * fashion, of course).
+ */
+ nxtsz = find_next_best_mtu(totsz);
+ }
+ /* Stop any PMTU timer */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ tmr_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ }
+ /* Adjust destination size limit */
+ if (net->mtu > nxtsz) {
+ net->mtu = nxtsz;
+ }
+ /* now what about the ep? */
+ if (stcb->asoc.smallest_mtu > nxtsz) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("notify_mbuf (ICMP) calls sctp_pathmtu_adjust mtu:%d\n",
+ nxtsz);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, nxtsz);
+ }
+ if (tmr_stopped)
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+
+ SCTP_TCB_UNLOCK(stcb);
+}
+
+
+void
+sctp_notify(struct sctp_inpcb *inp,
+ struct ip *ip,
+ struct sctphdr *sh,
+ struct sockaddr *to,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /* protection */
+ int reason;
+ struct icmp *icmph;
+
+
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (sh == NULL) || (to == NULL)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if ((icmph->icmp_code == ICMP_UNREACH_NET) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_ISOLATED) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
+
+ /*
+ * Hmm, reachability problems we must examine closely. If it is
+ * not reachable, we may have lost a network. Or if there is
+ * NO protocol at the other end named SCTP, then we consider
+ * it an OOTB abort.
+ */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* Ok that destination is NOT reachable */
+ SCTP_PRINTF("ICMP (thresh %d/%d) takes interface %p down\n",
+ net->error_count,
+ net->failure_threshold,
+ net);
+
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a destination is unreachable,
+ * the PF bit is turned off. This allows an
+ * unambiguous use of the PF bit for destinations
+ * that are reachable but potentially failed. If the
+ * destination is set to the unreachable state, also
+ * set the destination to the PF state.
+ */
+ /*
+ * Add debug message here if destination is not in
+ * PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ net->error_count = net->failure_threshold + 1;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb, SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else if ((icmph->icmp_code == ICMP_UNREACH_PROTOCOL) ||
+ (icmph->icmp_code == ICMP_UNREACH_PORT)) {
+ /*
+ * Here the peer is either playing tricks on us, including using
+ * an address that belongs to someone who does not support
+ * SCTP, OR it was a userland implementation that shut down and
+ * is now dead. In either case treat it like an OOTB abort
+ * with no TCB.
+ */
+ reason = SCTP_PEER_FAULTY;
+ sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+ /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
+#endif
+ /* no need to unlock here, since the TCB is gone */
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
+void
+sctp_ctlinput(cmd, sa, vip)
+ int cmd;
+ struct sockaddr *sa;
+ void *vip;
+{
+ struct ip *ip = vip;
+ struct sctphdr *sh;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+ if (sa->sa_family != AF_INET ||
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) {
+ return;
+ }
+ if (PRC_IS_REDIRECT(cmd)) {
+ ip = 0;
+ } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
+ return;
+ }
+ if (ip) {
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net = NULL;
+ struct sockaddr_in to, from;
+
+ sh = (struct sctphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ bzero(&to, sizeof(to));
+ bzero(&from, sizeof(from));
+ from.sin_family = to.sin_family = AF_INET;
+ from.sin_len = to.sin_len = sizeof(to);
+ from.sin_port = sh->src_port;
+ from.sin_addr = ip->ip_src;
+ to.sin_port = sh->dest_port;
+ to.sin_addr = ip->ip_dst;
+
+ /*
+ * 'to' holds the dest of the packet that failed to be sent.
+ * 'from' holds our local endpoint address. Thus we reverse
+ * the to and the from in the lookup.
+ */
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
+ (struct sockaddr *)&to,
+ &inp, &net, 1, vrf_id);
+ if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
+ if (cmd != PRC_MSGSIZE) {
+ sctp_notify(inp, ip, sh,
+ (struct sockaddr *)&to, stcb,
+ net);
+ } else {
+ /* handle possible ICMP size messages */
+ sctp_notify_mbuf(inp, stcb, net, ip, sh);
+ }
+ } else {
+ if ((stcb == NULL) && (inp != NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ }
+ return;
+}
+
+static int
+sctp_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in addrs[2];
+ struct sctp_inpcb *inp;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+ int error;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+
+ if (error)
+ return (error);
+
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+
+ stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
+ sintosa(&addrs[1]),
+ &inp, &net, 1, vrf_id);
+ if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
+ if ((inp != NULL) && (stcb == NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ goto cred_can_cont;
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ goto out;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * We use the write lock here only because in the error leg we need
+ * it. If we used RLOCK, then we would have to
+ * wlock/decr/unlock/rlock, which in theory could create a hole.
+ * Better to use the heavier wlock.
+ */
+ SCTP_INP_WLOCK(inp);
+cred_can_cont:
+ error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket);
+ if (error) {
+ SCTP_INP_WUNLOCK(inp);
+ goto out;
+ }
+ cru2x(inp->sctp_socket->so_cred, &xuc);
+ SCTP_INP_WUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+out:
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection");
+
+
+static void
+sctp_abort(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ return;
+ }
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 16);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+static int
+sctp_attach(struct socket *so, int proto, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ struct inpcb *ip_inp;
+ int error;
+ uint32_t vrf_id = SCTP_DEFAULT_VRFID;
+
+#ifdef IPSEC
+ uint32_t flags;
+
+#endif
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = SCTP_SORESERVE(so, sctp_sendspace, sctp_recvspace);
+ if (error) {
+ return error;
+ }
+ error = sctp_inpcb_alloc(so, vrf_id);
+ if (error) {
+ return error;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUND_V6; /* I'm not v6! */
+ ip_inp = &inp->ip_inp.inp;
+ ip_inp->inp_vflag |= INP_IPV4;
+ ip_inp->inp_ip_ttl = ip_defttl;
+#ifdef IPSEC
+ error = ipsec_init_policy(so, &ip_inp->inp_sp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (error != 0) {
+ flags = inp->sctp_flags;
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 15);
+#endif
+ SCTP_INP_WUNLOCK(inp);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+ SCTP_INP_WUNLOCK(inp);
+ }
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INP_WUNLOCK(inp);
+ return 0;
+}
+
+static int
+sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error;
+
+#ifdef INET6
+ if (addr && addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+#endif /* INET6 */
+ if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = sctp_inpcb_bind(so, addr, NULL, p);
+ return error;
+}
+
+void
+sctp_close(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0)
+ return;
+
+ /*
+ * Inform all the lower-layer associations that we are done.
+ */
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+ if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 13);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 14);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ }
+ /*
+ * The socket is now detached, no matter what the state of
+ * the SCTP association.
+ */
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p);
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ int error;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ sctp_m_freem(m);
+ return EINVAL;
+ }
+ /* Got to have a to-address if we are NOT a connected socket */
+ if ((addr == NULL) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE))
+ ) {
+ goto connected_type;
+ } else if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ error = EDESTADDRREQ;
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ return (error);
+ }
+#ifdef INET6
+ if (addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ error = EDESTADDRREQ;
+ return EDESTADDRREQ;
+ }
+#endif /* INET6 */
+connected_type:
+ /* now what about control */
+ if (control) {
+ if (inp->control) {
+ SCTP_PRINTF("huh? control set?\n");
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ inp->control = control;
+ }
+ /* Place the data */
+ if (inp->pkt) {
+ SCTP_BUF_NEXT(inp->pkt_last) = m;
+ inp->pkt_last = m;
+ } else {
+ inp->pkt_last = inp->pkt = m;
+ }
+ if (
+ /* FreeBSD uses a flag passed */
+ ((flags & PRUS_MORETOCOME) == 0)
+ ) {
+ /*
+ * Note: with the current version this code will only be used
+ * by OpenBSD; NetBSD, FreeBSD, and MacOS have methods for
+ * re-defining sosend to use sctp_sosend. One can
+ * optionally switch back to this code (by changing back the
+ * definitions) but this is not advisable. This code is used
+ * by FreeBSD when sending a file with sendfile() though.
+ */
+ int ret;
+
+ ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags);
+ inp->pkt = NULL;
+ inp->control = NULL;
+ return (ret);
+ } else {
+ return (0);
+ }
+}
+
+int
+sctp_disconnect(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if (SCTP_LIST_EMPTY(&inp->sctp_asoc_list)) {
+ /* No connection */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ } else {
+ struct sctp_association *asoc;
+ struct sctp_tcb *stcb;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* We are about to be freed, out of here */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if (((so->so_options & SO_LINGER) &&
+ (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+ if (SCTP_GET_STATE(asoc) !=
+ SCTP_STATE_COOKIE_WAIT) {
+ /* Left with Data unread */
+ struct mbuf *err;
+
+ err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+
+ ph = mtod(err, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(err) = sizeof(struct sctp_paramhdr);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(err));
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+ sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+ /* No unlock tcb assoc is gone */
+ return (0);
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ /* there is nothing queued to send, so done */
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX sockets draft says that SCTP_EOF
+ * should be sent with no data. Currently,
+ * we will allow user data to be sent first
+ * and move to SHUTDOWN-PENDING
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) + sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
+ sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+ return (0);
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ /* not reached */
+ } else {
+ /* UDP model does not support this */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return EOPNOTSUPP;
+ }
+}
+
+int
+sctp_shutdown(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ SCTP_INP_RLOCK(inp);
+ /* For the UDP model this is an invalid call */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* Restore the flags that the soshutdown took away. */
+ so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+ /* This proc will wakeup for read and do nothing (I hope) */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ /*
+ * OK, if we reach here it is the TCP model and it is either a SHUT_WR
+ * or SHUT_RDWR. This means we put the shutdown flag against it.
+ */
+ {
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+
+ socantsendmore(so);
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ /*
+ * Ok we hit the case that the shutdown call was
+ * made after an abort or something. Nothing to do
+ * now.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN the first time through */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("shutdown does an abort");
+#endif
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ op_err, SCTP_SO_LOCKED);
+ goto skip_unlock;
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+skip_unlock:
+ SCTP_INP_RUNLOCK(inp);
+ return 0;
+}
+
+/*
+ * copies a "user" presentable address and removes embedded scope, etc.
+ * returns 0 on success, 1 on error
+ */
+static uint32_t
+sctp_fill_user_address(struct sockaddr_storage *ss, struct sockaddr *sa)
+{
+ struct sockaddr_in6 lsa6;
+
+ sa = (struct sockaddr *)sctp_recover_scope((struct sockaddr_in6 *)sa,
+ &lsa6);
+ memcpy(ss, sa, sa->sa_len);
+ return (0);
+}
+
+
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static size_t
+sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas,
+ uint32_t vrf_id)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ size_t actual;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+
+ actual = 0;
+ if (limit <= 0)
+ return (actual);
+
+ if (stcb) {
+ /* Turn on all the appropriate scope */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* Turn on ALL scope, since we look at the EP */
+ loopback_scope = ipv4_local_scope = local_scope =
+ site_scope = 1;
+ }
+ ipv4_addr_legal = ipv6_addr_legal = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * For the BOUND-ALL case, the list
+ * associated with a TCB is always
+ * considered a reverse list, i.e.
+ * it lists addresses that are NOT
+ * part of the association. If this
+ * is one of those we must skip it.
+ */
+ if (sctp_is_addr_restricted(stcb,
+ sctp_ifa)) {
+ continue;
+ }
+ }
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /*
+ * we skip unspecified
+ * addresses
+ */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) {
+ in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)sas);
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
+ actual += sizeof(struct sockaddr_in6);
+ } else {
+ memcpy(sas, sin, sizeof(*sin));
+ ((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin));
+ actual += sizeof(*sin);
+ }
+ if (actual >= limit) {
+ return (actual);
+ }
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+ * we skip unspecified
+ * addresses
+ */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /*
+ * bad link
+ * local
+ * address
+ */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ memcpy(sas, sin6, sizeof(*sin6));
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin6));
+ actual += sizeof(*sin6);
+ if (actual >= limit) {
+ return (actual);
+ }
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (stcb) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ }
+ if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
+ continue;
+
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas +
+ laddr->ifa->address.sa.sa_len);
+ actual += laddr->ifa->address.sa.sa_len;
+ if (actual >= limit) {
+ return (actual);
+ }
+ }
+ }
+ return (actual);
+}
+
+static size_t
+sctp_fill_up_addresses(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas)
+{
+ size_t size = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* fill up addresses for the endpoint's default vrf */
+ size = sctp_fill_up_addresses_vrf(inp, stcb, limit, sas,
+ inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (size);
+}
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static int
+sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
+{
+ int cnt = 0;
+ struct sctp_vrf *vrf = NULL;
+
+ /*
+ * In both sub-set bound and bound_all cases we return the MAXIMUM
+ * number of addresses that you COULD get. In reality the sub-set
+ * bound may have an exclusion list for a given TCB OR in the
+ * bound-all case a TCB may NOT include the loopback or other
+ * addresses as well.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* Count them if they are the right type */
+ if (sctp_ifa->address.sa.sa_family == AF_INET) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ return (cnt);
+}
+
+static int
+sctp_count_max_addresses(struct sctp_inpcb *inp)
+{
+ int cnt = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* count addresses for the endpoint's default VRF */
+ cnt = sctp_count_max_addresses_vrf(inp, inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (cnt);
+}
+
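+/*
+ * Worker for the sctp_connectx() options: validates the packed address
+ * list, creates the association, and either sends the INIT immediately or
+ * (for the delayed variant) just arms the INIT timer until the caller
+ * issues SCTP_CONNECT_X_COMPLETE.
+ */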
+static int
+sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
+ size_t optsize, void *p, int delay)
+{
+ int error = 0;
+ int creat_lock_on = 0;
+ struct sctp_tcb *stcb = NULL;
+ struct sockaddr *sa;
+ int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
+ int added = 0;
+ uint32_t vrf_id;
+ int bad_addresses = 0;
+ sctp_assoc_t *a_id;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ return (EALREADY);
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_ASOC_CREATE_LOCK(inp);
+ creat_lock_on = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+ totaddrp = (int *)optval;
+ totaddr = *totaddrp;
+ sa = (struct sockaddr *)(totaddrp + 1);
+ stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (optsize - sizeof(int)), &bad_addresses);
+ if ((stcb != NULL) || bad_addresses) {
+ /* Already have or are bringing up an association */
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ creat_lock_on = 0;
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ if (bad_addresses == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (num_v6 > 0)) {
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ (num_v4 > 0)) {
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)inp;
+ if (SCTP_IPV6_V6ONLY(inp6)) {
+ /*
+ * if IPV6_V6ONLY flag, ignore connections destined
+ * to a v4 addr or v4-mapped addr
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* FIX ME: do we want to pass in a vrf on the connect call? */
+ vrf_id = inp->def_vrf_id;
+
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, sa, 1, &error, 0, vrf_id,
+ (struct thread *)p
+ );
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ /* move to second address */
+ if (sa->sa_family == AF_INET)
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in));
+ else
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6));
+
+ error = 0;
+ added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
+ /* Fill in the return id */
+ if (error) {
+ (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
+ goto out_now;
+ }
+ a_id = (sctp_assoc_t *) optval;
+ *a_id = sctp_get_associd(stcb);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (delay) {
+ /* doing delayed connection */
+ stcb->asoc.delayed_connection = 1;
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, stcb->asoc.primary_destination);
+ } else {
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ soisconnecting(so);
+ }
+out_now:
+ if (creat_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
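+/*
+ * Locate the TCB a socket option applies to: one-to-one style sockets use
+ * their single association, otherwise the association id is looked up.
+ * The TCB, if any, is returned locked; a zero id on a one-to-many socket
+ * leaves stcb NULL so the option applies to the endpoint.
+ */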
+#define SCTP_FIND_STCB(inp, stcb, assoc_id) { \
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||\
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { \
+ SCTP_INP_RLOCK(inp); \
+ stcb = LIST_FIRST(&inp->sctp_asoc_list); \
+ if (stcb) { \
+ SCTP_TCB_LOCK(stcb); \
+ } \
+ SCTP_INP_RUNLOCK(inp); \
+ } else if (assoc_id != 0) { \
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \
+ if (stcb == NULL) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
+ error = ENOENT; \
+ break; \
+ } \
+ } else { \
+ stcb = NULL; \
+ } \
+ }
+
+
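+/*
+ * Verify that the caller supplied at least sizeof(type) bytes of option
+ * data; on success view the buffer through a typed pointer, otherwise
+ * fail the surrounding switch case with EINVAL.
+ */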
+#define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\
+ if (size < sizeof(type)) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \
+ error = EINVAL; \
+ break; \
+ } else { \
+ destp = (type *)srcp; \
+ } \
+ }
+
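+/*
+ * getsockopt() handler for IPPROTO_SCTP options.  Each case validates the
+ * user buffer with SCTP_CHECK_AND_CAST, resolves the association (if any)
+ * with SCTP_FIND_STCB, copies the values out and reports the number of
+ * bytes written through *optsize.
+ */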
+static int
+sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
+ void *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error, val = 0;
+ struct sctp_tcb *stcb = NULL;
+
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = 0;
+
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ case SCTP_USE_EXT_RCVINFO:
+ SCTP_INP_RLOCK(inp);
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT);
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4);
+ break;
+ case SCTP_AUTO_ASCONF:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto flags_out;
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ break;
+ case SCTP_NODELAY:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY);
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO);
+ break;
+ case SCTP_AUTOCLOSE:
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))
+ val = TICKS_TO_SEC(inp->sctp_ep.auto_close_time);
+ else
+ val = 0;
+ break;
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ } /* end switch (sopt->sopt_name) */
+ if (optname != SCTP_AUTOCLOSE) {
+ /* make it an "on/off" value */
+ val = (val != 0);
+ }
+ if (*optsize < sizeof(val)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+flags_out:
+ SCTP_INP_RUNLOCK(inp);
+ if (error == 0) {
+ /* return the option value */
+ *(int *)optval = val;
+ *optsize = sizeof(val);
+ }
+ break;
+ case SCTP_GET_PACKET_LOG:
+ {
+#ifdef SCTP_PACKET_LOGGING
+ uint8_t *target;
+ int ret;
+
+ SCTP_CHECK_AND_CAST(target, optval, uint8_t, *optsize);
+ ret = sctp_copy_out_packet_log(target, (int)*optsize);
+ *optsize = ret;
+#else
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+#endif
+ break;
+ }
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ *value = inp->partial_delivery_point;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
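+ /*
+ * The two feature flags encode the three interleave levels:
+ * both on = level 2, FRAG_INTERLEAVE alone = level 1, neither = level 0.
+ */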
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) {
+ *value = SCTP_FRAG_LEVEL_2;
+ } else {
+ *value = SCTP_FRAG_LEVEL_1;
+ }
+ } else {
+ *value = SCTP_FRAG_LEVEL_0;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ if (sctp_cmt_on_off) {
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.sctp_cmt_on_off;
+ SCTP_TCB_UNLOCK(stcb);
+
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ /* JRS - Get socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.congestion_control_module;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ADDR_LEN:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ error = EINVAL;
+#ifdef INET
+ if (av->assoc_value == AF_INET) {
+ av->assoc_value = sizeof(struct sockaddr_in);
+ error = 0;
+ }
+#endif
+#ifdef INET6
+ if (av->assoc_value == AF_INET6) {
+ av->assoc_value = sizeof(struct sockaddr_in6);
+ error = 0;
+ }
+#endif
+ if (error) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ASSOC_NUMBER:
+ {
+ uint32_t *value, cnt;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ cnt = 0;
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ cnt++;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *value = cnt;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+
+ case SCTP_GET_ASSOC_ID_LIST:
+ {
+ struct sctp_assoc_ids *ids;
+ unsigned int at, limit;
+
+ SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+ at = 0;
+ limit = *optsize / sizeof(sctp_assoc_t);
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (at < limit) {
+ ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
+ } else {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = at * sizeof(sctp_assoc_t);
+ }
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.context;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_context;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize);
+ *default_vrfid = inp->def_vrf_id;
+ break;
+ }
+ case SCTP_GET_ASOC_VRF:
+ {
+ struct sctp_assoc_value *id;
+
+ SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, id->assoc_id);
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ id->assoc_value = stcb->asoc.vrf_id;
+ break;
+ }
+ case SCTP_GET_VRF_IDS:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_GET_NONCE_VALUES:
+ {
+ struct sctp_get_nonce_values *gnv;
+
+ SCTP_CHECK_AND_CAST(gnv, optval, struct sctp_get_nonce_values, *optsize);
+ SCTP_FIND_STCB(inp, stcb, gnv->gn_assoc_id);
+
+ if (stcb) {
+ gnv->gn_peers_tag = stcb->asoc.peer_vtag;
+ gnv->gn_local_tag = stcb->asoc.my_vtag;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(*gnv);
+ }
+ break;
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
+ if (stcb) {
+ sack->sack_delay = stcb->asoc.delayed_ack;
+ sack->sack_freq = stcb->asoc.sack_freq;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sack);
+ }
+ break;
+
+ case SCTP_GET_SNDBUF_USE:
+ {
+ struct sctp_sockstat *ss;
+
+ SCTP_CHECK_AND_CAST(ss, optval, struct sctp_sockstat, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ss->ss_assoc_id);
+
+ if (stcb) {
+ ss->ss_total_sndbuf = stcb->asoc.total_output_queue_size;
+ ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue +
+ stcb->asoc.size_on_all_streams);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(struct sctp_sockstat);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint8_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.max_burst;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint8_t);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
+ av->assoc_value = 0;
+ else
+ av->assoc_value = inp->sctp_frag_point - ovh;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ case SCTP_GET_STAT_LOG:
+ error = sctp_fill_stat_log(optval, optsize);
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize);
+ memset(events, 0, sizeof(*events));
+ SCTP_INP_RLOCK(inp);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT))
+ events->sctp_data_io_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT))
+ events->sctp_association_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT))
+ events->sctp_address_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT))
+ events->sctp_send_failure_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR))
+ events->sctp_peer_error_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
+ events->sctp_shutdown_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT))
+ events->sctp_partial_delivery_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT))
+ events->sctp_adaptation_layer_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT))
+ events->sctp_authentication_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
+ events->sctp_stream_reset_events = 1;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(struct sctp_event_subscribe);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.adaptation_layer_indicator;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.initial_sequence_debug;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_LOCAL_ADDR_SIZE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = sctp_count_max_addresses(inp);
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_REMOTE_ADDR_SIZE:
+ {
+ uint32_t *value;
+ size_t size;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ /* FIXME MT: change to sctp_assoc_value? */
+ SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t) * value);
+
+ if (stcb) {
+ size = 0;
+ /* Count the sizes */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ size += sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ size += sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ *value = (uint32_t) size;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_PEER_ADDRESSES:
+ /*
+ * Get the address information; an array is passed in for us
+ * to fill up, and we pack it.
+ */
+ {
+ size_t cpsz, left;
+ struct sockaddr_storage *sas;
+ struct sctp_nets *net;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ if (stcb) {
+ left = (*optsize) - sizeof(struct sctp_getaddresses);
+ *optsize = sizeof(struct sctp_getaddresses);
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ cpsz = sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ cpsz = sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ if (left < cpsz) {
+ /* not enough room. */
+ break;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) &&
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+ /* Must map the address */
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+ (struct sockaddr_in6 *)sas);
+ } else {
+ memcpy(sas, &net->ro._l_addr, cpsz);
+ }
+ ((struct sockaddr_in *)sas)->sin_port = stcb->rport;
+
+ sas = (struct sockaddr_storage *)((caddr_t)sas + cpsz);
+ left -= cpsz;
+ *optsize += cpsz;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ }
+ break;
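+ /*
+ * Illustrative userland sketch (not part of this file): a caller would
+ * normally size the buffer via SCTP_GET_REMOTE_ADDR_SIZE first, e.g.
+ *
+ * uint32_t sz = assoc_id;
+ * socklen_t len = sizeof(sz);
+ * getsockopt(sd, IPPROTO_SCTP, SCTP_GET_REMOTE_ADDR_SIZE, &sz, &len);
+ * len = sizeof(struct sctp_getaddresses) + sz;
+ * ...allocate len bytes, set sget_assoc_id, then...
+ * getsockopt(sd, IPPROTO_SCTP, SCTP_GET_PEER_ADDRESSES, addrs, &len);
+ */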
+ case SCTP_GET_LOCAL_ADDRESSES:
+ {
+ size_t limit, actual;
+ struct sockaddr_storage *sas;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+ limit = *optsize - sizeof(sctp_assoc_t);
+ actual = sctp_fill_up_addresses(inp, stcb, limit, sas);
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ *optsize = sizeof(struct sockaddr_storage) + actual;
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ if (stcb) {
+ /* Applies to the specific association */
+ paddrp->spp_flags = 0;
+ if (net) {
+ int ovh;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+
+ paddrp->spp_pathmaxrxt = net->failure_threshold;
+ paddrp->spp_pathmtu = net->mtu - ovh;
+ /* get flags for HB */
+ if (net->dest_state & SCTP_ADDR_NOHB)
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ else
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ /* get flags for PMTU */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+ }
+#ifdef INET
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ paddrp->spp_ipv4_tos = net->tos_flowlabel & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+ }
+#endif
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ paddrp->spp_ipv6_flowlabel = net->tos_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ } else {
+ /*
+ * No destination so return default
+ * value
+ */
+ int cnt = 0;
+
+ paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
+ paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+#ifdef INET
+ paddrp->spp_ipv4_tos = stcb->asoc.default_tos & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+#endif
+ /* default settings should be these */
+ if (stcb->asoc.hb_is_disabled == 0) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ }
+ }
+ paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay;
+ paddrp->spp_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Use endpoint defaults */
+ SCTP_INP_RLOCK(inp);
+ paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
+ paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ paddrp->spp_assoc_id = (sctp_assoc_t) 0;
+ /* get inp's default */
+#ifdef INET
+ paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ /* can't return this */
+ paddrp->spp_pathmtu = 0;
+
+ /* default behavior, no stcb */
+ paddrp->spp_flags = SPP_PMTUD_ENABLE;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_paddrparams);
+ }
+ break;
+ case SCTP_GET_PEER_ADDR_INFO:
+ {
+ struct sctp_paddrinfo *paddri;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
+ paddri->spinfo_state = net->dest_state & (SCTP_REACHABLE_MASK | SCTP_ADDR_NOHB);
+ paddri->spinfo_cwnd = net->cwnd;
+ paddri->spinfo_srtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ paddri->spinfo_rto = net->RTO;
+ paddri->spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_paddrinfo);
+ }
+ break;
+ case SCTP_PCB_STATUS:
+ {
+ struct sctp_pcbinfo *spcb;
+
+ SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize);
+ sctp_fill_pcbinfo(spcb);
+ *optsize = sizeof(struct sctp_pcbinfo);
+ }
+ break;
+
+ case SCTP_STATUS:
+ {
+ struct sctp_nets *net;
+ struct sctp_status *sstat;
+
+ SCTP_CHECK_AND_CAST(sstat, optval, struct sctp_status, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id);
+
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ /*
+ * I think passing the state is fine since
+ * sctp_constants.h will be available to the user
+ * land.
+ */
+ sstat->sstat_state = stcb->asoc.state;
+ sstat->sstat_assoc_id = sctp_get_associd(stcb);
+ sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
+ sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
+ /*
+ * We can't include chunks that have been passed to
+ * the socket layer. Only things in queue.
+ */
+ sstat->sstat_penddata = (stcb->asoc.cnt_on_reasm_queue +
+ stcb->asoc.cnt_on_all_streams);
+
+
+ sstat->sstat_instrms = stcb->asoc.streamincnt;
+ sstat->sstat_outstrms = stcb->asoc.streamoutcnt;
+ sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ memcpy(&sstat->sstat_primary.spinfo_address,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
+ net = stcb->asoc.primary_destination;
+ ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
+ /*
+ * Again the user can get info from sctp_constants.h
+ * for what the state of the network is.
+ */
+ sstat->sstat_primary.spinfo_state = net->dest_state & SCTP_REACHABLE_MASK;
+ sstat->sstat_primary.spinfo_cwnd = net->cwnd;
+ sstat->sstat_primary.spinfo_srtt = net->lastsa;
+ sstat->sstat_primary.spinfo_rto = net->RTO;
+ sstat->sstat_primary.spinfo_mtu = net->mtu;
+ sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(*sstat);
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ srto->srto_initial = stcb->asoc.initial_rto;
+ srto->srto_max = stcb->asoc.maxrto;
+ srto->srto_min = stcb->asoc.minrto;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ srto->srto_initial = inp->sctp_ep.initial_rto;
+ srto->srto_max = inp->sctp_ep.sctp_maxrto;
+ srto->srto_min = inp->sctp_ep.sctp_minrto;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*srto);
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+ uint32_t oldval;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+
+ if (stcb) {
+ oldval = sasoc->sasoc_cookie_life;
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life);
+ sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd;
+ sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
+ sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sasoc);
+ }
+ break;
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send));
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*s_info);
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, *optsize);
+ SCTP_INP_RLOCK(inp);
+ sinit->sinit_num_ostreams = inp->sctp_ep.pre_open_stream_count;
+ sinit->sinit_max_instreams = inp->sctp_ep.max_open_streams_intome;
+ sinit->sinit_max_attempts = inp->sctp_ep.max_init_times;
+ sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(*sinit);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ /* we allow a "get" operation on this */
+ {
+ struct sctp_setprim *ssp;
+
+ SCTP_CHECK_AND_CAST(ssp, optval, struct sctp_setprim, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
+
+ if (stcb) {
+ /* simply copy out the sockaddr_storage... */
+ int len;
+
+ len = *optsize;
+ if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
+ len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
+
+ memcpy(&ssp->ssp_addr,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ len);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ }
+ *optsize = sizeof(*ssp);
+ }
+ break;
+
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint32_t size;
+ int i;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ hmaclist = inp->sctp_ep.local_hmacs;
+ if (hmaclist == NULL) {
+ /* no HMACs to return */
+ *optsize = sizeof(*shmac);
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* is there room for all of the hmac ids? */
+ size = sizeof(*shmac) + (hmaclist->num_algo *
+ sizeof(shmac->shmac_idents[0]));
+ if ((size_t)(*optsize) < size) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* copy out the list */
+ for (i = 0; i < hmaclist->num_algo; i++)
+ shmac->shmac_idents[i] = hmaclist->hmac[i];
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = size;
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, *optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ if (stcb) {
+ /* get the active key on the assoc */
+ scact->scact_keynumber = stcb->asoc.authinfo.assoc_keyid;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get the endpoint active key */
+ SCTP_INP_RLOCK(inp);
+ scact->scact_keynumber = inp->sctp_ep.default_keyid;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*scact);
+ break;
+ }
+ case SCTP_LOCAL_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get off the endpoint */
+ SCTP_INP_RLOCK(inp);
+ chklist = inp->sctp_ep.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+ case SCTP_PEER_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.peer_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ *optsize = 0;
+ break;
+ } /* end switch (sopt->sopt_name) */
+ return (error);
+}
+
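+/*
+ * setsockopt() handler for IPPROTO_SCTP options; per-association cases
+ * locate and lock the TCB with SCTP_FIND_STCB and fall back to the
+ * endpoint defaults when no association is specified.
+ */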
+static int
+sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
+ void *p)
+{
+ int error, set_opt;
+ uint32_t *mopt;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_inpcb *inp = NULL;
+ uint32_t vrf_id;
+
+ if (optval == NULL) {
+ SCTP_PRINTF("optval is NULL\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_PRINTF("inp is NULL?\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ vrf_id = inp->def_vrf_id;
+
+ error = 0;
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_USE_EXT_RCVINFO:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ /* copy in the option value */
+ SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
+ set_opt = 0;
+ if (error)
+ break;
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ set_opt = SCTP_PCB_FLAGS_NO_FRAGMENT;
+ break;
+ case SCTP_AUTO_ASCONF:
+ /*
+ * NOTE: we don't really support this flag
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ set_opt = SCTP_PCB_FLAGS_EXPLICIT_EOR;
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ set_opt = SCTP_PCB_FLAGS_EXT_RCVINFO;
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ set_opt = SCTP_PCB_FLAGS_NEEDS_MAPPED_V4;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_NODELAY:
+ set_opt = SCTP_PCB_FLAGS_NODELAY;
+ break;
+ case SCTP_AUTOCLOSE:
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ set_opt = SCTP_PCB_FLAGS_AUTOCLOSE;
+ /*
+ * The option value is in seconds and is stored internally in ticks.
+ * Note this does not affect old associations, only new ones.
+ */
+ inp->sctp_ep.auto_close_time = SEC_TO_TICKS(*mopt);
+ break;
+ }
+ SCTP_INP_WLOCK(inp);
+ if (*mopt != 0) {
+ sctp_feature_on(inp, set_opt);
+ } else {
+ sctp_feature_off(inp, set_opt);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
+ if (*value > SCTP_SB_LIMIT_RCV(so)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->partial_delivery_point = *value;
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ /* not yet until we re-write sctp_recvmsg() */
+ {
+ uint32_t *level;
+
+ SCTP_CHECK_AND_CAST(level, optval, uint32_t, optsize);
+ if (*level == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ if (sctp_cmt_on_off) {
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ stcb->asoc.sctp_cmt_on_off = (uint8_t) av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ }
+ }
+ break;
+ /* JRS - Set socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ switch (av->assoc_value) {
+ /*
+ * JRS - Standard TCP congestion
+ * control
+ */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - High Speed TCP congestion
+ * control (Floyd)
+ */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - All other values are
+ * invalid
+ */
+ default:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ } else {
+ switch (av->assoc_value) {
+ case SCTP_CC_RFC2581:
+ case SCTP_CC_HSTCP:
+ case SCTP_CC_HTCP:
+ inp->sctp_ep.sctp_default_cc_module = av->assoc_value;
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ }
+ break;
+ case SCTP_CLR_STAT_LOG:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ stcb->asoc.context = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_context = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, optsize);
+ if (*default_vrfid > SCTP_MAX_VRF_ID) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->def_vrf_id = *default_vrfid;
+ break;
+ }
+ case SCTP_DEL_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_ADD_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
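+ /*
+ * sack_delay is in milliseconds and sack_freq in packets; a zero
+ * field leaves the corresponding setting unchanged.
+ */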
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
+ if (sack->sack_delay) {
+ if (sack->sack_delay > SCTP_MAX_SACK_DELAY)
+ sack->sack_delay = SCTP_MAX_SACK_DELAY;
+ }
+ if (stcb) {
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ stcb->asoc.delayed_ack = sack->sack_delay;
+ }
+ if (sack->sack_freq) {
+ stcb->asoc.sack_freq = sack->sack_freq;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
+ }
+ if (sack->sack_freq) {
+ inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_CHUNK:
+ {
+ struct sctp_authchunk *sauth;
+
+ SCTP_CHECK_AND_CAST(sauth, optval, struct sctp_authchunk, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ case SCTP_AUTH_KEY:
+ {
+ struct sctp_authkey *sca;
+ struct sctp_keyhead *shared_keys;
+ sctp_sharedkey_t *shared_key;
+ sctp_key_t *key = NULL;
+ size_t size;
+
+ SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize);
+ SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id);
+ size = optsize - sizeof(*sca);
+
+ if (stcb) {
+ /* set it on the assoc */
+ shared_keys = &stcb->asoc.shared_keys;
+ /* clear the cached keys for this key id */
+ sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ shared_keys = &inp->sctp_ep.shared_keys;
+ /*
+ * clear the cached keys on all assocs for
+ * this key id
+ */
+ sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint32_t hmacid;
+ size_t size, i, found;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
+ size = (optsize - sizeof(*shmac)) / sizeof(shmac->shmac_idents[0]);
+ hmaclist = sctp_alloc_hmaclist(size);
+ if (hmaclist == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ break;
+ }
+ for (i = 0; i < size; i++) {
+ hmacid = shmac->shmac_idents[i];
+ if (sctp_auth_add_hmacid(hmaclist, (uint16_t) hmacid)) {
+ /* invalid HMACs were found */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ sctp_free_hmaclist(hmaclist);
+ goto sctp_set_hmac_done;
+ }
+ }
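+ /*
+ * HMAC-SHA1 is mandatory for SCTP-AUTH, so reject any list
+ * that does not include it.
+ */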
+ found = 0;
+ for (i = 0; i < hmaclist->num_algo; i++) {
+ if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
+ /* already in list */
+ found = 1;
+ }
+ }
+ if (!found) {
+ sctp_free_hmaclist(hmaclist);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs = hmaclist;
+ SCTP_INP_WUNLOCK(inp);
+ sctp_set_hmac_done:
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ /* set the active key on the right place */
+ if (stcb) {
+ /* set the active key on the assoc */
+ if (sctp_auth_setactivekey(stcb, scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set the active key on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_DELETE_KEY:
+ {
+ struct sctp_authkeyid *scdel;
+
+ SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize);
+ SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id);
+
+ /* delete the key from the right place */
+ if (stcb) {
+ if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+
+ case SCTP_RESET_STREAMS:
+ {
+ struct sctp_stream_reset *strrst;
+ uint8_t send_in = 0, send_tsn = 0, send_out = 0;
+ int i;
+
+ SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_stream_reset, optsize);
+ SCTP_FIND_STCB(inp, stcb, strrst->strrst_assoc_id);
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * Peer does not support it, we return
+ * protocol not supported since this is true
+ * for this feature and this peer, not the
+ * socket request in general.
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPROTONOSUPPORT);
+ error = EPROTONOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (strrst->strrst_flags == SCTP_RESET_LOCAL_RECV) {
+ send_in = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_LOCAL_SEND) {
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_BOTH) {
+ send_in = 1;
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_TSN) {
+ send_tsn = 1;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ for (i = 0; i < strrst->strrst_num_streams; i++) {
+ if ((send_in) &&
+
+ (strrst->strrst_list[i] > stcb->asoc.streamincnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ if ((send_out) &&
+ (strrst->strrst_list[i] > stcb->asoc.streamoutcnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ }
+ if (error) {
+ get_out:
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ error = sctp_send_str_reset_req(stcb, strrst->strrst_num_streams,
+ strrst->strrst_list,
+ send_out, (stcb->asoc.str_reset_seq_in - 3),
+ send_in, send_tsn);
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+
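+ /*
+ * The connectx option value is a packed blob: an int giving the number
+ * of addresses followed by the sockaddrs themselves; on success the
+ * new association id is written back over the start of the buffer.
+ */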
+ case SCTP_CONNECT_X:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 0);
+ break;
+
+ case SCTP_CONNECT_X_DELAYED:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 1);
+ break;
+
+ case SCTP_CONNECT_X_COMPLETE:
+ {
+ struct sockaddr *sa;
+ struct sctp_nets *net;
+
+ /* FIXME MT: check correct? */
+ SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
+
+ /* find tcb */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ net = sctp_findnet(stcb, sa);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.delayed_connection == 1) {
+ stcb->asoc.delayed_connection = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
+ stcb->asoc.primary_destination,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ } else {
+ /*
+ * already expired or did not use delayed
+ * connectx
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *burst;
+
+ SCTP_CHECK_AND_CAST(burst, optval, uint8_t, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (*burst) {
+ inp->sctp_ep.max_burst = *burst;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (stcb) {
+ if (av->assoc_value) {
+ stcb->asoc.sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ stcb->asoc.sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ /*
+ * FIXME MT: I think this is not in tune
+ * with the API ID
+ */
+ if (av->assoc_value) {
+ inp->sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (events->sctp_data_io_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ }
+
+ if (events->sctp_association_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ }
+
+ if (events->sctp_address_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ }
+
+ if (events->sctp_send_failure_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ }
+
+ if (events->sctp_peer_error_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ }
+
+ if (events->sctp_shutdown_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ }
+
+ if (events->sctp_partial_delivery_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ }
+
+ if (events->sctp_adaptation_layer_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ }
+
+ if (events->sctp_authentication_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ }
+
+ if (events->sctp_stream_reset_events) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ struct sctp_setadaptation *adap_bits;
+
+ SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#ifdef SCTP_DEBUG
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *vvv;
+
+ SCTP_CHECK_AND_CAST(vvv, optval, uint32_t, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.initial_sequence_debug = *vvv;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#endif
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) {
+ memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ /* Applies to the specific association */
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&paddrp->spp_address,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ /* sanity checks */
+ if ((paddrp->spp_flags & SPP_HB_ENABLE) && (paddrp->spp_flags & SPP_HB_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (stcb) {
+ /************************TCB SPECIFIC SET ******************/
+ /*
+ * do we change the timer for HB? We run
+ * only one.
+ */
+ int ovh = 0;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (paddrp->spp_hbinterval)
+ stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
+ else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ stcb->asoc.heart_beat_delay = 0;
+
+ /* network sets ? */
+ if (net) {
+ /************************NET SPECIFIC SET ******************/
+ if (paddrp->spp_flags & SPP_HB_DEMAND) {
+ /* on demand HB */
+ if (sctp_send_hb(stcb, 1, net) < 0) {
+ /* asoc destroyed */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ net->dest_state |= SCTP_ADDR_NOHB;
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ net->dest_state &= ~SCTP_ADDR_NOHB;
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("SCTP_PMTU_DISABLE calls sctp_pathmtu_adjustment:%d\n",
+ net->mtu);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ if (paddrp->spp_pathmaxrxt)
+ net->failure_threshold = paddrp->spp_pathmaxrxt;
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS) {
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ net->tos_flowlabel = paddrp->spp_ipv4_tos & 0x000000fc;
+ }
+ }
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ net->tos_flowlabel = paddrp->spp_ipv6_flowlabel;
+ }
+ }
+#endif
+ } else {
+ /************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
+ if (paddrp->spp_pathmaxrxt)
+ stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
+
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* Turn back on the timer */
+ stcb->asoc.hb_is_disabled = 0;
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("SCTP_PMTU_DISABLE calls sctp_pathmtu_adjustment:%d\n",
+ net->mtu);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ stcb->asoc.hb_is_disabled = 1;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (lnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ cnt_of_unconf++;
+ }
+ }
+ /*
+ * stop the timer ONLY if we
+ * have no unconfirmed
+ * addresses
+ */
+ if (cnt_of_unconf == 0) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* start up the timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ }
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS)
+ stcb->asoc.default_tos = paddrp->spp_ipv4_tos & 0x000000fc;
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL)
+ stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel;
+#endif
+
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /************************NO TCB, SET TO default stuff ******************/
+ SCTP_INP_WLOCK(inp);
+ /*
+ * For the TOS/FLOWLABEL stuff you set it
+ * with the options on the socket
+ */
+ if (paddrp->spp_pathmaxrxt) {
+ inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
+ }
+ if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
+ else if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
+ paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+
+ } else if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+ uint32_t new_init, new_min, new_max;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = stcb->asoc.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = stcb->asoc.maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = stcb->asoc.minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ stcb->asoc.initial_rto = new_init;
+ stcb->asoc.maxrto = new_max;
+ stcb->asoc.minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDOM);
+ error = EDOM;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = inp->sctp_ep.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = inp->sctp_ep.sctp_maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = inp->sctp_ep.sctp_minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ inp->sctp_ep.initial_rto = new_init;
+ inp->sctp_ep.sctp_maxrto = new_max;
+ inp->sctp_ep.sctp_minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDOM);
+ error = EDOM;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+ if (sasoc->sasoc_cookie_life) {
+ /* boundary check the cookie life */
+ if (sasoc->sasoc_cookie_life < 1000)
+ sasoc->sasoc_cookie_life = 1000;
+ if (sasoc->sasoc_cookie_life > SCTP_MAX_COOKIE_LIFE) {
+ sasoc->sasoc_cookie_life = SCTP_MAX_COOKIE_LIFE;
+ }
+ }
+ if (stcb) {
+ if (sasoc->sasoc_asocmaxrxt)
+ stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sasoc->sasoc_asocmaxrxt)
+ inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, optsize);
+ SCTP_INP_WLOCK(inp);
+ if (sinit->sinit_num_ostreams)
+ inp->sctp_ep.pre_open_stream_count = sinit->sinit_num_ostreams;
+
+ if (sinit->sinit_max_instreams)
+ inp->sctp_ep.max_open_streams_intome = sinit->sinit_max_instreams;
+
+ if (sinit->sinit_max_attempts)
+ inp->sctp_ep.max_init_times = sinit->sinit_max_attempts;
+
+ if (sinit->sinit_max_init_timeo)
+ inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ {
+ struct sctp_setprim *spa;
+ struct sctp_nets *net, *lnet;
+
+ SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&spa->ssp_addr,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
+ if ((net != stcb->asoc.primary_destination) &&
+ (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
+ /* Ok we need to set it */
+ lnet = stcb->asoc.primary_destination;
+ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+ if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
+ net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH;
+ }
+ net->dest_state |= SCTP_ADDR_SWITCH_PRIMARY;
+ }
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ break;
+ case SCTP_SET_DYNAMIC_PRIMARY:
+ {
+ union sctp_sockstore *ss;
+
+ error = priv_check(curthread,
+ PRIV_NETINET_RESERVEDPORT);
+ if (error)
+ break;
+
+ SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize);
+ /* SUPER USER CHECK? */
+ error = sctp_dynamic_set_primary(&ss->sa, vrf_id);
+ }
+ break;
+ case SCTP_SET_PEER_PRIMARY_ADDR:
+ {
+ struct sctp_setpeerprim *sspp;
+
+ SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
+ if (stcb != NULL) {
+ struct sctp_ifa *ifa;
+
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
+ stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /*
+ * Must validate the ifa found is in
+ * our ep
+ */
+ struct sctp_laddr *laddr;
+ int found = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ }
+ if (sctp_set_primary_ip_address_sa(stcb,
+ (struct sockaddr *)&sspp->sspp_addr) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ out_of_it:
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+
+ }
+ break;
+ case SCTP_BINDX_ADD_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+ int prison = 0;
+
+ td = (struct thread *)p;
+ if (jailed(td->td_ucred)) {
+ prison = 1;
+ }
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses,
+ optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (prison && prison_ip(td->td_ucred, 0, &(((struct sockaddr_in *)(addrs->addr))->sin_addr.s_addr))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ }
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* JAIL XXXX Add else here for V6 */
+ }
+ sctp_bindx_add_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error, p);
+ }
+ break;
+ case SCTP_BINDX_REM_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+ int prison = 0;
+
+ td = (struct thread *)p;
+ if (jailed(td->td_ucred)) {
+ prison = 1;
+ }
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (prison && prison_ip(td->td_ucred, 0, &(((struct sockaddr_in *)(addrs->addr))->sin_addr.s_addr))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ }
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* JAIL XXXX Add else here for V6 */
+ }
+ sctp_bindx_delete_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error);
+ }
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ break;
+ } /* end switch (opt) */
+ return (error);
+}
+
+
+int
+sctp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ void *optval = NULL;
+ size_t optsize = 0;
+ struct sctp_inpcb *inp;
+ void *p;
+ int error = 0;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (sopt->sopt_level != IPPROTO_SCTP) {
+ /* wrong proto level... send back up to IP */
+#ifdef INET6
+ if (INP_CHECK_SOCKAF(so, AF_INET6))
+ error = ip6_ctloutput(so, sopt);
+ else
+#endif /* INET6 */
+ error = ip_ctloutput(so, sopt);
+ return (error);
+ }
+ optsize = sopt->sopt_valsize;
+ if (optsize) {
+ SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (ENOBUFS);
+ }
+ error = sooptcopyin(sopt, optval, optsize, optsize);
+ if (error) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ goto out;
+ }
+ }
+ p = (void *)sopt->sopt_td;
+ if (sopt->sopt_dir == SOPT_SET) {
+ error = sctp_setopt(so, sopt->sopt_name, optval, optsize, p);
+ } else if (sopt->sopt_dir == SOPT_GET) {
+ error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if ((error == 0) && (optval != NULL)) {
+ error = sooptcopyout(sopt, optval, optsize);
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ } else if (optval != NULL) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ }
+out:
+ return (error);
+}
+
+
+static int
+sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ int error = 0;
+ int create_lock_on = 0;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ if ((addr->sa_family == AF_INET6) && (addr->sa_len != sizeof(struct sockaddr_in6))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if ((addr->sa_family == AF_INET) && (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_on = 1;
+
+ SCTP_INP_INCR_REF(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* Now do we connect? */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND using the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ error = EADDRINUSE;
+ goto out_now;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since sctp_findassociation_ep_addr()
+ * will do a decrement if it finds the stcb as long as the
+ * locked tcb (last argument) is NOT a TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ SCTP_TCB_LOCK(stcb);
+ }
+ }
+ if (stcb != NULL) {
+ /* Already have or am bringing up an association */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ goto out_now;
+ }
+ vrf_id = inp->def_vrf_id;
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id, p);
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ soisconnecting(so);
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+ if (create_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
+int
+sctp_listen(struct socket *so, int backlog, struct thread *p)
+{
+ /*
+ * Note this module depends on the protocol processing being called
+ * AFTER any socket level flags and backlog are applied to the
+ * socket. The traditional way that the socket flags are applied is
+ * AFTER protocol processing. We have made a change to the
+ * sys/kern/uipc_socket.c module to reverse this, but this MUST be in
+ * place if the socket API for SCTP is to work properly.
+ */
+
+ int error = 0;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_INP_RLOCK(inp);
+#ifdef SCTP_LOCK_LOGGING
+ if (sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error) {
+ SOCK_UNLOCK(so);
+ SCTP_INP_RUNLOCK(inp);
+ return (error);
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND using the TCP model */
+ SCTP_INP_RUNLOCK(inp);
+ SOCK_UNLOCK(so);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /* We must do a bind. */
+ SOCK_UNLOCK(so);
+ SCTP_INP_RUNLOCK(inp);
+ if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
+ /* bind error, probably perm */
+ return (error);
+ }
+ SOCK_LOCK(so);
+ } else {
+ if (backlog != 0) {
+ inp->sctp_flags |= SCTP_PCB_FLAGS_LISTENING;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_LISTENING;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ /* It appears for 7.0 and on, we must always call this. */
+ solisten_proto(so, backlog);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* remove the ACCEPTCONN flag for one-to-many sockets */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ if (backlog == 0) {
+ /* turning off listen */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ SOCK_UNLOCK(so);
+ return (error);
+}
+
+static int sctp_defered_wakeup_cnt = 0;
+
+int
+sctp_accept(struct socket *so, struct sockaddr **addr)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_inpcb *inp;
+ union sctp_sockstore store;
+
+ int error;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ if (so->so_state & SS_ISDISCONNECTED) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED);
+ return (ECONNABORTED);
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ store = stcb->asoc.primary_destination->ro._l_addr;
+ SCTP_TCB_UNLOCK(stcb);
+ if (store.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = ((struct sockaddr_in *)&store)->sin_port;
+ sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr;
+ *addr = (struct sockaddr *)sin;
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port;
+
+ sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr;
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ return (error);
+ }
+ *addr = (struct sockaddr *)sin6;
+ }
+ /* Wake any delayed sleep action */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
+ if (sowriteable(inp->sctp_socket)) {
+ sowwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
+ if (soreadable(inp->sctp_socket)) {
+ sctp_defered_wakeup_cnt++;
+ sorwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ return (0);
+}
+
+int
+sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_ifa *sctp_ifa;
+
+ /*
+ * Do the malloc first in case it blocks.
+ */
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ sin->sin_port = inp->sctp_lport;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin_a;
+ struct sctp_nets *net;
+ int fnd;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto notConn;
+ }
+ fnd = 0;
+ sin_a = NULL;
+ SCTP_TCB_LOCK(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a == NULL)
+ /* this will make coverity happy */
+ continue;
+
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ break;
+ }
+ }
+ if ((!fnd) || (sin_a == NULL)) {
+ /* punt */
+ SCTP_TCB_UNLOCK(stcb);
+ goto notConn;
+ }
+ vrf_id = inp->def_vrf_id;
+ sctp_ifa = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (sctp_ifa) {
+ sin->sin_addr = sctp_ifa->address.sin.sin_addr;
+ sctp_free_ifa(sctp_ifa);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* For the bound all case you get back 0 */
+ notConn:
+ sin->sin_addr.s_addr = 0;
+ }
+
+ } else {
+ /* Take the first IPv4 address in the list */
+ struct sctp_laddr *laddr;
+ int fnd = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin_a;
+
+ sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+ sin->sin_addr = sin_a->sin_addr;
+ fnd = 1;
+ break;
+ }
+ }
+ if (!fnd) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
+int
+sctp_peeraddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)*addr;
+ int fnd;
+ struct sockaddr_in *sin_a;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ /* Do the malloc first in case it blocks. */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if ((inp == NULL) ||
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /* UDP type and listeners will drop out here */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+
+ /* We must recapture the inp in case we blocked */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (stcb == NULL) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ fnd = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ sin->sin_port = stcb->rport;
+ sin->sin_addr = sin_a->sin_addr;
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (!fnd) {
+ /* No IPv4 address */
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
+struct pr_usrreqs sctp_usrreqs = {
+ .pru_abort = sctp_abort,
+ .pru_accept = sctp_accept,
+ .pru_attach = sctp_attach,
+ .pru_bind = sctp_bind,
+ .pru_connect = sctp_connect,
+ .pru_control = in_control,
+ .pru_close = sctp_close,
+ .pru_detach = sctp_close,
+ .pru_sopoll = sopoll_generic,
+ .pru_disconnect = sctp_disconnect,
+ .pru_listen = sctp_listen,
+ .pru_peeraddr = sctp_peeraddr,
+ .pru_send = sctp_sendm,
+ .pru_shutdown = sctp_shutdown,
+ .pru_sockaddr = sctp_ingetaddr,
+ .pru_sosend = sctp_sosend,
+ .pru_soreceive = sctp_soreceive
+};
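
For readers of the new sctp_usrreq.c above, here is a minimal userland sketch of how the one-to-one (TCP-style) paths implemented by sctp_connect() and sctp_setopt() might be driven. It is illustrative only and not part of this commit; the peer port and loopback address are placeholders, and <netinet/sctp_uio.h> is included explicitly in case <netinet/sctp.h> does not pull in the option structures on its own.

/*
 * Illustrative sketch (not part of this commit): exercise a few of the
 * socket options and the connect path handled by the code above.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>	/* struct sctp_event_subscribe, sctp_assoc_value */
#include <string.h>
#include <err.h>

int
main(void)
{
	struct sockaddr_in peer;
	struct sctp_event_subscribe events;
	struct sctp_assoc_value maxseg;
	int fd;

	/* One-to-one style socket; lands in the SCTP_PCB_FLAGS_TCPTYPE paths. */
	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
	if (fd == -1)
		err(1, "socket");

	/* Subscribe to a couple of notifications (the SCTP_EVENTS case). */
	memset(&events, 0, sizeof(events));
	events.sctp_association_event = 1;
	events.sctp_shutdown_event = 1;
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &events,
	    sizeof(events)) == -1)
		err(1, "setsockopt(SCTP_EVENTS)");

	/* Cap the endpoint fragmentation point (the SCTP_MAXSEG case). */
	memset(&maxseg, 0, sizeof(maxseg));
	maxseg.assoc_value = 1200;
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_MAXSEG, &maxseg,
	    sizeof(maxseg)) == -1)
		err(1, "setsockopt(SCTP_MAXSEG)");

	/* Start the association; this is serviced by sctp_connect() above. */
	memset(&peer, 0, sizeof(peer));
	peer.sin_family = AF_INET;
	peer.sin_len = sizeof(peer);
	peer.sin_port = htons(5001);			/* placeholder port */
	peer.sin_addr.s_addr = htonl(INADDR_LOOPBACK);	/* placeholder peer */
	if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)) == -1)
		err(1, "connect");

	return (0);
}
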
Index: icmp6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/icmp6.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/icmp6.h -L sys/netinet/icmp6.h -u -r1.1.1.2 -r1.2
--- sys/netinet/icmp6.h
+++ sys/netinet/icmp6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/icmp6.h,v 1.16.2.4 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet/icmp6.h,v 1.21 2007/05/17 21:20:23 jinmei Exp $ */
/* $KAME: icmp6.h,v 1.46 2001/04/27 15:09:48 itojun Exp $ */
/*-
@@ -709,6 +709,11 @@
extern int icmp6_rediraccept; /* accept/process redirects */
extern int icmp6_redirtimeout; /* cache time for redirect routes */
+
+#define ICMP6_NODEINFO_FQDNOK 0x1
+#define ICMP6_NODEINFO_NODEADDROK 0x2
+#define ICMP6_NODEINFO_TMPADDROK 0x4
+#define ICMP6_NODEINFO_GLOBALOK 0x8
#endif /* _KERNEL */
#endif /* not _NETINET_ICMP6_H_ */
Index: ip_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_input.c -L sys/netinet/ip_input.c -u -r1.2 -r1.3
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
- * $FreeBSD: src/sys/netinet/ip_input.c,v 1.301.2.7 2006/03/04 09:15:08 oleg Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_input.c,v 1.332.2.1 2007/12/19 08:10:30 guido Exp $");
+
#include "opt_bootp.h"
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
@@ -40,7 +42,6 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
@@ -66,10 +67,14 @@
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_options.h>
#include <machine/in_cksum.h>
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
+#ifdef IPSEC
+#include <netinet/ip_ipsec.h>
+#endif /* IPSEC */
#include <sys/socketvar.h>
@@ -77,15 +82,7 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif
-
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
+#include <security/mac/mac_framework.h>
int rsvp_on = 0;
@@ -101,33 +98,20 @@
SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
&ip_defttl, 0, "Maximum TTL on IP packets");
-static int ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
- &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
-
-static int ip_acceptsourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
- CTLFLAG_RW, &ip_acceptsourceroute, 0,
- "Enable accepting source routed IP packets");
-
-int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
- &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
-
static int ip_keepfaith = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
- &ip_keepfaith, 0,
- "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
+ &ip_keepfaith, 0,
+ "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
static int ip_sendsourcequench = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
- &ip_sendsourcequench, 0,
- "Enable the transmission of source quench packets");
+ &ip_sendsourcequench, 0,
+ "Enable the transmission of source quench packets");
int ip_do_randomid = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
- &ip_do_randomid, 0,
- "Assign random ip_id values");
+ &ip_do_randomid, 0,
+ "Assign random ip_id values");
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
@@ -146,10 +130,6 @@
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
&ip_checkinterface, 0, "Verify packet arrives on correct interface");
-#ifdef DIAGNOSTIC
-static int ipprintfs = 0;
-#endif
-
struct pfil_head inet_pfil_hook; /* Packet filter hooks */
static struct ifqueue ipintrq;
@@ -165,7 +145,8 @@
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
&ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
- &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
+ &ipintrq.ifq_drops, 0,
+ "Number of packets dropped from the IP input queue");
struct ipstat ipstat;
SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
@@ -190,16 +171,17 @@
#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED)
static void maxnipq_update(void);
+static void ipq_zone_change(void *);
static int maxnipq; /* Administrative limit on # reass queues. */
static int nipq = 0; /* Total # of reass queues */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
- "Current number of IPv4 fragment reassembly queue entries");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
+ &nipq, 0, "Current number of IPv4 fragment reassembly queue entries");
static int maxfragsperpacket;
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
- &maxfragsperpacket, 0,
- "Maximum number of IPv4 fragments allowed per packet");
+ &maxfragsperpacket, 0,
+ "Maximum number of IPv4 fragments allowed per packet");
struct callout ipport_tick_callout;
@@ -211,7 +193,7 @@
#ifdef IPSTEALTH
int ipstealth = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
- &ipstealth, 0, "");
+ &ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding");
#endif
/*
@@ -220,28 +202,8 @@
*/
ip_fw_chk_t *ip_fw_chk_ptr = NULL;
ip_dn_io_t *ip_dn_io_ptr = NULL;
-int fw_enable = 1;
int fw_one_pass = 1;
-/*
- * XXX this is ugly. IP options source routing magic.
- */
-struct ipoptrt {
- struct in_addr dst; /* final destination */
- char nop; /* one NOP to align */
- char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
- struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
-};
-
-struct ipopt_tag {
- struct m_tag tag;
- int ip_nhops;
- struct ipoptrt ip_srcrt;
-};
-
-static void save_rte(struct mbuf *, u_char *, struct in_addr);
-static int ip_dooptions(struct mbuf *m, int);
-static void ip_forward(struct mbuf *m, int srcrt);
static void ip_freef(struct ipqhead *, struct ipq *);
/*
@@ -249,10 +211,10 @@
* All protocols not implemented in kernel go to raw IP protocol handler.
*/
void
-ip_init()
+ip_init(void)
{
- register struct protosw *pr;
- register int i;
+ struct protosw *pr;
+ int i;
TAILQ_INIT(&in_ifaddrhead);
in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
@@ -298,6 +260,8 @@
ipport_tick(NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
+ EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
/* Initialize various other remaining things. */
ip_id = time_second & 0xffff;
@@ -306,9 +270,10 @@
netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
}
-void ip_fini(xtp)
- void *xtp;
+void
+ip_fini(void *xtp)
{
+
callout_stop(&ipport_tick_callout);
}
@@ -326,15 +291,9 @@
u_short sum;
int dchg = 0; /* dest changed after fw */
struct in_addr odst; /* original dst address */
-#ifdef FAST_IPSEC
- struct m_tag *mtag;
- struct tdb_ident *tdbi;
- struct secpolicy *sp;
- int s, error;
-#endif /* FAST_IPSEC */
- M_ASSERTPKTHDR(m);
-
+ M_ASSERTPKTHDR(m);
+
if (m->m_flags & M_FASTFWD_OURS) {
/*
* Firewall or NAT changed destination to local.
@@ -344,8 +303,8 @@
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- goto ours;
- }
+ goto ours;
+ }
ipstat.ips_total++;
@@ -434,20 +393,13 @@
} else
m_adj(m, ip->ip_len - m->m_pkthdr.len);
}
-#if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
- /*
- * Bypass packet filtering for packets from a tunnel (gif).
- */
- if (ipsec_getnhist(m))
- goto passin;
-#endif
-#if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
+#ifdef IPSEC
/*
* Bypass packet filtering for packets from a tunnel (gif).
*/
- if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
+ if (ip_ipsec_filtertunnel(m))
goto passin;
-#endif
+#endif /* IPSEC */
/*
* Run through list of hooks for input packets.
@@ -458,7 +410,7 @@
*/
/* Jump over all PFIL processing if hooks are not active. */
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passin;
odst = ip->ip_dst;
@@ -476,9 +428,6 @@
m->m_flags &= ~M_FASTFWD_OURS;
goto ours;
}
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
-#else
if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
/*
* Directly ship on the packet. This allows to forward packets
@@ -488,7 +437,6 @@
ip_forward(m, dchg);
return;
}
-#endif /* IPFIREWALL_FORWARD_EXTENDED */
#endif /* IPFIREWALL_FORWARD */
passin:
@@ -584,6 +532,12 @@
#endif
}
}
+ /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
+ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct in_multi *inm;
if (ip_mrouter) {
@@ -650,42 +604,9 @@
m_freem(m);
} else {
#ifdef IPSEC
- /*
- * Enforce inbound IPsec SPD.
- */
- if (ipsec4_in_reject(m, NULL)) {
- ipsecstat.in_polvio++;
+ if (ip_ipsec_fwd(m))
goto bad;
- }
#endif /* IPSEC */
-#ifdef FAST_IPSEC
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
- } else {
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
- IP_FORWARDING, &error);
- }
- if (sp == NULL) { /* NB: can happen if error */
- splx(s);
- /*XXX error stat???*/
- DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
- goto bad;
- }
-
- /*
- * Check security policy against packet attributes.
- */
- error = ipsec_in_reject(sp, m);
- KEY_FREESP(&sp);
- splx(s);
- if (error) {
- ipstat.ips_cantforward++;
- goto bad;
- }
-#endif /* FAST_IPSEC */
ip_forward(m, dchg);
}
return;
@@ -732,51 +653,9 @@
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
- if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
- ipsec4_in_reject(m, NULL)) {
- ipsecstat.in_polvio++;
+ if (ip_ipsec_input(m))
goto bad;
- }
-#endif
-#if FAST_IPSEC
- /*
- * enforce IPsec policy checking if we are seeing last header.
- * note that we do not visit this with protocols with pcb layer
- * code - like udp/tcp/raw ip.
- */
- if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
- /*
- * Check if the packet has already had IPsec processing
- * done. If so, then just pass it along. This tag gets
- * set during AH, ESP, etc. input handling, before the
- * packet is returned to the ip input queue for delivery.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
- } else {
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
- IP_FORWARDING, &error);
- }
- if (sp != NULL) {
- /*
- * Check security policy against packet attributes.
- */
- error = ipsec_in_reject(sp, m);
- KEY_FREESP(&sp);
- } else {
- /* XXX error stat??? */
- error = EINVAL;
-DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
- goto bad;
- }
- splx(s);
- if (error)
- goto bad;
- }
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/*
* Switch out to protocol's input routine.
@@ -817,6 +696,16 @@
uma_zone_set_max(ipq_zone, 1);
}
+static void
+ipq_zone_change(void *tag)
+{
+
+ if (maxnipq > 0 && maxnipq < (nmbclusters / 32)) {
+ maxnipq = nmbclusters / 32;
+ maxnipq_update();
+ }
+}
+
static int
sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
{
@@ -852,7 +741,6 @@
* to the first packet/fragment are preserved.
* The IP header is *NOT* adjusted out of iplen.
*/
-
struct mbuf *
ip_reass(struct mbuf *m)
{
@@ -966,6 +854,7 @@
#ifdef MAC
if (mac_init_ipq(fp, M_NOWAIT) != 0) {
uma_zfree(ipq_zone, fp);
+ fp = NULL;
goto dropfrag;
}
mac_create_ipq(m, fp);
@@ -1167,11 +1056,9 @@
* associated datagrams.
*/
static void
-ip_freef(fhp, fp)
- struct ipqhead *fhp;
- struct ipq *fp;
+ip_freef(struct ipqhead *fhp, struct ipq *fp)
{
- register struct mbuf *q;
+ struct mbuf *q;
IPQ_LOCK_ASSERT();
@@ -1191,9 +1078,9 @@
* queue, discard it.
*/
void
-ip_slowtimo()
+ip_slowtimo(void)
{
- register struct ipq *fp;
+ struct ipq *fp;
int i;
IPQ_LOCK();
@@ -1230,7 +1117,7 @@
* Drain off all datagram fragments.
*/
void
-ip_drain()
+ip_drain(void)
{
int i;
@@ -1306,296 +1193,12 @@
return (0);
}
-
-/*
- * Do option processing on a datagram,
- * possibly discarding it if bad options are encountered,
- * or forwarding it if source-routed.
- * The pass argument is used when operating in the IPSTEALTH
- * mode to tell what options to process:
- * [LS]SRR (pass 0) or the others (pass 1).
- * The reason for as many as two passes is that when doing IPSTEALTH,
- * non-routing options should be processed only if the packet is for us.
- * Returns 1 if packet has been forwarded/freed,
- * 0 if the packet should be processed further.
- */
-static int
-ip_dooptions(struct mbuf *m, int pass)
-{
- struct ip *ip = mtod(m, struct ip *);
- u_char *cp;
- struct in_ifaddr *ia;
- int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
- struct in_addr *sin, dst;
- n_time ntime;
- struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
-
- /* ignore or reject packets with IP options */
- if (ip_doopts == 0)
- return 0;
- else if (ip_doopts == 2) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_FILTER_PROHIB;
- goto bad;
- }
-
- dst = ip->ip_dst;
- cp = (u_char *)(ip + 1);
- cnt = (ip->ip_hl << 2) - sizeof (struct ip);
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[IPOPT_OPTVAL];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP)
- optlen = 1;
- else {
- if (cnt < IPOPT_OLEN + sizeof(*cp)) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- optlen = cp[IPOPT_OLEN];
- if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- }
- switch (opt) {
-
- default:
- break;
-
- /*
- * Source routing with record.
- * Find interface with current destination address.
- * If none on this machine then drop if strictly routed,
- * or do nothing if loosely routed.
- * Record interface address and bring up next address
- * component. If strictly routed make sure next
- * address is on directly accessible net.
- */
- case IPOPT_LSRR:
- case IPOPT_SSRR:
-#ifdef IPSTEALTH
- if (ipstealth && pass > 0)
- break;
-#endif
- if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- ipaddr.sin_addr = ip->ip_dst;
- ia = (struct in_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&ipaddr);
- if (ia == NULL) {
- if (opt == IPOPT_SSRR) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- }
- if (!ip_dosourceroute)
- goto nosourcerouting;
- /*
- * Loose routing, and not at next destination
- * yet; nothing to do except forward.
- */
- break;
- }
- off--; /* 0 origin */
- if (off > optlen - (int)sizeof(struct in_addr)) {
- /*
- * End of source route. Should be for us.
- */
- if (!ip_acceptsourceroute)
- goto nosourcerouting;
- save_rte(m, cp, ip->ip_src);
- break;
- }
-#ifdef IPSTEALTH
- if (ipstealth)
- goto dropit;
-#endif
- if (!ip_dosourceroute) {
- if (ipforwarding) {
- char buf[16]; /* aaa.bbb.ccc.ddd\0 */
- /*
- * Acting as a router, so generate ICMP
- */
-nosourcerouting:
- strcpy(buf, inet_ntoa(ip->ip_dst));
- log(LOG_WARNING,
- "attempted source route from %s to %s\n",
- inet_ntoa(ip->ip_src), buf);
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- } else {
- /*
- * Not acting as a router, so silently drop.
- */
-#ifdef IPSTEALTH
-dropit:
-#endif
- ipstat.ips_cantforward++;
- m_freem(m);
- return (1);
- }
- }
-
- /*
- * locate outgoing interface
- */
- (void)memcpy(&ipaddr.sin_addr, cp + off,
- sizeof(ipaddr.sin_addr));
-
- if (opt == IPOPT_SSRR) {
-#define INA struct in_ifaddr *
-#define SA struct sockaddr *
- if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
- ia = (INA)ifa_ifwithnet((SA)&ipaddr);
- } else
- ia = ip_rtaddr(ipaddr.sin_addr);
- if (ia == NULL) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- }
- ip->ip_dst = ipaddr.sin_addr;
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- /*
- * Let ip_intr's mcast routing check handle mcast pkts
- */
- forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
- break;
-
- case IPOPT_RR:
-#ifdef IPSTEALTH
- if (ipstealth && pass == 0)
- break;
-#endif
- if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- /*
- * If no space remains, ignore.
- */
- off--; /* 0 origin */
- if (off > optlen - (int)sizeof(struct in_addr))
- break;
- (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
- sizeof(ipaddr.sin_addr));
- /*
- * locate outgoing interface; if we're the destination,
- * use the incoming interface (should be same).
- */
- if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
- (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_HOST;
- goto bad;
- }
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- break;
-
- case IPOPT_TS:
-#ifdef IPSTEALTH
- if (ipstealth && pass == 0)
- break;
-#endif
- code = cp - (u_char *)ip;
- if (optlen < 4 || optlen > 40) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < 5) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if (off > optlen - (int)sizeof(int32_t)) {
- cp[IPOPT_OFFSET + 1] += (1 << 4);
- if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- break;
- }
- off--; /* 0 origin */
- sin = (struct in_addr *)(cp + off);
- switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
-
- case IPOPT_TS_TSONLY:
- break;
-
- case IPOPT_TS_TSANDADDR:
- if (off + sizeof(n_time) +
- sizeof(struct in_addr) > optlen) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- ipaddr.sin_addr = dst;
- ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
- m->m_pkthdr.rcvif);
- if (ia == NULL)
- continue;
- (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- off += sizeof(struct in_addr);
- break;
-
- case IPOPT_TS_PRESPEC:
- if (off + sizeof(n_time) +
- sizeof(struct in_addr) > optlen) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- (void)memcpy(&ipaddr.sin_addr, sin,
- sizeof(struct in_addr));
- if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
- continue;
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- off += sizeof(struct in_addr);
- break;
-
- default:
- code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
- goto bad;
- }
- ntime = iptime();
- (void)memcpy(cp + off, &ntime, sizeof(n_time));
- cp[IPOPT_OFFSET] += sizeof(n_time);
- }
- }
- if (forward && ipforwarding) {
- ip_forward(m, 1);
- return (1);
- }
- return (0);
-bad:
- icmp_error(m, type, code, 0, 0);
- ipstat.ips_badoptions++;
- return (1);
-}
-
/*
* Given address of next destination (final or next hop),
* return internet address info of interface to be used to get there.
*/
struct in_ifaddr *
-ip_rtaddr(dst)
- struct in_addr dst;
+ip_rtaddr(struct in_addr dst)
{
struct route sro;
struct sockaddr_in *sin;
@@ -1616,143 +1219,6 @@
return (ifa);
}
-/*
- * Save incoming source route for use in replies,
- * to be picked up later by ip_srcroute if the receiver is interested.
- */
-static void
-save_rte(m, option, dst)
- struct mbuf *m;
- u_char *option;
- struct in_addr dst;
-{
- unsigned olen;
- struct ipopt_tag *opts;
-
- opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
- sizeof(struct ipopt_tag), M_NOWAIT);
- if (opts == NULL)
- return;
-
- olen = option[IPOPT_OLEN];
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("save_rte: olen %d\n", olen);
-#endif
- if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
- m_tag_free((struct m_tag *)opts);
- return;
- }
- bcopy(option, opts->ip_srcrt.srcopt, olen);
- opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
- opts->ip_srcrt.dst = dst;
- m_tag_prepend(m, (struct m_tag *)opts);
-}
-
-/*
- * Retrieve incoming source route for use in replies,
- * in the same form used by setsockopt.
- * The first hop is placed before the options, will be removed later.
- */
-struct mbuf *
-ip_srcroute(m0)
- struct mbuf *m0;
-{
- register struct in_addr *p, *q;
- register struct mbuf *m;
- struct ipopt_tag *opts;
-
- opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
- if (opts == NULL)
- return (NULL);
-
- if (opts->ip_nhops == 0)
- return (NULL);
- m = m_get(M_DONTWAIT, MT_DATA);
- if (m == NULL)
- return (NULL);
-
-#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
-
- /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
- m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
- sizeof(struct in_addr) + OPTSIZ;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("ip_srcroute: nhops %d mlen %d", opts->ip_nhops, m->m_len);
-#endif
-
- /*
- * First save first hop for return route
- */
- p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
- *(mtod(m, struct in_addr *)) = *p--;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
-#endif
-
- /*
- * Copy option fields and padding (nop) to mbuf.
- */
- opts->ip_srcrt.nop = IPOPT_NOP;
- opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
- (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
- &(opts->ip_srcrt.nop), OPTSIZ);
- q = (struct in_addr *)(mtod(m, caddr_t) +
- sizeof(struct in_addr) + OPTSIZ);
-#undef OPTSIZ
- /*
- * Record return path as an IP source route,
- * reversing the path (pointers are now aligned).
- */
- while (p >= opts->ip_srcrt.route) {
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" %lx", (u_long)ntohl(q->s_addr));
-#endif
- *q++ = *p--;
- }
- /*
- * Last hop goes to final destination.
- */
- *q = opts->ip_srcrt.dst;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" %lx\n", (u_long)ntohl(q->s_addr));
-#endif
- m_tag_delete(m0, (struct m_tag *)opts);
- return (m);
-}
-
-/*
- * Strip out IP options, at higher
- * level protocol in the kernel.
- * Second argument is buffer to which options
- * will be moved, and return value is their length.
- * XXX should be deleted; last arg currently ignored.
- */
-void
-ip_stripoptions(m, mopt)
- register struct mbuf *m;
- struct mbuf *mopt;
-{
- register int i;
- struct ip *ip = mtod(m, struct ip *);
- register caddr_t opts;
- int olen;
-
- olen = (ip->ip_hl << 2) - sizeof (struct ip);
- opts = (caddr_t)(ip + 1);
- i = m->m_len - (sizeof (struct ip) + olen);
- bcopy(opts + olen, opts, (unsigned)i);
- m->m_len -= olen;
- if (m->m_flags & M_PKTHDR)
- m->m_pkthdr.len -= olen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = sizeof(struct ip) >> 2;
-}
-
u_char inetctlerrmap[PRC_NCMDS] = {
0, 0, 0, 0,
0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
@@ -1785,14 +1251,6 @@
struct in_addr dest;
int error, type = 0, code = 0, mtu = 0;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("forward: src %lx dst %lx ttl %x\n",
- (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
- ip->ip_ttl);
-#endif
-
-
if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
ipstat.ips_cantforward++;
m_freem(m);
@@ -1810,7 +1268,8 @@
}
#endif
- if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
+ ia = ip_rtaddr(ip->ip_dst);
+ if (!srcrt && ia == NULL) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
return;
}
@@ -1831,7 +1290,7 @@
* assume exclusive access to the IP header in `m', so any
* data in a cluster may change before we reach icmp_error().
*/
- MGET(mcopy, M_DONTWAIT, m->m_type);
+ MGETHDR(mcopy, M_DONTWAIT, m->m_type);
if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
/*
* It's probably ok if the pkthdr dup fails (because
@@ -1843,8 +1302,7 @@
mcopy = NULL;
}
if (mcopy != NULL) {
- mcopy->m_len = imin((ip->ip_hl << 2) + 8,
- (int)ip->ip_len);
+ mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
mcopy->m_pkthdr.len = mcopy->m_len;
m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
}
@@ -1894,10 +1352,6 @@
/* Router requirements says to only send host redirects */
type = ICMP_REDIRECT;
code = ICMP_REDIRECT_HOST;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("redirect (%d) to %lx\n", code, (u_long)dest.s_addr);
-#endif
}
}
if (rt)
@@ -1938,63 +1392,10 @@
case EMSGSIZE:
type = ICMP_UNREACH;
code = ICMP_UNREACH_NEEDFRAG;
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * If the packet is routed over IPsec tunnel, tell the
- * originator the tunnel MTU.
- * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
- * XXX quickhack!!!
- */
- {
- struct secpolicy *sp = NULL;
- int ipsecerror;
- int ipsechdr;
- struct route *ro;
-
-#ifdef IPSEC
- sp = ipsec4_getpolicybyaddr(mcopy,
- IPSEC_DIR_OUTBOUND,
- IP_FORWARDING,
- &ipsecerror);
-#else /* FAST_IPSEC */
- sp = ipsec_getpolicybyaddr(mcopy,
- IPSEC_DIR_OUTBOUND,
- IP_FORWARDING,
- &ipsecerror);
-#endif
- if (sp != NULL) {
- /* count IPsec header size */
- ipsechdr = ipsec4_hdrsiz(mcopy,
- IPSEC_DIR_OUTBOUND,
- NULL);
-
- /*
- * find the correct route for outer IPv4
- * header, compute tunnel MTU.
- */
- if (sp->req != NULL
- && sp->req->sav != NULL
- && sp->req->sav->sah != NULL) {
- ro = &sp->req->sav->sah->sa_route;
- if (ro->ro_rt && ro->ro_rt->rt_ifp) {
- mtu =
- ro->ro_rt->rt_rmx.rmx_mtu ?
- ro->ro_rt->rt_rmx.rmx_mtu :
- ro->ro_rt->rt_ifp->if_mtu;
- mtu -= ipsechdr;
- }
- }
#ifdef IPSEC
- key_freesp(sp);
-#else /* FAST_IPSEC */
- KEY_FREESP(&sp);
-#endif
- ipstat.ips_cantfrag++;
- break;
- }
- }
-#endif /*IPSEC || FAST_IPSEC*/
+ mtu = ip_ipsec_mtu(m);
+#endif /* IPSEC */
/*
* If the MTU wasn't set before use the interface mtu or
* fall back to the next smaller mtu step compared to the
@@ -2035,11 +1436,8 @@
}
void
-ip_savecontrol(inp, mp, ip, m)
- register struct inpcb *inp;
- register struct mbuf **mp;
- register struct ip *ip;
- register struct mbuf *m;
+ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
+ struct mbuf *m)
{
if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
struct bintime bt;
@@ -2104,8 +1502,7 @@
if (((ifp = m->m_pkthdr.rcvif))
&& ( ifp->if_index && (ifp->if_index <= if_index))) {
- sdp = (struct sockaddr_dl *)
- (ifaddr_byindex(ifp->if_index)->ifa_addr);
+ sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
/*
* Change our mind and don't try copy.
*/
@@ -2130,11 +1527,10 @@
}
/*
- * XXX these routines are called from the upper part of the kernel.
- * They need to be locked when we remove Giant.
- *
- * They could also be moved to ip_mroute.c, since all the RSVP
- * handling is done there already.
+ * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
+ * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
+ * locking. This code remains in ip_input.c as ip_mroute.c is optionally
+ * compiled.
*/
static int ip_rsvp_on;
struct socket *ip_rsvpd;
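
For reference, the ip_srcroute() code removed above builds the reply route by reversing the recorded hops: the first hop of the reply is the last hop taken on input, the earlier hops follow in reverse order, and the original destination becomes the final entry. A minimal userland sketch of that reversal (the file name, addresses and variable names are invented for illustration; this is not the kernel code):

	/* srcroute_demo.c -- toy model of the reply-route reversal done by
	 * the removed ip_srcroute() above.  Userland only. */
	#include <stdio.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	#define NHOPS 3

	int
	main(void)
	{
		struct in_addr route[NHOPS];	/* hops recorded from the incoming header */
		struct in_addr dst;		/* original destination (opts->ip_srcrt.dst) */
		struct in_addr reply[NHOPS + 1];
		int i, n = 0;

		inet_aton("10.0.0.1", &route[0]);
		inet_aton("10.0.0.2", &route[1]);
		inet_aton("10.0.0.3", &route[2]);
		inet_aton("192.0.2.7", &dst);

		/* First hop of the reply is the last recorded hop... */
		reply[n++] = route[NHOPS - 1];
		/* ...then the earlier hops in reverse order... */
		for (i = NHOPS - 2; i >= 0; i--)
			reply[n++] = route[i];
		/* ...and the last entry is the original destination. */
		reply[n++] = dst;

		for (i = 0; i < n; i++)
			printf("hop %d: %s\n", i, inet_ntoa(reply[i]));
		return (0);
	}

Compiled with plain cc, this prints 10.0.0.3, 10.0.0.2, 10.0.0.1, 192.0.2.7 -- the recorded route turned around for the reply.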
--- /dev/null
+++ sys/netinet/tcp_syncache.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_syncache.h,v 1.1 2007/07/27 00:57:06 silby Exp $
+ */
+
+#ifndef _NETINET_TCP_SYNCACHE_H_
+#define _NETINET_TCP_SYNCACHE_H_
+#ifdef _KERNEL
+
+void syncache_init(void);
+void syncache_unreach(struct in_conninfo *, struct tcphdr *);
+int syncache_expand(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct socket **, struct mbuf *);
+void syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
+void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
+void syncache_badack(struct in_conninfo *);
+int syncache_pcbcount(void);
+int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+
+#endif /* _KERNEL */
+#endif /* _NETINET_TCP_SYNCACHE_H_ */
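
The header added above is the interface the rest of the stack uses. Judging from the assertions and comments in the tcp_syncache.c diff further down, syncache_add() handles the initial SYN arriving on a listening socket, syncache_expand() handles the ACK that completes the handshake, and syncache_chkrst()/syncache_badack()/syncache_unreach() clean up entries after RSTs, bad ACKs and ICMP errors. A small userland toy that mirrors that dispatch; the real demultiplexing lives in tcp_input(), and everything below is invented for illustration:

	/* dispatch_demo.c -- which tcp_syncache.h entry point would handle a
	 * given segment on a listening socket, per the KASSERTs in the
	 * tcp_syncache.c diff below.  Userland toy, not kernel code. */
	#include <stdio.h>

	#define TH_SYN	0x02
	#define TH_RST	0x04
	#define TH_ACK	0x10

	static const char *
	syncache_entry_point(int flags)
	{
		if (flags & TH_RST)
			return ("syncache_chkrst");	/* RST against a pending entry */
		if ((flags & (TH_SYN | TH_ACK | TH_RST)) == TH_SYN)
			return ("syncache_add");	/* pure SYN: create/refresh entry */
		if ((flags & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK)
			return ("syncache_expand");	/* pure ACK: complete handshake */
		return ("not a syncache case");
	}

	int
	main(void)
	{
		printf("SYN     -> %s\n", syncache_entry_point(TH_SYN));
		printf("ACK     -> %s\n", syncache_entry_point(TH_ACK));
		printf("RST     -> %s\n", syncache_entry_point(TH_RST));
		printf("SYN|ACK -> %s\n", syncache_entry_point(TH_SYN | TH_ACK));
		return (0);
	}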
Index: pim_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/pim_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/pim_var.h -L sys/netinet/pim_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/pim_var.h
+++ sys/netinet/pim_var.h
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/pim_var.h,v 1.2 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/pim_var.h,v 1.3 2005/08/10 07:10:02 obrien Exp $
*/
#ifndef _NETINET_PIM_VAR_H_
@@ -71,7 +71,6 @@
}
#ifdef _KERNEL
-extern struct pimstat pimstat;
void pim_input(struct mbuf *, int);
SYSCTL_DECL(_net_inet_pim);
Index: ip_ecn.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_ecn.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_ecn.c -L sys/netinet/ip_ecn.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_ecn.c
+++ sys/netinet/ip_ecn.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_ecn.c,v 1.7 2005/01/07 01:45:44 imp Exp $ */
/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */
/*-
@@ -35,6 +34,9 @@
* http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_ecn.c,v 1.9 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -90,11 +92,9 @@
* modify outer ECN (TOS) field on ingress operation (tunnel encapsulation).
*/
void
-ip_ecn_ingress(mode, outer, inner)
- int mode;
- u_int8_t *outer;
- const u_int8_t *inner;
+ip_ecn_ingress(int mode, u_int8_t *outer, const u_int8_t *inner)
{
+
if (!outer || !inner)
panic("NULL pointer passed to ip_ecn_ingress");
@@ -124,11 +124,9 @@
* the caller should drop the packet if the return value is 0.
*/
int
-ip_ecn_egress(mode, outer, inner)
- int mode;
- const u_int8_t *outer;
- u_int8_t *inner;
+ip_ecn_egress(int mode, const u_int8_t *outer, u_int8_t *inner)
{
+
if (!outer || !inner)
panic("NULL pointer passed to ip_ecn_egress");
@@ -160,10 +158,7 @@
#ifdef INET6
void
-ip6_ecn_ingress(mode, outer, inner)
- int mode;
- u_int32_t *outer;
- const u_int32_t *inner;
+ip6_ecn_ingress(int mode, u_int32_t *outer, const u_int32_t *inner)
{
u_int8_t outer8, inner8;
@@ -177,10 +172,7 @@
}
int
-ip6_ecn_egress(mode, outer, inner)
- int mode;
- const u_int32_t *outer;
- u_int32_t *inner;
+ip6_ecn_egress(int mode, const u_int32_t *outer, u_int32_t *inner)
{
u_int8_t outer8, inner8, oinner8;
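
The tcp_syncache.c diff below replaces the global timer queues with per-bucket state; as the updated syncache_insert() shows, new entries now go to the head of a bucket row and, when the row hits its limit, the oldest entry at the tail is dropped. A minimal userland model of that eviction policy using the same sys/queue.h macros (the limit value and all names here are made up):

	/* bucket_demo.c -- toy model of the per-bucket overflow policy in the
	 * updated syncache_insert() below.  Userland only. */
	#include <sys/queue.h>
	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define BUCKET_LIMIT 4

	struct entry {
		int id;
		TAILQ_ENTRY(entry) link;
	};

	static TAILQ_HEAD(bucket_head, entry) bucket = TAILQ_HEAD_INITIALIZER(bucket);
	static unsigned length;

	static void
	bucket_insert(int id)
	{
		struct entry *e, *old;

		if (length >= BUCKET_LIMIT) {
			/* Bucket full: drop the oldest entry, which sits at the tail. */
			old = TAILQ_LAST(&bucket, bucket_head);
			TAILQ_REMOVE(&bucket, old, link);
			free(old);
			length--;
		}
		if ((e = malloc(sizeof(*e))) == NULL)
			err(1, "malloc");
		e->id = id;
		/* New entries always go to the head of the bucket row. */
		TAILQ_INSERT_HEAD(&bucket, e, link);
		length++;
	}

	int
	main(void)
	{
		struct entry *e;
		int i;

		for (i = 1; i <= 6; i++)
			bucket_insert(i);
		/* Entries 1 and 2 have been evicted; 6 is newest, 3 is oldest. */
		TAILQ_FOREACH(e, &bucket, link)
			printf("%d ", e->id);
		printf("\n");
		return (0);
	}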
Index: tcp_syncache.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_syncache.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -L sys/netinet/tcp_syncache.c -L sys/netinet/tcp_syncache.c -u -r1.8 -r1.9
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2001 McAfee, Inc.
+ * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jonathan Lemon
@@ -27,32 +28,33 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.74.2.5 2006/02/16 01:06:22 qingli Exp $
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.130.2.6 2007/12/20 12:34:32 ru Exp $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
-#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/md5.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/random.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <vm/uma.h>
#include <net/if.h>
#include <net/route.h>
@@ -63,6 +65,7 @@
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -71,62 +74,108 @@
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
-#ifdef TCPDEBUG
-#include <netinet/tcpip.h>
-#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
-#ifdef TCPDEBUG
-#include <netinet/tcp_debug.h>
-#endif
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#ifdef INET6
#include <netipsec/ipsec6.h>
#endif
#include <netipsec/key.h>
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
-#include <vm/uma.h>
+
+#include <security/mac/mac_framework.h>
static int tcp_syncookies = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
&tcp_syncookies, 0,
"Use TCP SYN cookies if the syncache overflows");
+static int tcp_syncookiesonly = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
+ &tcp_syncookiesonly, 0,
+ "Use only TCP SYN cookies");
+
+#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
+#define SYNCOOKIE_LIFETIME 16 /* seconds */
+
+struct syncache {
+ TAILQ_ENTRY(syncache) sc_hash;
+ struct in_conninfo sc_inc; /* addresses */
+ int sc_rxttime; /* retransmit time */
+ u_int16_t sc_rxmits; /* retransmit counter */
+
+ u_int32_t sc_tsreflect; /* timestamp to reflect */
+ u_int32_t sc_ts; /* our timestamp to send */
+ u_int32_t sc_tsoff; /* ts offset w/ syncookies */
+ u_int32_t sc_flowlabel; /* IPv6 flowlabel */
+ tcp_seq sc_irs; /* seq from peer */
+ tcp_seq sc_iss; /* our ISS */
+ struct mbuf *sc_ipopts; /* source route */
+
+ u_int16_t sc_peer_mss; /* peer's MSS */
+ u_int16_t sc_wnd; /* advertised window */
+ u_int8_t sc_ip_ttl; /* IPv4 TTL */
+ u_int8_t sc_ip_tos; /* IPv4 TOS */
+ u_int8_t sc_requested_s_scale:4,
+ sc_requested_r_scale:4;
+ u_int8_t sc_flags;
+#define SCF_NOOPT 0x01 /* no TCP options */
+#define SCF_WINSCALE 0x02 /* negotiated window scaling */
+#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
+ /* MSS is implicit */
+#define SCF_UNREACH 0x10 /* icmp unreachable received */
+#define SCF_SIGNATURE 0x20 /* send MD5 digests */
+#define SCF_SACK 0x80 /* send SACK option */
+#ifndef DISABLE_TCP_OFFLOAD
+ void *sc_pspare[2]; /* toepcb / toe_usrreqs */
+#endif
+#ifdef MAC
+ struct label *sc_label; /* MAC label reference */
+#endif
+};
+
+struct syncache_head {
+ struct mtx sch_mtx;
+ TAILQ_HEAD(sch_head, syncache) sch_bucket;
+ struct callout sch_timer;
+ int sch_nextc;
+ u_int sch_length;
+ u_int sch_oddeven;
+ u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
+ u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
+ u_int sch_reseed; /* time_uptime, seconds */
+};
+
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
-#ifdef TCPDEBUG
-static int syncache_respond(struct syncache *, struct mbuf *, struct socket *);
-#else
-static int syncache_respond(struct syncache *, struct mbuf *);
-#endif
+static int syncache_respond(struct syncache *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
+static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
+ int docallout);
static void syncache_timer(void *);
-static u_int32_t syncookie_generate(struct syncache *, u_int32_t *);
-static struct syncache *syncookie_lookup(struct in_conninfo *,
- struct tcphdr *, struct socket *);
+static void syncookie_generate(struct syncache_head *, struct syncache *,
+ u_int32_t *);
+static struct syncache
+ *syncookie_lookup(struct in_conninfo *, struct syncache_head *,
+ struct syncache *, struct tcpopt *, struct tcphdr *,
+ struct socket *);
/*
* Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
- * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds,
+ * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds,
* the odds are that the user has given up attempting to connect by then.
*/
#define SYNCACHE_MAXREXMTS 3
@@ -141,12 +190,10 @@
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
- u_int cache_count;
+ u_int cache_count; /* XXX: unprotected */
u_int cache_limit;
u_int rexmt_limit;
u_int hash_secret;
- TAILQ_HEAD(, syncache) timerq[SYNCACHE_MAXREXMTS + 1];
- struct callout tt_timerq[SYNCACHE_MAXREXMTS + 1];
};
static struct tcp_syncache tcp_syncache;
@@ -167,6 +214,10 @@
SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
&tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
+int tcp_sc_rst_sock_fail = 1;
+SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_RW,
+ &tcp_sc_rst_sock_fail, 0, "Send reset on socket allocation failure");
+
static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
#define SYNCACHE_HASH(inc, mask) \
@@ -190,21 +241,21 @@
#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
-#define SYNCACHE_TIMEOUT(sc, slot) do { \
- sc->sc_rxtslot = (slot); \
- sc->sc_rxttime = ticks + TCPTV_RTOBASE * tcp_backoff[(slot)]; \
- TAILQ_INSERT_TAIL(&tcp_syncache.timerq[(slot)], sc, sc_timerq); \
- if (!callout_active(&tcp_syncache.tt_timerq[(slot)])) \
- callout_reset(&tcp_syncache.tt_timerq[(slot)], \
- TCPTV_RTOBASE * tcp_backoff[(slot)], \
- syncache_timer, (void *)((intptr_t)(slot))); \
-} while (0)
+#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx)
+#define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx)
+#define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED)
+/*
+ * Requires the syncache entry to be already removed from the bucket list.
+ */
static void
syncache_free(struct syncache *sc)
{
if (sc->sc_ipopts)
(void) m_free(sc->sc_ipopts);
+#ifdef MAC
+ mac_destroy_syncache(&sc->sc_label);
+#endif
uma_zfree(tcp_syncache.zone, sc);
}
@@ -217,15 +268,11 @@
tcp_syncache.cache_count = 0;
tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
- tcp_syncache.cache_limit =
- tcp_syncache.hashsize * tcp_syncache.bucket_limit;
tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
tcp_syncache.hash_secret = arc4random();
TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
&tcp_syncache.hashsize);
- TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
- &tcp_syncache.cache_limit);
TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
&tcp_syncache.bucket_limit);
if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) {
@@ -234,6 +281,12 @@
}
tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
+ /* Set limits. */
+ tcp_syncache.cache_limit =
+ tcp_syncache.hashsize * tcp_syncache.bucket_limit;
+ TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
+ &tcp_syncache.cache_limit);
+
/* Allocate the hash table. */
MALLOC(tcp_syncache.hashbase, struct syncache_head *,
tcp_syncache.hashsize * sizeof(struct syncache_head),
@@ -242,180 +295,174 @@
/* Initialize the hash buckets. */
for (i = 0; i < tcp_syncache.hashsize; i++) {
TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
+ mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
+ NULL, MTX_DEF);
+ callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer,
+ &tcp_syncache.hashbase[i].sch_mtx, 0);
tcp_syncache.hashbase[i].sch_length = 0;
}
- /* Initialize the timer queues. */
- for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) {
- TAILQ_INIT(&tcp_syncache.timerq[i]);
- callout_init(&tcp_syncache.tt_timerq[i], NET_CALLOUT_MPSAFE);
- }
-
- /*
- * Allocate the syncache entries. Allow the zone to allocate one
- * more entry than cache limit, so a new entry can bump out an
- * older one.
- */
+ /* Create the syncache entry zone. */
tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
- tcp_syncache.cache_limit -= 1;
}
+/*
+ * Inserts a syncache entry into the specified bucket row.
+ * Locks and unlocks the syncache_head autonomously.
+ */
static void
-syncache_insert(sc, sch)
- struct syncache *sc;
- struct syncache_head *sch;
+syncache_insert(struct syncache *sc, struct syncache_head *sch)
{
struct syncache *sc2;
- int i;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ SCH_LOCK(sch);
/*
- * Make sure that we don't overflow the per-bucket
- * limit or the total cache size limit.
+ * Make sure that we don't overflow the per-bucket limit.
+ * If the bucket is full, toss the oldest element.
*/
if (sch->sch_length >= tcp_syncache.bucket_limit) {
- /*
- * The bucket is full, toss the oldest element.
- */
- sc2 = TAILQ_FIRST(&sch->sch_bucket);
- sc2->sc_tp->ts_recent = ticks;
+ KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
+ ("sch->sch_length incorrect"));
+ sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
syncache_drop(sc2, sch);
tcpstat.tcps_sc_bucketoverflow++;
- } else if (tcp_syncache.cache_count >= tcp_syncache.cache_limit) {
- /*
- * The cache is full. Toss the oldest entry in the
- * entire cache. This is the front entry in the
- * first non-empty timer queue with the largest
- * timeout value.
- */
- for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
- sc2 = TAILQ_FIRST(&tcp_syncache.timerq[i]);
- if (sc2 != NULL)
- break;
- }
- sc2->sc_tp->ts_recent = ticks;
- syncache_drop(sc2, NULL);
- tcpstat.tcps_sc_cacheoverflow++;
}
- /* Initialize the entry's timer. */
- SYNCACHE_TIMEOUT(sc, 0);
-
/* Put it into the bucket. */
- TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash);
+ TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+
+ /* Reinitialize the bucket row's timer. */
+ if (sch->sch_length == 1)
+ sch->sch_nextc = ticks + INT_MAX;
+ syncache_timeout(sc, sch, 1);
+
+ SCH_UNLOCK(sch);
+
tcp_syncache.cache_count++;
tcpstat.tcps_sc_added++;
}
+/*
+ * Remove and free entry from syncache bucket row.
+ * Expects locked syncache head.
+ */
static void
-syncache_drop(sc, sch)
- struct syncache *sc;
- struct syncache_head *sch;
+syncache_drop(struct syncache *sc, struct syncache_head *sch)
{
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- if (sch == NULL) {
-#ifdef INET6
- if (sc->sc_inc.inc_isipv6) {
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)];
- } else
-#endif
- {
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)];
- }
- }
+ SCH_LOCK_ASSERT(sch);
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
- tcp_syncache.cache_count--;
-
- TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot], sc, sc_timerq);
- if (TAILQ_EMPTY(&tcp_syncache.timerq[sc->sc_rxtslot]))
- callout_stop(&tcp_syncache.tt_timerq[sc->sc_rxtslot]);
syncache_free(sc);
+ tcp_syncache.cache_count--;
+}
+
+/*
+ * Engage/reengage time on bucket row.
+ */
+static void
+syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
+{
+ sc->sc_rxttime = ticks +
+ TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+ sc->sc_rxmits++;
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
+ sch->sch_nextc = sc->sc_rxttime;
+ if (docallout)
+ callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
+ syncache_timer, (void *)sch);
+ }
}
/*
* Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
* If we have retransmitted an entry the maximum number of times, expire it.
+ * One separate timer for each bucket row.
*/
static void
-syncache_timer(xslot)
- void *xslot;
+syncache_timer(void *xsch)
{
- intptr_t slot = (intptr_t)xslot;
+ struct syncache_head *sch = (struct syncache_head *)xsch;
struct syncache *sc, *nsc;
- struct inpcb *inp;
+ int tick = ticks;
+ char *s;
- INP_INFO_WLOCK(&tcbinfo);
- if (callout_pending(&tcp_syncache.tt_timerq[slot]) ||
- !callout_active(&tcp_syncache.tt_timerq[slot])) {
- /* XXX can this happen? */
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- callout_deactivate(&tcp_syncache.tt_timerq[slot]);
+ /* NB: syncache_head has already been locked by the callout. */
+ SCH_LOCK_ASSERT(sch);
- nsc = TAILQ_FIRST(&tcp_syncache.timerq[slot]);
- while (nsc != NULL) {
- if (ticks < nsc->sc_rxttime)
- break;
- sc = nsc;
- inp = sc->sc_tp->t_inpcb;
- if (slot == SYNCACHE_MAXREXMTS ||
- slot >= tcp_syncache.rexmt_limit ||
- inp == NULL || inp->inp_gencnt != sc->sc_inp_gencnt) {
- nsc = TAILQ_NEXT(sc, sc_timerq);
- syncache_drop(sc, NULL);
+ /*
+ * In the following cycle we may remove some entries and/or
+ * advance some timeouts, so re-initialize the bucket timer.
+ */
+ sch->sch_nextc = tick + INT_MAX;
+
+ TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
+ /*
+ * We do not check if the listen socket still exists
+ * and accept the case where the listen socket may be
+ * gone by the time we resend the SYN/ACK. We do
+ * not expect this to happen often. If it does,
+ * then the RST will be sent by the time the remote
+ * host does the SYN/ACK->ACK.
+ */
+ if (TSTMP_GT(sc->sc_rxttime, tick)) {
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
+ sch->sch_nextc = sc->sc_rxttime;
+ continue;
+ }
+
+ if (sc->sc_rxmits > tcp_syncache.rexmt_limit) {
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
+ "giving up and removing syncache entry\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ syncache_drop(sc, sch);
tcpstat.tcps_sc_stale++;
continue;
}
- /*
- * syncache_respond() may call back into the syncache to
- * to modify another entry, so do not obtain the next
- * entry on the timer chain until it has completed.
- */
-#ifdef TCPDEBUG
- (void) syncache_respond(sc, NULL, NULL);
-#else
- (void) syncache_respond(sc, NULL);
-#endif
- nsc = TAILQ_NEXT(sc, sc_timerq);
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Response timeout, "
+ "retransmitting (%u) SYN|ACK\n",
+ s, __func__, sc->sc_rxmits);
+ free(s, M_TCPLOG);
+ }
+
+ (void) syncache_respond(sc);
tcpstat.tcps_sc_retransmitted++;
- TAILQ_REMOVE(&tcp_syncache.timerq[slot], sc, sc_timerq);
- SYNCACHE_TIMEOUT(sc, slot + 1);
+ syncache_timeout(sc, sch, 0);
}
- if (nsc != NULL)
- callout_reset(&tcp_syncache.tt_timerq[slot],
- nsc->sc_rxttime - ticks, syncache_timer, (void *)(slot));
- INP_INFO_WUNLOCK(&tcbinfo);
+ if (!TAILQ_EMPTY(&(sch)->sch_bucket))
+ callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
+ syncache_timer, (void *)(sch));
}
/*
* Find an entry in the syncache.
+ * Always returns with a locked syncache_head plus a matching entry or NULL.
*/
struct syncache *
-syncache_lookup(inc, schp)
- struct in_conninfo *inc;
- struct syncache_head **schp;
+syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
#ifdef INET6
if (inc->inc_isipv6) {
sch = &tcp_syncache.hashbase[
SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
*schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
return (sc);
@@ -426,6 +473,10 @@
sch = &tcp_syncache.hashbase[
SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
*schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
#ifdef INET6
if (sc->sc_inc.inc_isipv6)
@@ -435,7 +486,8 @@
return (sc);
}
}
- return (NULL);
+ SCH_LOCK_ASSERT(*schp);
+ return (NULL); /* always returns with locked sch */
}
/*
@@ -444,18 +496,44 @@
* connection is in the syn cache. If it is, zap it.
*/
void
-syncache_chkrst(inc, th)
- struct in_conninfo *inc;
- struct tcphdr *th;
+syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
{
struct syncache *sc;
struct syncache_head *sch;
+ char *s = NULL;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
+ * See RFC 793 page 65, section SEGMENT ARRIVES.
+ */
+ if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
+ "FIN flag set, segment ignored\n", s, __func__);
+ tcpstat.tcps_badrst++;
+ goto done;
+ }
+
+ /*
+ * No corresponding connection was found in syncache.
+ * If syncookies are enabled and possibly exclusively
+ * used, or we are under memory pressure, a valid RST
+ * may not find a syncache entry. In that case we're
+ * done and no SYN|ACK retransmissions will happen.
+ * Otherwise the RST was misdirected or spoofed.
+ */
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
+ "syncache entry (possibly syncookie only), "
+ "segment ignored\n", s, __func__);
+ tcpstat.tcps_badrst++;
+ goto done;
+ }
- sc = syncache_lookup(inc, &sch);
- if (sc == NULL)
- return;
/*
* If the RST bit is set, check the sequence number to see
* if this is a valid reset segment.
@@ -472,43 +550,53 @@
if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
syncache_drop(sc, sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
+ "connection attempt aborted by remote endpoint\n",
+ s, __func__);
tcpstat.tcps_sc_reset++;
+ } else if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != IRS %u "
+ "(+WND %u), segment ignored\n",
+ s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
+ tcpstat.tcps_badrst++;
}
+
+done:
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ SCH_UNLOCK(sch);
}
void
-syncache_badack(inc)
- struct in_conninfo *inc;
+syncache_badack(struct in_conninfo *inc)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
syncache_drop(sc, sch);
tcpstat.tcps_sc_badack++;
}
+ SCH_UNLOCK(sch);
}
void
-syncache_unreach(inc, th)
- struct in_conninfo *inc;
- struct tcphdr *th;
+syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc == NULL)
- return;
+ goto done;
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th->th_seq) != sc->sc_iss)
- return;
+ goto done;
/*
 * If we've retransmitted 3 times and this is our second error,
@@ -518,28 +606,27 @@
*
* See tcp_notify().
*/
- if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) {
+ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) {
sc->sc_flags |= SCF_UNREACH;
- return;
+ goto done;
}
syncache_drop(sc, sch);
tcpstat.tcps_sc_unreach++;
+done:
+ SCH_UNLOCK(sch);
}
/*
* Build a new TCP socket structure from a syncache entry.
*/
static struct socket *
-syncache_socket(sc, lso, m)
- struct syncache *sc;
- struct socket *lso;
- struct mbuf *m;
+syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
+ char *s;
- NET_ASSERT_GIANT();
INP_INFO_WLOCK_ASSERT(&tcbinfo);
/*
@@ -551,10 +638,17 @@
so = sonewconn(lso, SS_ISCONNECTED);
if (so == NULL) {
/*
- * Drop the connection; we will send a RST if the peer
- * retransmits the ACK,
+ * Drop the connection; we will either send a RST or
+ * have the peer retransmit its SYN again after its
+ * RTO and try again.
*/
tcpstat.tcps_listendrop++;
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Socket create failed "
+ "due to limits or memory shortage\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
goto abort2;
}
#ifdef MAC
@@ -566,9 +660,7 @@
inp = sotoinpcb(so);
INP_LOCK(inp);
- /*
- * Insert new socket into hash list.
- */
+ /* Insert new socket into PCB hash list. */
inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6;
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -597,14 +689,9 @@
goto abort;
}
#ifdef IPSEC
- /* copy old policy into new socket's */
- if (ipsec_copy_pcbpolicy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
- printf("syncache_expand: could not copy policy\n");
-#endif
-#ifdef FAST_IPSEC
- /* copy old policy into new socket's */
+ /* Copy old policy into new socket's. */
if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
- printf("syncache_expand: could not copy policy\n");
+ printf("syncache_socket: could not copy policy\n");
#endif
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -667,7 +754,6 @@
goto abort;
}
}
-
tp = intotcpcb(inp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
@@ -675,31 +761,36 @@
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
tp->snd_wl1 = sc->sc_irs;
+ tp->snd_max = tp->iss + 1;
+ tp->snd_nxt = tp->iss + 1;
tp->rcv_up = sc->sc_irs + 1;
tp->rcv_wnd = sc->sc_wnd;
tp->rcv_adv += tp->rcv_wnd;
+ tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
if (sc->sc_flags & SCF_NOOPT)
tp->t_flags |= TF_NOOPT;
- if (sc->sc_flags & SCF_WINSCALE) {
- tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
- tp->requested_s_scale = sc->sc_requested_s_scale;
- tp->request_r_scale = sc->sc_request_r_scale;
- }
- if (sc->sc_flags & SCF_TIMESTAMP) {
- tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
- tp->ts_recent = sc->sc_tsrecent;
- tp->ts_recent_age = ticks;
- }
+ else {
+ if (sc->sc_flags & SCF_WINSCALE) {
+ tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
+ tp->snd_scale = sc->sc_requested_s_scale;
+ tp->request_r_scale = sc->sc_requested_r_scale;
+ }
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
+ tp->ts_recent = sc->sc_tsreflect;
+ tp->ts_recent_age = ticks;
+ tp->ts_offset = sc->sc_tsoff;
+ }
#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- tp->t_flags |= TF_SIGNATURE;
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tp->t_flags |= TF_SIGNATURE;
#endif
- if (sc->sc_flags & SCF_SACK) {
- tp->sack_enable = 1;
- tp->t_flags |= TF_SACK_PERMIT;
+ if (sc->sc_flags & SCF_SACK)
+ tp->t_flags |= TF_SACK_PERMIT;
}
+
/*
* Set up MSS and get cached values from tcp_hostcache.
* This might overwrite some of the defaults we just set.
@@ -709,9 +800,9 @@
/*
* If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
*/
- if (sc->sc_rxtslot != 0)
+ if (sc->sc_rxmits)
tp->snd_cwnd = tp->t_maxseg;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
INP_UNLOCK(inp);
@@ -722,7 +813,7 @@
INP_UNLOCK(inp);
abort2:
if (so != NULL)
- (void) soabort(so);
+ soabort(so);
return (NULL);
}
@@ -734,15 +825,24 @@
* the SYN-RECEIVED state.
*/
int
-syncache_expand(struct in_conninfo *inc, struct tcphdr *th, struct socket **sop, struct mbuf *m)
+syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct socket **lsop, struct mbuf *m)
{
struct syncache *sc;
struct syncache_head *sch;
- struct socket *so;
+ struct syncache scs;
+ char *s;
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
+ ("%s: can handle only ACK", __func__));
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc == NULL) {
/*
* There is no syncache entry, so see if this ACK is
@@ -753,43 +853,105 @@
* B. check that the syncookie is valid. If it is, then
* cobble up a fake syncache entry, and return.
*/
- if (!tcp_syncookies)
- return (0);
- sc = syncookie_lookup(inc, th, *sop);
- if (sc == NULL)
- return (0);
- sch = NULL;
- tcpstat.tcps_sc_recvcookie++;
+ if (!tcp_syncookies) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected (syncookies disabled)\n",
+ s, __func__);
+ goto failed;
+ }
+ bzero(&scs, sizeof(scs));
+ sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+ SCH_UNLOCK(sch);
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Segment failed "
+ "SYNCOOKIE authentication, segment rejected "
+ "(probably spoofed)\n", s, __func__);
+ goto failed;
+ }
+ } else {
+ /* Pull out the entry to unlock the bucket row. */
+ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
+ sch->sch_length--;
+ tcp_syncache.cache_count--;
+ SCH_UNLOCK(sch);
}
/*
- * If seg contains an ACK, but not for our SYN/ACK, send a RST.
+ * Segment validation:
+ * ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
if (th->th_ack != sc->sc_iss + 1) {
- if (sch == NULL)
- syncache_free(sc);
- return (0);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
+ goto failed;
+ }
+ /*
+ * The SEQ must match the received initial receive sequence
+ * number + 1 (the SYN) because we didn't ACK any data that
+ * may have come with the SYN.
+ */
+ if (th->th_seq != sc->sc_irs + 1) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
+ goto failed;
}
-
- so = syncache_socket(sc, *sop, m);
- if (so == NULL) {
#if 0
-resetandabort:
- /* XXXjlemon check this - is this correct? */
- (void) tcp_respond(NULL, m, m, th,
- th->th_seq + tlen, (tcp_seq)0, TH_RST|TH_ACK);
+ /*
+ * If timestamps were present in the SYN and we accepted
+ * them in our SYN|ACK we require them to be present from
+ * now on. And vice versa.
+ *
+ * Unfortunately, during testing of 7.0 some users found
+ * network devices that violate this constraint, so it must
+ * be disabled.
+ */
+ if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "segment rejected\n", s, __func__);
+ goto failed;
+ }
#endif
- m_freem(m); /* XXX only needed for above */
+ if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "segment rejected\n", s, __func__);
+ goto failed;
+ }
+ /*
+ * If timestamps were negotiated the reflected timestamp
+ * must be equal to what we actually sent in the SYN|ACK.
+ */
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
+ "segment rejected\n",
+ s, __func__, to->to_tsecr, sc->sc_ts);
+ goto failed;
+ }
+
+ *lsop = syncache_socket(sc, *lsop, m);
+
+ if (*lsop == NULL)
tcpstat.tcps_sc_aborted++;
- } else
+ else
tcpstat.tcps_sc_completed++;
- if (sch == NULL)
+ if (sc != &scs)
syncache_free(sc);
- else
- syncache_drop(sc, sch);
- *sop = so;
return (1);
+failed:
+ if (sc != NULL && sc != &scs)
+ syncache_free(sc);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ *lsop = NULL;
+ return (0);
}
/*
@@ -805,13 +967,9 @@
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
-int
-syncache_add(inc, to, th, sop, m)
- struct in_conninfo *inc;
- struct tcpopt *to;
- struct tcphdr *th;
- struct socket **sop;
- struct mbuf *m;
+void
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m)
{
struct tcpcb *tp;
struct socket *so;
@@ -819,13 +977,53 @@
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
u_int32_t flowtmp;
- int i, win;
+ int win, sb_hiwat, ip_ttl, ip_tos, noopt;
+ char *s;
+#ifdef INET6
+ int autoflowlabel = 0;
+#endif
+#ifdef MAC
+ struct label *maclabel;
+#endif
+ struct syncache scs;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp); /* listen socket */
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
+ ("%s: unexpected tcp flags", __func__));
- so = *sop;
+ /*
+ * Combine all so/tp operations very early to drop the INP lock as
+ * soon as possible.
+ */
+ so = *lsop;
tp = sototcpcb(so);
+#ifdef INET6
+ if (inc->inc_isipv6 &&
+ (inp->in6p_flags & IN6P_AUTOFLOWLABEL))
+ autoflowlabel = 1;
+#endif
+ ip_ttl = inp->inp_ip_ttl;
+ ip_tos = inp->inp_ip_tos;
+ win = sbspace(&so->so_rcv);
+ sb_hiwat = so->so_rcv.sb_hiwat;
+ noopt = (tp->t_flags & TF_NOOPT);
+
+ so = NULL;
+ tp = NULL;
+
+#ifdef MAC
+ if (mac_init_syncache(&maclabel) != 0) {
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ goto done;
+ } else
+ mac_init_syncache_from_inpcb(maclabel, inp);
+#endif
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
/*
* Remember the IP options, if any.
*/
@@ -838,11 +1036,16 @@
* See if we already have an entry for this connection.
* If we do, resend the SYN,ACK, and reset the retransmit timer.
*
- * XXX
- * should the syncache be re-initialized with the contents
+ * XXX: should the syncache be re-initialized with the contents
* of the new SYN here (which may have different options?)
+ *
+ * XXX: We do not check the sequence number to see if this is a
+ * real retransmit or a new connection attempt. The question is
+ * how to handle such a case; either ignore it as spoofed, or
+ * drop the current entry and create a new one?
*/
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked entry */
+ SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
tcpstat.tcps_sc_dupsyn++;
if (ipopts) {
@@ -857,27 +1060,35 @@
/*
* Update timestamp if present.
*/
- if (sc->sc_flags & SCF_TIMESTAMP)
- sc->sc_tsrecent = to->to_tsval;
+ if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
+ sc->sc_tsreflect = to->to_tsval;
+ else
+ sc->sc_flags &= ~SCF_TIMESTAMP;
+#ifdef MAC
/*
- * PCB may have changed, pick up new values.
+ * Since we have already unconditionally allocated label
+ * storage, free it up. The syncache entry will already
+ * have an initialized label we can use.
*/
- sc->sc_tp = tp;
- sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
-#ifdef TCPDEBUG
- if (syncache_respond(sc, m, so) == 0) {
-#else
- if (syncache_respond(sc, m) == 0) {
-#endif
- /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */
- TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot],
- sc, sc_timerq);
- SYNCACHE_TIMEOUT(sc, sc->sc_rxtslot);
+ mac_destroy_syncache(&maclabel);
+ KASSERT(sc->sc_label != NULL,
+ ("%s: label not initialized", __func__));
+#endif
+ /* Retransmit SYN|ACK and reset retransmit count. */
+ if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
+ "resetting timer and retransmitting SYN|ACK\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ if (syncache_respond(sc) == 0) {
+ sc->sc_rxmits = 0;
+ syncache_timeout(sc, sch, 1);
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
}
- *sop = NULL;
- return (1);
+ SCH_UNLOCK(sch);
+ goto done;
}
sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
@@ -887,67 +1098,48 @@
* Treat this as if the cache was full; drop the oldest
* entry and insert the new one.
*/
- /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */
- for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
- sc = TAILQ_FIRST(&tcp_syncache.timerq[i]);
- if (sc != NULL)
- break;
- }
- sc->sc_tp->ts_recent = ticks;
- syncache_drop(sc, NULL);
tcpstat.tcps_sc_zonefail++;
+ if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+ syncache_drop(sc, sch);
sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
- if (ipopts)
- (void) m_free(ipopts);
- return (0);
+ if (tcp_syncookies) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ } else {
+ SCH_UNLOCK(sch);
+ if (ipopts)
+ (void) m_free(ipopts);
+ goto done;
+ }
}
}
/*
* Fill in the syncache values.
*/
- sc->sc_tp = tp;
- sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
+#ifdef MAC
+ sc->sc_label = maclabel;
+#endif
sc->sc_ipopts = ipopts;
- sc->sc_inc.inc_fport = inc->inc_fport;
- sc->sc_inc.inc_lport = inc->inc_lport;
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
#ifdef INET6
- sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
- if (inc->inc_isipv6) {
- sc->sc_inc.inc6_faddr = inc->inc6_faddr;
- sc->sc_inc.inc6_laddr = inc->inc6_laddr;
- } else
+ if (!inc->inc_isipv6)
#endif
{
- sc->sc_inc.inc_faddr = inc->inc_faddr;
- sc->sc_inc.inc_laddr = inc->inc_laddr;
+ sc->sc_ip_tos = ip_tos;
+ sc->sc_ip_ttl = ip_ttl;
}
+
sc->sc_irs = th->th_seq;
+ sc->sc_iss = arc4random();
sc->sc_flags = 0;
- sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0;
sc->sc_flowlabel = 0;
- if (tcp_syncookies) {
- sc->sc_iss = syncookie_generate(sc, &flowtmp);
-#ifdef INET6
- if (inc->inc_isipv6 &&
- (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)) {
- sc->sc_flowlabel = flowtmp & IPV6_FLOWLABEL_MASK;
- }
-#endif
- } else {
- sc->sc_iss = arc4random();
-#ifdef INET6
- if (inc->inc_isipv6 &&
- (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)) {
- sc->sc_flowlabel =
- (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
- }
-#endif
- }
- /* Initial receive window: clip sbspace to [0 .. TCP_MAXWIN] */
- win = sbspace(&so->so_rcv);
+ /*
+ * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
+ * win was derived from socket earlier in the function.
+ */
win = imax(win, 0);
win = imin(win, TCP_MAXWIN);
sc->sc_wnd = win;
@@ -958,7 +1150,8 @@
* it ok to send timestamp requests and replies.
*/
if (to->to_flags & TOF_TS) {
- sc->sc_tsrecent = to->to_tsval;
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = ticks;
sc->sc_flags |= SCF_TIMESTAMP;
}
if (to->to_flags & TOF_SCALE) {
@@ -981,73 +1174,86 @@
* of 3 will be chosen by this algorithm. Those who
* choose a larger maxsockbuf should watch out
 * for the compatibility problems mentioned above.
- *
- * RFC1323: The Window field in a SYN (i.e., a <SYN>
- * or <SYN,ACK>) segment itself is never scaled.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
*/
while (wscale < TCP_MAX_WINSHIFT &&
(TCP_MAXWIN << wscale) < sb_max)
wscale++;
- sc->sc_request_r_scale = wscale;
- sc->sc_requested_s_scale = to->to_requested_s_scale;
+ sc->sc_requested_r_scale = wscale;
+ sc->sc_requested_s_scale = to->to_wscale;
sc->sc_flags |= SCF_WINSCALE;
}
}
- if (tp->t_flags & TF_NOOPT)
- sc->sc_flags = SCF_NOOPT;
#ifdef TCP_SIGNATURE
/*
* If listening socket requested TCP digests, and received SYN
* contains the option, flag this in the syncache so that
* syncache_respond() will do the right thing with the SYN+ACK.
- * XXX Currently we always record the option by default and will
+ * XXX: Currently we always record the option by default and will
* attempt to use it in syncache_respond().
*/
if (to->to_flags & TOF_SIGNATURE)
sc->sc_flags |= SCF_SIGNATURE;
#endif
-
- if (to->to_flags & TOF_SACK)
+ if (to->to_flags & TOF_SACKPERM)
sc->sc_flags |= SCF_SACK;
+ if (to->to_flags & TOF_MSS)
+ sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
+ if (noopt)
+ sc->sc_flags |= SCF_NOOPT;
+
+ if (tcp_syncookies) {
+ syncookie_generate(sch, sc, &flowtmp);
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel = flowtmp;
+#endif
+ } else {
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel =
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+#endif
+ }
+ SCH_UNLOCK(sch);
/*
* Do a standard 3-way handshake.
*/
-#ifdef TCPDEBUG
- if (syncache_respond(sc, m, so) == 0) {
-#else
- if (syncache_respond(sc, m) == 0) {
-#endif
- syncache_insert(sc, sch);
+ if (syncache_respond(sc) == 0) {
+ if (tcp_syncookies && tcp_syncookiesonly && sc != &scs)
+ syncache_free(sc);
+ else if (sc != &scs)
+ syncache_insert(sc, sch); /* locks and unlocks sch */
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
} else {
- syncache_free(sc);
+ if (sc != &scs)
+ syncache_free(sc);
tcpstat.tcps_sc_dropped++;
}
- *sop = NULL;
- return (1);
+
+done:
+#ifdef MAC
+ if (sc == &scs)
+ mac_destroy_syncache(&maclabel);
+#endif
+ *lsop = NULL;
+ m_freem(m);
+ return;
}
-#ifdef TCPDEBUG
static int
-syncache_respond(sc, m, so)
- struct syncache *sc;
- struct mbuf *m;
- struct socket *so;
-#else
-static int
-syncache_respond(sc, m)
- struct syncache *sc;
- struct mbuf *m;
-#endif
+syncache_respond(struct syncache *sc)
{
- u_int8_t *optp;
- int optlen, error;
- u_int16_t tlen, hlen, mssopt;
struct ip *ip = NULL;
+ struct mbuf *m;
struct tcphdr *th;
- struct inpcb *inp;
+ int optlen, error;
+ u_int16_t hlen, tlen, mssopt;
+ struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
@@ -1057,54 +1263,28 @@
(sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) :
#endif
sizeof(struct ip);
+ tlen = hlen + sizeof(struct tcphdr);
- KASSERT((&sc->sc_inc) != NULL, ("syncache_respond with NULL in_conninfo pointer"));
-
- /* Determine MSS we advertize to other end of connection */
+ /* Determine MSS we advertize to other end of connection. */
mssopt = tcp_mssopt(&sc->sc_inc);
+ if (sc->sc_peer_mss)
+ mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss);
- /* Compute the size of the TCP options. */
- if (sc->sc_flags & SCF_NOOPT) {
- optlen = 0;
- } else {
- optlen = TCPOLEN_MAXSEG +
- ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
- ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
-#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- optlen += TCPOLEN_SIGNATURE;
-#endif
- if (sc->sc_flags & SCF_SACK)
- optlen += TCPOLEN_SACK_PERMITTED;
- optlen = roundup2(optlen, 4);
- }
- tlen = hlen + sizeof(struct tcphdr) + optlen;
-
- /*
- * XXX
- * assume that the entire packet will fit in a header mbuf
- */
- KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small"));
+ /* XXX: Assume that the entire packet will fit in a header mbuf. */
+ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
+ ("syncache: mbuf too small"));
- /*
- * XXX shouldn't this reuse the mbuf if possible ?
- * Create the IP+TCP header from scratch.
- */
- if (m)
- m_freem(m);
-
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ /* Create the IP+TCP header from scratch. */
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
+#ifdef MAC
+ mac_create_mbuf_from_syncache(sc->sc_label, m);
+#endif
m->m_data += max_linkhdr;
m->m_len = tlen;
m->m_pkthdr.len = tlen;
m->m_pkthdr.rcvif = NULL;
- inp = sc->sc_tp->t_inpcb;
- INP_LOCK(inp);
-#ifdef MAC
- mac_create_mbuf_from_inpcb(inp, m);
-#endif
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -1132,8 +1312,8 @@
ip->ip_p = IPPROTO_TCP;
ip->ip_src = sc->sc_inc.inc_laddr;
ip->ip_dst = sc->sc_inc.inc_faddr;
- ip->ip_ttl = inp->inp_ip_ttl; /* XXX */
- ip->ip_tos = inp->inp_ip_tos; /* XXX */
+ ip->ip_ttl = sc->sc_ip_ttl;
+ ip->ip_tos = sc->sc_ip_tos;
/*
* See if we should do MTU discovery. Route lookups are
@@ -1152,277 +1332,397 @@
th->th_seq = htonl(sc->sc_iss);
th->th_ack = htonl(sc->sc_irs + 1);
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_off = sizeof(struct tcphdr) >> 2;
th->th_x2 = 0;
th->th_flags = TH_SYN|TH_ACK;
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
/* Tack on the TCP options. */
- if (optlen != 0) {
- optp = (u_int8_t *)(th + 1);
- *optp++ = TCPOPT_MAXSEG;
- *optp++ = TCPOLEN_MAXSEG;
- *optp++ = (mssopt >> 8) & 0xff;
- *optp++ = mssopt & 0xff;
+ if ((sc->sc_flags & SCF_NOOPT) == 0) {
+ to.to_flags = 0;
+ to.to_mss = mssopt;
+ to.to_flags = TOF_MSS;
if (sc->sc_flags & SCF_WINSCALE) {
- *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
- sc->sc_request_r_scale);
- optp += 4;
+ to.to_wscale = sc->sc_requested_r_scale;
+ to.to_flags |= TOF_SCALE;
}
-
if (sc->sc_flags & SCF_TIMESTAMP) {
- u_int32_t *lp = (u_int32_t *)(optp);
-
- /* Form timestamp option per appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(sc->sc_tsrecent);
- optp += TCPOLEN_TSTAMP_APPA;
+ /* Virgin timestamp or TCP cookie enhanced one. */
+ to.to_tsval = sc->sc_ts;
+ to.to_tsecr = sc->sc_tsreflect;
+ to.to_flags |= TOF_TS;
}
-
+ if (sc->sc_flags & SCF_SACK)
+ to.to_flags |= TOF_SACKPERM;
#ifdef TCP_SIGNATURE
- /*
- * Handle TCP-MD5 passive opener response.
- */
- if (sc->sc_flags & SCF_SIGNATURE) {
- u_int8_t *bp = optp;
- int i;
-
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
- optp + 2, IPSEC_DIR_OUTBOUND);
- optp += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
+ if (sc->sc_flags & SCF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
- if (sc->sc_flags & SCF_SACK) {
- *optp++ = TCPOPT_SACK_PERMITTED;
- *optp++ = TCPOLEN_SACK_PERMITTED;
- }
+ /* Adjust headers by option size. */
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
- {
- /* Pad TCP options to a 4 byte boundary */
- int padlen = optlen - (optp - (u_int8_t *)(th + 1));
- while (padlen-- > 0)
- *optp++ = TCPOPT_EOL;
- }
- }
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
+ to.to_signature, IPSEC_DIR_OUTBOUND);
+#endif
+#ifdef INET6
+ if (sc->sc_inc.inc_isipv6)
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
+ else
+#endif
+ ip->ip_len += optlen;
+ } else
+ optlen = 0;
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
th->th_sum = 0;
- th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
+ tlen + optlen - hlen);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
- error = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
+ error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
} else
#endif
{
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tlen - hlen + IPPROTO_TCP));
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
-#ifdef TCPDEBUG
- /*
- * Trace.
- */
- if (so != NULL && so->so_options & SO_DEBUG) {
- struct tcpcb *tp = sototcpcb(so);
- tcp_trace(TA_OUTPUT, tp->t_state, tp,
- mtod(m, void *), th, 0);
- }
-#endif
- error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, inp);
+ error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
- INP_UNLOCK(inp);
return (error);
}
/*
- * cookie layers:
+ * The purpose of SYN cookies is to avoid keeping track of all SYN's we
+ * receive and to be able to handle SYN floods from bogus source addresses
+ * (where we will never receive any reply). SYN floods try to exhaust all
+ * our memory and available slots in the SYN cache table to cause a denial
+ * of service to legitimate users of the local host.
*
- * |. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .|
- * | peer iss |
- * | MD5(laddr,faddr,secret,lport,fport) |. . . . . . .|
- * | 0 |(A)| |
- * (A): peer mss index
- */
-
-/*
- * The values below are chosen to minimize the size of the tcp_secret
- * table, as well as providing roughly a 16 second lifetime for the cookie.
- */
-
-#define SYNCOOKIE_WNDBITS 5 /* exposed bits for window indexing */
-#define SYNCOOKIE_TIMESHIFT 1 /* scale ticks to window time units */
-
-#define SYNCOOKIE_WNDMASK ((1 << SYNCOOKIE_WNDBITS) - 1)
-#define SYNCOOKIE_NSECRETS (1 << SYNCOOKIE_WNDBITS)
-#define SYNCOOKIE_TIMEOUT \
- (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT))
-#define SYNCOOKIE_DATAMASK ((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK)
-
-static struct {
- u_int32_t ts_secbits[4];
- u_int ts_expire;
-} tcp_secret[SYNCOOKIE_NSECRETS];
-
-static int tcp_msstab[] = { 0, 536, 1460, 8960 };
-
-#define MD5Add(v) MD5Update(&syn_ctx, (u_char *)&v, sizeof(v))
-
-struct md5_add {
- u_int32_t laddr, faddr;
- u_int32_t secbits[4];
- u_int16_t lport, fport;
-};
-
-#ifdef CTASSERT
-CTASSERT(sizeof(struct md5_add) == 28);
-#endif
-
-/*
+ * The idea of SYN cookies is to encode and include all necessary information
+ * about the connection setup state within the SYN-ACK we send back and thus
+ * to get along without keeping any local state until the ACK to the SYN-ACK
+ * arrives (if ever). Everything we need to know should be available from
+ * the information we encoded in the SYN-ACK.
+ *
+ * More information about the theory behind SYN cookies and its first
+ * discussion and specification can be found at:
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (gory details)
+ *
+ * This implementation extends the original idea and first implementation
+ * of FreeBSD by using not only the initial sequence number field to store
+ * information but also the timestamp field if present. This way we can
+ * keep track of the entire state we need to know to recreate the session in
+ * its original form. Almost all TCP speakers implement RFC1323 timestamps
+ * these days. For those that do not we still have to live with the known
+ * shortcomings of the ISN only SYN cookies.
+ *
+ * Cookie layers:
+ *
+ * Initial sequence number we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
+ * D = MD5 Digest (first dword)
+ * M = MSS index
+ * R = Rotation of secret
+ * P = Odd or Even secret
+ *
+ * The MD5 Digest is computed over the following parameters:
+ * a) randomly rotated secret
+ * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
+ * c) the received initial sequence number from remote host
+ * d) the rotation offset and odd/even bit
+ *
+ * Timestamp we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
+ * D = MD5 Digest (third dword) (only as filler)
+ * S = Requested send window scale
+ * R = Requested receive window scale
+ * A = SACK allowed
+ * 5 = TCP-MD5 enabled (not implemented yet)
+ * XORed with MD5 Digest (fourth dword)
+ *
+ * The timestamp isn't cryptographically secure and doesn't need to be.
+ * The double use of the MD5 digest dwords ties it to a specific remote/
+ * local host/port, remote initial sequence number and our local time
+ * limited secret. A received timestamp is reverted (XORed) and then
+ * the contained MD5 dword is compared to the computed one to ensure the
+ * timestamp belongs to the SYN-ACK we sent. The other parameters may
+ * have been tampered with but this isn't different from supplying bogus
+ * values in the SYN in the first place.
+ *
+ * Some problems with SYN cookies remain however:
* Consider the problem of a recreated (and retransmitted) cookie. If the
* original SYN was accepted, the connection is established. The second
* SYN is inflight, and if it arrives with an ISN that falls within the
* receive window, the connection is killed.
*
- * However, since cookies have other problems, this may not be worth
- * worrying about.
+ * Notes:
+ * A heuristic to determine when to accept syn cookies is not necessary.
+ * An ACK flood would cause the syncookie verification to be attempted,
+ * but a SYN flood causes syncookies to be generated. Both are of equal
+ * cost, so there's no point in trying to optimize the ACK flood case.
+ * Also, if you don't process certain ACKs for some reason, then all someone
+ * would have to do is launch a SYN and ACK flood at the same time, which
+ * would stop cookie verification and defeat the entire purpose of syncookies.
*/
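To make the layout above concrete, here is a minimal stand-alone sketch of the ISN packing; the helper names and the sample digest value are invented for the example, only the bit widths come from the layout comment:

#include <stdio.h>
#include <stdint.h>

/*
 * ISN layout per the comment above:
 *   bits 31..7 = MD5 digest (first dword), bits 6..4 = MSS table index,
 *   bits 3..1  = secret rotation offset,  bit 0    = odd/even secret.
 * The timestamp word is packed similarly (TCP-MD5 bit, SACK bit, two 4-bit
 * window scales, digest filler) and then XORed with a further digest dword.
 */
static uint32_t
cookie_isn_pack(uint32_t md5_dword0, unsigned mss_idx, unsigned rot,
    unsigned oddeven)
{
	uint32_t data;

	data = oddeven & 0x1;		/* odd or even secret, 1 bit */
	data |= (rot & 0x7) << 1;	/* secret rotation offset, 3 bits */
	data |= (mss_idx & 0x7) << 4;	/* MSS table index, 3 bits */
	data |= md5_dword0 << 7;	/* digest fills the remaining 25 bits */
	return (data);
}

static void
cookie_isn_unpack(uint32_t isn, unsigned *mss_idx, unsigned *rot,
    unsigned *oddeven)
{
	*oddeven = isn & 0x1;
	*rot = (isn >> 1) & 0x7;
	*mss_idx = (isn >> 4) & 0x7;
	/* (isn & ~0x7f) is what gets compared against (digest << 7) later. */
}

int
main(void)
{
	unsigned mss_idx, rot, oddeven;
	uint32_t isn = cookie_isn_pack(0x12345678, 6, 3, 1);

	cookie_isn_unpack(isn, &mss_idx, &rot, &oddeven);
	printf("mss_idx=%u rot=%u oddeven=%u\n", mss_idx, rot, oddeven);
	return (0);
}
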
+static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
-static u_int32_t
-syncookie_generate(struct syncache *sc, u_int32_t *flowid)
+static void
+syncookie_generate(struct syncache_head *sch, struct syncache *sc,
+ u_int32_t *flowlabel)
{
- u_int32_t md5_buffer[4];
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
u_int32_t data;
- int idx, i;
- struct md5_add add;
- MD5_CTX syn_ctx;
-
- /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */
-
- idx = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK;
- if (tcp_secret[idx].ts_expire < ticks) {
- for (i = 0; i < 4; i++)
- tcp_secret[idx].ts_secbits[i] = arc4random();
- tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT;
- }
- for (data = sizeof(tcp_msstab) / sizeof(int) - 1; data > 0; data--)
- if (tcp_msstab[data] <= sc->sc_peer_mss)
+ u_int32_t *secbits;
+ u_int off, pmss, mss;
+ int i;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /* Which of the two secrets to use. */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /* Reseed secret if too old. */
+ if (sch->sch_reseed < time_uptime) {
+ sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+ for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
+ secbits[i] = arc4random();
+ sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
+ }
+
+ /* Secret rotation offset. */
+ off = sc->sc_iss & 0x7; /* iss was randomized before */
+
+ /* Maximum segment size calculation. */
+ pmss = max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), tcp_minmss);
+ for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
+ if (tcp_sc_msstab[mss] <= pmss)
break;
- data = (data << SYNCOOKIE_WNDBITS) | idx;
- data ^= sc->sc_irs; /* peer's iss */
- MD5Init(&syn_ctx);
+
+ /* Fold parameters and MD5 digest into the ISN we will send. */
+ data = sch->sch_oddeven;/* odd or even secret, 1 bit */
+ data |= off << 1; /* secret offset, derived from iss, 3 bits */
+ data |= mss << 4; /* mss, 3 bits */
+
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
+ MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
+ MD5Update(&ctx, &data, sizeof(data));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ data |= (md5_buffer[0] << 7);
+ sc->sc_iss = data;
+
#ifdef INET6
- if (sc->sc_inc.inc_isipv6) {
- MD5Add(sc->sc_inc.inc6_laddr);
- MD5Add(sc->sc_inc.inc6_faddr);
- add.laddr = 0;
- add.faddr = 0;
- } else
+ *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
#endif
- {
- add.laddr = sc->sc_inc.inc_laddr.s_addr;
- add.faddr = sc->sc_inc.inc_faddr.s_addr;
+
+ /* Additional parameters are stored in the timestamp if present. */
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
+ data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
+ data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */
+ data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */
+ data |= md5_buffer[2] << 10; /* more digest bits */
+ data ^= md5_buffer[3];
+ sc->sc_ts = data;
+ sc->sc_tsoff = data - ticks; /* after XOR */
}
- add.lport = sc->sc_inc.inc_lport;
- add.fport = sc->sc_inc.inc_fport;
- add.secbits[0] = tcp_secret[idx].ts_secbits[0];
- add.secbits[1] = tcp_secret[idx].ts_secbits[1];
- add.secbits[2] = tcp_secret[idx].ts_secbits[2];
- add.secbits[3] = tcp_secret[idx].ts_secbits[3];
- MD5Add(add);
- MD5Final((u_char *)&md5_buffer, &syn_ctx);
- data ^= (md5_buffer[0] & ~SYNCOOKIE_WNDMASK);
- *flowid = md5_buffer[1];
+
tcpstat.tcps_sc_sendcookie++;
- return (data);
+ return;
}
static struct syncache *
-syncookie_lookup(inc, th, so)
- struct in_conninfo *inc;
- struct tcphdr *th;
- struct socket *so;
+syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
+ struct socket *so)
{
- u_int32_t md5_buffer[4];
- struct syncache *sc;
- u_int32_t data;
- int wnd, idx;
- struct md5_add add;
- MD5_CTX syn_ctx;
-
- /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */
-
- data = (th->th_ack - 1) ^ (th->th_seq - 1); /* remove ISS */
- idx = data & SYNCOOKIE_WNDMASK;
- if (tcp_secret[idx].ts_expire < ticks ||
- sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks)
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
+ u_int32_t data = 0;
+ u_int32_t *secbits;
+ tcp_seq ack, seq;
+ int off, mss, wnd, flags;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Pull information out of SYN-ACK/ACK and
+ * revert sequence number advances.
+ */
+ ack = th->th_ack - 1;
+ seq = th->th_seq - 1;
+ off = (ack >> 1) & 0x7;
+ mss = (ack >> 4) & 0x7;
+ flags = ack & 0x7f;
+
+ /* Which of the two secrets to use. */
+ secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /*
+ * The secret wasn't updated for the lifetime of a syncookie,
+ * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+ */
+ if (sch->sch_reseed < time_uptime) {
return (NULL);
- MD5Init(&syn_ctx);
-#ifdef INET6
- if (inc->inc_isipv6) {
- MD5Add(inc->inc6_laddr);
- MD5Add(inc->inc6_faddr);
- add.laddr = 0;
- add.faddr = 0;
- } else
-#endif
- {
- add.laddr = inc->inc_laddr.s_addr;
- add.faddr = inc->inc_faddr.s_addr;
}
- add.lport = inc->inc_lport;
- add.fport = inc->inc_fport;
- add.secbits[0] = tcp_secret[idx].ts_secbits[0];
- add.secbits[1] = tcp_secret[idx].ts_secbits[1];
- add.secbits[2] = tcp_secret[idx].ts_secbits[2];
- add.secbits[3] = tcp_secret[idx].ts_secbits[3];
- MD5Add(add);
- MD5Final((u_char *)&md5_buffer, &syn_ctx);
- data ^= md5_buffer[0];
- if ((data & ~SYNCOOKIE_DATAMASK) != 0)
- return (NULL);
- data = data >> SYNCOOKIE_WNDBITS;
- sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
- if (sc == NULL)
+ /* Recompute the digest so we can compare it. */
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, inc, sizeof(*inc));
+ MD5Update(&ctx, &seq, sizeof(seq));
+ MD5Update(&ctx, &flags, sizeof(flags));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ /* Does the digest part of our ACK'ed ISS match? */
+ if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
return (NULL);
- /*
- * Fill in the syncache values.
- * XXX duplicate code from syncache_add
- */
+
+ /* Does the digest part of our reflected timestamp match? */
+ if (to->to_flags & TOF_TS) {
+ data = md5_buffer[3] ^ to->to_tsecr;
+ if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
+ return (NULL);
+ }
+
+ /* Fill in the syncache values. */
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
sc->sc_ipopts = NULL;
- sc->sc_inc.inc_fport = inc->inc_fport;
- sc->sc_inc.inc_lport = inc->inc_lport;
- sc->sc_tp = sototcpcb(so);
+
+ sc->sc_irs = seq;
+ sc->sc_iss = ack;
+
#ifdef INET6
- sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
if (inc->inc_isipv6) {
- sc->sc_inc.inc6_faddr = inc->inc6_faddr;
- sc->sc_inc.inc6_laddr = inc->inc6_laddr;
- if (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)
+ if (sotoinpcb(so)->in6p_flags & IN6P_AUTOFLOWLABEL)
sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
} else
#endif
{
- sc->sc_inc.inc_faddr = inc->inc_faddr;
- sc->sc_inc.inc_laddr = inc->inc_laddr;
+ sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
+ sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
}
- sc->sc_irs = th->th_seq - 1;
- sc->sc_iss = th->th_ack - 1;
+
+ /* Additional parameters that were encoded in the timestamp. */
+ if (data) {
+ sc->sc_flags |= SCF_TIMESTAMP;
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = to->to_tsecr;
+ sc->sc_tsoff = to->to_tsecr - ticks;
+ sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
+ sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
+ sc->sc_requested_s_scale = min((data >> 2) & 0xf,
+ TCP_MAX_WINSHIFT);
+ sc->sc_requested_r_scale = min((data >> 6) & 0xf,
+ TCP_MAX_WINSHIFT);
+ if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
+ sc->sc_flags |= SCF_WINSCALE;
+ } else
+ sc->sc_flags |= SCF_NOOPT;
+
wnd = sbspace(&so->so_rcv);
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
- sc->sc_flags = 0;
- sc->sc_rxtslot = 0;
- sc->sc_peer_mss = tcp_msstab[data];
+
+ sc->sc_rxmits = 0;
+ sc->sc_peer_mss = tcp_sc_msstab[mss];
+
+ tcpstat.tcps_sc_recvcookie++;
return (sc);
}
+
+/*
+ * Returns the current number of syncache entries. This number
+ * will probably change before you get around to calling
+ * syncache_pcblist.
+ */
+
+int
+syncache_pcbcount(void)
+{
+ struct syncache_head *sch;
+ int count, i;
+
+ for (count = 0, i = 0; i < tcp_syncache.hashsize; i++) {
+ /* No need to lock for a read. */
+ sch = &tcp_syncache.hashbase[i];
+ count += sch->sch_length;
+ }
+ return count;
+}
+
+/*
+ * Exports the syncache entries to userland so that netstat can display
+ * them alongside the other sockets. This function is intended to be
+ * called only from tcp_pcblist.
+ *
+ * Due to concurrency on an active system, the number of pcbs exported
+ * may have no relation to max_pcbs. max_pcbs merely indicates the
+ * amount of space the caller allocated for this function to use.
+ */
+int
+syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+{
+ struct xtcpcb xt;
+ struct syncache *sc;
+ struct syncache_head *sch;
+ int count, error, i;
+
+ for (count = 0, error = 0, i = 0; i < tcp_syncache.hashsize; i++) {
+ sch = &tcp_syncache.hashbase[i];
+ SCH_LOCK(sch);
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
+ if (count >= max_pcbs) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ if (sc->sc_inc.inc_isipv6)
+ xt.xt_inp.inp_vflag = INP_IPV6;
+ else
+ xt.xt_inp.inp_vflag = INP_IPV4;
+ bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo));
+ xt.xt_tp.t_inpcb = &xt.xt_inp;
+ xt.xt_tp.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_socket.so_type = SOCK_STREAM;
+ xt.xt_socket.so_state = SS_ISCONNECTING;
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ count++;
+ }
+ SCH_UNLOCK(sch);
+ }
+exit:
+ *pcbs_exported = count;
+ return error;
+}
+
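A rough sketch of how a tcp_pcblist()-style handler might use the two functions above; everything except syncache_pcbcount() and syncache_pcblist() (the helper name, the sizing variables) is assumed for illustration:

/*
 * Hypothetical caller sketch: size the sysctl buffer with
 * syncache_pcbcount() and then let syncache_pcblist() fill whatever
 * room is left after the real PCBs have been exported.
 */
static int
export_syncache_tail(struct sysctl_req *req, int space_for_pcbs,
    int tcp_pcbs_written)
{
	int error, sc_exported;

	/* space_for_pcbs was sized earlier using syncache_pcbcount(). */
	error = syncache_pcblist(req, space_for_pcbs - tcp_pcbs_written,
	    &sc_exported);
	return (error);
}
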
Index: ip_fw_pfil.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw_pfil.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_fw_pfil.c -L sys/netinet/ip_fw_pfil.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_fw_pfil.c
+++ sys/netinet/ip_fw_pfil.c
@@ -22,10 +22,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.19.2.1 2006/02/11 08:19:37 ume Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.25 2007/10/07 20:44:23 silby Exp $");
+
#if !defined(KLD_MODULE)
#include "opt_ipfw.h"
#include "opt_ipdn.h"
@@ -64,7 +65,12 @@
#include <machine/in_cksum.h>
-static int ipfw_pfil_hooked = 0;
+int fw_enable = 1;
+#ifdef INET6
+int fw6_enable = 1;
+#endif
+
+int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
/* Dummynet hooks. */
ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL;
@@ -96,9 +102,6 @@
KASSERT(dir == PFIL_IN, ("ipfw_check_in wrong direction!"));
- if (!fw_enable)
- goto pass;
-
bzero(&args, sizeof(args));
dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
@@ -187,6 +190,9 @@
if (!NG_IPFW_LOADED)
goto drop;
return ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 0);
+
+ case IP_FW_NAT:
+ goto again; /* continue with packet */
default:
KASSERT(0, ("%s: unknown retval", __func__));
@@ -217,9 +223,6 @@
KASSERT(dir == PFIL_OUT, ("ipfw_check_out wrong direction!"));
- if (!fw_enable)
- goto pass;
-
bzero(&args, sizeof(args));
dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
@@ -316,6 +319,9 @@
goto drop;
return ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 0);
+ case IP_FW_NAT:
+ goto again; /* continue with packet */
+
default:
KASSERT(0, ("%s: unknown retval", __func__));
}
@@ -417,28 +423,13 @@
ipfw_hook(void)
{
struct pfil_head *pfh_inet;
-#ifdef INET6
- struct pfil_head *pfh_inet6;
-#endif
-
- if (ipfw_pfil_hooked)
- return EEXIST;
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return ENOENT;
-#ifdef INET6
- pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
- if (pfh_inet6 == NULL)
- return ENOENT;
-#endif
pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
-#ifdef INET6
- pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
- pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
-#endif
return 0;
}
@@ -447,31 +438,86 @@
ipfw_unhook(void)
{
struct pfil_head *pfh_inet;
-#ifdef INET6
- struct pfil_head *pfh_inet6;
-#endif
-
- if (!ipfw_pfil_hooked)
- return ENOENT;
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return ENOENT;
+
+ pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+ pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+
+ return 0;
+}
+
#ifdef INET6
+static int
+ipfw6_hook(void)
+{
+ struct pfil_head *pfh_inet6;
+
+ pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+ if (pfh_inet6 == NULL)
+ return ENOENT;
+
+ pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+ pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+
+ return 0;
+}
+
+static int
+ipfw6_unhook(void)
+{
+ struct pfil_head *pfh_inet6;
+
pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
if (pfh_inet6 == NULL)
return ENOENT;
-#endif
- pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
- pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
-#ifdef INET6
pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
-#endif
return 0;
}
+#endif /* INET6 */
+
+int
+ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
+{
+ int enable = *(int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &enable, 0, req);
+ if (error)
+ return (error);
+
+ enable = (enable) ? 1 : 0;
+
+ if (enable == *(int *)arg1)
+ return (0);
+
+ if (arg1 == &fw_enable) {
+ if (enable)
+ error = ipfw_hook();
+ else
+ error = ipfw_unhook();
+ }
+#ifdef INET6
+ if (arg1 == &fw6_enable) {
+ if (enable)
+ error = ipfw6_hook();
+ else
+ error = ipfw6_unhook();
+ }
+#endif
+
+ if (error)
+ return (error);
+
+ *(int *)arg1 = enable;
+
+ return (0);
+}
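The handler above only takes effect once it is attached to the fw_enable/fw6_enable knobs; that wiring lives outside this file (ip_fw2.c in FreeBSD), so the following declarations are an approximation rather than a quote of the actual code:

/* Approximate sysctl wiring; the OID paths and flags are assumptions. */
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW, &fw_enable, 0,
    ipfw_chg_hook, "I", "Enable ipfw");
#ifdef INET6
SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW, &fw6_enable, 0,
    ipfw_chg_hook, "I", "Enable ipfw6");
#endif
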
static int
ipfw_modevent(module_t mod, int type, void *unused)
@@ -480,31 +526,30 @@
switch (type) {
case MOD_LOAD:
- if (ipfw_pfil_hooked) {
- printf("IP firewall already loaded\n");
- err = EEXIST;
- } else {
- if ((err = ipfw_init()) != 0) {
- printf("ipfw_init() error\n");
- break;
- }
- if ((err = ipfw_hook()) != 0) {
- printf("ipfw_hook() error\n");
- break;
- }
- ipfw_pfil_hooked = 1;
+ if ((err = ipfw_init()) != 0) {
+ printf("ipfw_init() error\n");
+ break;
+ }
+ if ((err = ipfw_hook()) != 0) {
+ printf("ipfw_hook() error\n");
+ break;
}
+#ifdef INET6
+ if ((err = ipfw6_hook()) != 0) {
+ printf("ipfw_hook() error\n");
+ break;
+ }
+#endif
break;
case MOD_UNLOAD:
- if (ipfw_pfil_hooked) {
- if ((err = ipfw_unhook()) > 0)
- break;
- ipfw_destroy();
- ipfw_pfil_hooked = 0;
- } else {
- printf("IP firewall already unloaded\n");
- }
+ if ((err = ipfw_unhook()) > 0)
+ break;
+#ifdef INET6
+ if ((err = ipfw6_unhook()) > 0)
+ break;
+#endif
+ ipfw_destroy();
break;
default:
Index: ip_fw.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_fw.h -L sys/netinet/ip_fw.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_fw.h
+++ sys/netinet/ip_fw.h
@@ -22,7 +22,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.100.2.3 2006/02/17 16:46:47 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.4.1 2008/01/28 17:44:30 rwatson Exp $
*/
#ifndef _IPFW2_H
@@ -124,6 +124,7 @@
O_TEE, /* arg1=port number */
O_FORWARD_IP, /* fwd sockaddr */
O_FORWARD_MAC, /* fwd mac */
+ O_NAT, /* nope */
/*
* More opcodes.
@@ -157,6 +158,9 @@
O_UNREACH6, /* arg1=icmpv6 code arg (deny) */
+ O_TAG, /* arg1=tag number */
+ O_TAGGED, /* arg1=tag number */
+
O_LAST_OPCODE /* not an opcode! */
};
@@ -170,6 +174,8 @@
#define EXT_AH 0x8
#define EXT_ESP 0x10
#define EXT_DSTOPTS 0x20
+#define EXT_RTHDR0 0x40
+#define EXT_RTHDR2 0x80
/*
* Template for instructions.
@@ -215,6 +221,8 @@
*/
#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t))
+#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
+
/*
* This is used to store an array of 16-bit entries (ports etc.)
*/
@@ -271,19 +279,6 @@
} ipfw_insn_if;
/*
- * This is used for pipe and queue actions, which need to store
- * a single pointer (which can have different size on different
- * architectures.
- * Note that, because of previous instructions, pipe_ptr might
- * be unaligned in the overall structure, so it needs to be
- * manipulated with care.
- */
-typedef struct _ipfw_insn_pipe {
- ipfw_insn o;
- void *pipe_ptr; /* XXX */
-} ipfw_insn_pipe;
-
-/*
* This is used for storing an altq queue id number.
*/
typedef struct _ipfw_insn_altq {
@@ -315,6 +310,75 @@
u_int32_t log_left; /* how many left to log */
} ipfw_insn_log;
+/*
+ * Data structures required by both ipfw(8) and ipfw(4) but not part of the
+ * management API are protected by IPFW_INTERNAL.
+ */
+#ifdef IPFW_INTERNAL
+/* Server pool support (LSNAT). */
+struct cfg_spool {
+ LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */
+ struct in_addr addr;
+ u_short port;
+};
+#endif
+
+/* Redirect modes id. */
+#define REDIR_ADDR 0x01
+#define REDIR_PORT 0x02
+#define REDIR_PROTO 0x04
+
+#ifdef IPFW_INTERNAL
+/* Nat redirect configuration. */
+struct cfg_redir {
+ LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */
+ u_int16_t mode; /* type of redirect mode */
+ struct in_addr laddr; /* local ip address */
+ struct in_addr paddr; /* public ip address */
+ struct in_addr raddr; /* remote ip address */
+ u_short lport; /* local port */
+ u_short pport; /* public port */
+ u_short rport; /* remote port */
+ u_short pport_cnt; /* number of public ports */
+ u_short rport_cnt; /* number of remote ports */
+ int proto; /* protocol: tcp/udp */
+ struct alias_link **alink;
+ /* number of entries in spool chain */
+ u_int16_t spool_cnt;
+ /* chain of spool instances */
+ LIST_HEAD(spool_chain, cfg_spool) spool_chain;
+};
+#endif
+
+#define NAT_BUF_LEN 1024
+
+#ifdef IPFW_INTERNAL
+/* Nat configuration data struct. */
+struct cfg_nat {
+ /* chain of nat instances */
+ LIST_ENTRY(cfg_nat) _next;
+ int id; /* nat id */
+ struct in_addr ip; /* nat ip address */
+ char if_name[IF_NAMESIZE]; /* interface name */
+ int mode; /* aliasing mode */
+ struct libalias *lib; /* libalias instance */
+ /* number of entries in redir chain */
+ int redir_cnt;
+ /* chain of redir instances */
+ LIST_HEAD(redir_chain, cfg_redir) redir_chain;
+};
+#endif
+
+#define SOF_NAT sizeof(struct cfg_nat)
+#define SOF_REDIR sizeof(struct cfg_redir)
+#define SOF_SPOOL sizeof(struct cfg_spool)
+
+/* Nat command. */
+typedef struct _ipfw_insn_nat {
+ ipfw_insn o;
+ struct cfg_nat *nat;
+} ipfw_insn_nat;
+
/* Apply ipv6 mask on ipv6 addr */
#define APPLY_MASK(addr,mask) \
(addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
@@ -359,6 +423,7 @@
* + if a rule has a "log" option, then the first action
* (at ACTION_PTR(r)) MUST be O_LOG
* + if a rule has an "altq" option, it comes after "log"
+ * + if a rule has an O_TAG option, it comes after "log" and "altq"
*
* NOTE: we use a simple linked list of rules because we never need
* to delete a rule without scanning the list. We do not use
@@ -490,6 +555,7 @@
IP_FW_DUMMYNET,
IP_FW_NETGRAPH,
IP_FW_NGTEE,
+ IP_FW_NAT,
};
/* flags for divert mtag */
@@ -528,6 +594,7 @@
struct inpcb *inp;
struct _ip6dn_args dummypar; /* dummynet->ip6_output */
+ struct sockaddr_in hopstore; /* store here if cannot use a pointer */
};
/*
@@ -546,12 +613,13 @@
int ipfw_init(void);
void ipfw_destroy(void);
-void flush_pipe_ptrs(struct dn_flow_set *match); /* used by dummynet */
-
typedef int ip_fw_ctl_t(struct sockopt *);
extern ip_fw_ctl_t *ip_fw_ctl_ptr;
extern int fw_one_pass;
extern int fw_enable;
+#ifdef INET6
+extern int fw6_enable;
+#endif
/* For kernel ipfw_ether and ipfw_bridge. */
typedef int ip_fw_chk_t(struct ip_fw_args *args);
--- /dev/null
+++ sys/netinet/sctp_structs.h
@@ -0,0 +1,1047 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_structs.h,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_structs.h,v 1.25.2.2 2007/12/09 20:23:47 rrs Exp $");
+
+#ifndef __sctp_structs_h__
+#define __sctp_structs_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_auth.h>
+
+struct sctp_timer {
+ sctp_os_timer_t timer;
+
+ int type;
+ /*
+ * Depending on the timer type these will be set up and cast to the
+ * appropriate entity.
+ */
+ void *ep;
+ void *tcb;
+ void *net;
+
+ /* for sanity checking */
+ void *self;
+ uint32_t ticks;
+ uint32_t stopped_from;
+};
+
+
+struct sctp_foo_stuff {
+ struct sctp_inpcb *inp;
+ uint32_t lineno;
+ uint32_t ticks;
+ int updown;
+};
+
+
+/*
+ * This is the information we track on each interface that we know about from
+ * the distant end.
+ */
+TAILQ_HEAD(sctpnetlisthead, sctp_nets);
+
+struct sctp_stream_reset_list {
+ TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+ uint32_t tsn;
+ int number_entries;
+ struct sctp_stream_reset_out_request req;
+};
+
+TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
+
+/*
+ * Users of the iterator need to malloc an iterator with a call to
+ * sctp_initiate_iterator(inp_func, assoc_func, inp_func, pcb_flags, pcb_features,
+ * asoc_state, void-ptr-arg, uint32-arg, end_func, inp);
+ *
+ * Use the following two defines if you don't care what pcb flags are on the EP
+ * and/or you don't care what state the association is in.
+ *
+ * Note that if you specify an INP as the last argument then ONLY each
+ * association of that single INP will be executed upon. Note that the pcb
+ * flags STILL apply, so if the inp you specify has different pcb_flags than
+ * what you put in pcb_flags, nothing will happen. Use SCTP_PCB_ANY_FLAGS to
+ * ensure the inp you specify gets processed. (A hypothetical call sketch
+ * follows the typedefs below.)
+ */
+#define SCTP_PCB_ANY_FLAGS 0x00000000
+#define SCTP_PCB_ANY_FEATURES 0x00000000
+#define SCTP_ASOC_ANY_STATE 0x00000000
+
+typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr,
+ uint32_t val);
+typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val);
+typedef void (*end_func) (void *ptr, uint32_t val);
+
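Following the parameter order documented in the comment above, a hypothetical invocation might look like this; the callback bodies and the exact prototype of sctp_initiate_iterator() are assumptions, only the typedefs come from this header:

/* Hypothetical callbacks matching the asoc_func/inp_func/end_func typedefs. */
static void
my_asoc_func(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
    uint32_t val)
{
	/* per-association work goes here */
}

static int
my_inp_func(struct sctp_inpcb *inp, void *ptr, uint32_t val)
{
	/* per-endpoint work goes here */
	return (0);
}

static void
my_end_func(void *ptr, uint32_t val)
{
	/* called once the iterator has finished */
}

/*
 * Walk every association of every endpoint (argument order as documented
 * in the comment above; not a verified prototype):
 *
 * sctp_initiate_iterator(my_inp_func, my_asoc_func, my_inp_func,
 *     SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE,
 *     my_arg_ptr, my_arg_val, my_end_func, NULL);
 */
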
+struct sctp_iterator {
+ TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr;
+ struct sctp_timer tmr;
+ struct sctp_inpcb *inp; /* current endpoint */
+ struct sctp_tcb *stcb; /* current assoc */
+ asoc_func function_assoc; /* per assoc function */
+ inp_func function_inp; /* per endpoint function */
+ inp_func function_inp_end; /* end INP function */
+ end_func function_atend;/* iterator completion function */
+ void *pointer; /* pointer for apply func to use */
+ uint32_t val; /* value for apply func to use */
+ uint32_t pcb_flags; /* endpoint flags being checked */
+ uint32_t pcb_features; /* endpoint features being checked */
+ uint32_t asoc_state; /* assoc state being checked */
+ uint32_t iterator_flags;
+ uint8_t no_chunk_output;
+ uint8_t done_current_ep;
+};
+
+/* iterator_flags values */
+#define SCTP_ITERATOR_DO_ALL_INP 0x00000001
+#define SCTP_ITERATOR_DO_SINGLE_INP 0x00000002
+
+TAILQ_HEAD(sctpiterators, sctp_iterator);
+
+struct sctp_copy_all {
+ struct sctp_inpcb *inp; /* ep */
+ struct mbuf *m;
+ struct sctp_sndrcvinfo sndrcv;
+ int sndlen;
+ int cnt_sent;
+ int cnt_failed;
+};
+
+struct sctp_asconf_iterator {
+ struct sctpladdr list_of_work;
+ int cnt;
+};
+
+struct sctp_net_route {
+ sctp_rtentry_t *ro_rt;
+ union sctp_sockstore _l_addr; /* remote peer addr */
+ struct sctp_ifa *_s_addr; /* our selected src addr */
+};
+
+struct htcp {
+ uint16_t alpha; /* Fixed point arith, << 7 */
+ uint8_t beta; /* Fixed point arith, << 7 */
+ uint8_t modeswitch; /* Delay modeswitch until we had at least one
+ * congestion event */
+ uint32_t last_cong; /* Time since last congestion event end */
+ uint32_t undo_last_cong;
+ uint16_t bytes_acked;
+ uint32_t bytecount;
+ uint32_t minRTT;
+ uint32_t maxRTT;
+
+ uint32_t undo_maxRTT;
+ uint32_t undo_old_maxB;
+
+ /* Bandwidth estimation */
+ uint32_t minB;
+ uint32_t maxB;
+ uint32_t old_maxB;
+ uint32_t Bi;
+ uint32_t lasttime;
+};
+
+
+struct sctp_nets {
+ TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */
+
+ /*
+ * Things on the top half may be able to be split into a common
+ * structure shared by all.
+ */
+ struct sctp_timer pmtu_timer;
+
+ /*
+ * The following two in combination equate to a route entry for v6
+ * or v4.
+ */
+ struct sctp_net_route ro;
+
+ /* mtu discovered so far */
+ uint32_t mtu;
+ uint32_t ssthresh; /* not sure about this one for split */
+
+ /* smoothed average things for RTT and RTO itself */
+ int lastsa;
+ int lastsv;
+ unsigned int RTO;
+
+ /* This is used for SHUTDOWN/SHUTDOWN-ACK/SEND or INIT timers */
+ struct sctp_timer rxt_timer;
+ struct sctp_timer fr_timer; /* for early fr */
+
+ /* last time in seconds I sent to it */
+ struct timeval last_sent_time;
+ int ref_count;
+
+ /* Congestion stats per destination */
+ /*
+ * flight size variables and such, sorry Vern, I could not avoid
+ * this if I wanted performance :>
+ */
+ uint32_t flight_size;
+ uint32_t cwnd; /* actual cwnd */
+ uint32_t prev_cwnd; /* cwnd before any processing */
+ uint32_t partial_bytes_acked; /* in CA tracks when to incr a MTU */
+ uint32_t prev_rtt;
+ /* tracking variables to avoid the aloc/free in sack processing */
+ unsigned int net_ack;
+ unsigned int net_ack2;
+
+ /*
+ * JRS - 5/8/07 - Variable to track last time a destination was
+ * active for CMT PF
+ */
+ uint32_t last_active;
+
+ /*
+ * CMT variables (iyengar at cis.udel.edu)
+ */
+ uint32_t this_sack_highest_newack; /* tracks highest TSN newly
+ * acked for a given dest in
+ * the current SACK. Used in
+ * SFR and HTNA algos */
+ uint32_t pseudo_cumack; /* CMT CUC algorithm. Maintains next expected
+ * pseudo-cumack for this destination */
+ uint32_t rtx_pseudo_cumack; /* CMT CUC algorithm. Maintains next
+ * expected pseudo-cumack for this
+ * destination */
+
+ /* CMT fast recovery variables */
+ uint32_t fast_recovery_tsn;
+ uint32_t heartbeat_random1;
+ uint32_t heartbeat_random2;
+ uint32_t tos_flowlabel;
+
+ struct timeval start_time; /* time when this net was created */
+
+ uint32_t marked_retrans;/* number of DATA chunks marked for timer
+ * based retransmissions */
+ uint32_t marked_fastretrans;
+
+ /* if this guy is ok or not ... status */
+ uint16_t dest_state;
+ /* number of transmit failures to down this guy */
+ uint16_t failure_threshold;
+ /* error stats on destination */
+ uint16_t error_count;
+
+ uint8_t fast_retran_loss_recovery;
+ uint8_t will_exit_fast_recovery;
+ /* Flags that probably can be combined into dest_state */
+ uint8_t fast_retran_ip; /* fast retransmit in progress */
+ uint8_t hb_responded;
+ uint8_t saw_newack; /* CMT's SFR algorithm flag */
+ uint8_t src_addr_selected; /* if we split we move */
+ uint8_t indx_of_eligible_next_to_use;
+ uint8_t addr_is_local; /* it's a local address (if known) could move
+ * in split */
+
+ /*
+ * CMT variables (iyengar at cis.udel.edu)
+ */
+ uint8_t find_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * find a new pseudocumack. This flag
+ * is set after a new pseudo-cumack
+ * has been received and indicates
+ * that the sender should find the
+ * next pseudo-cumack expected for
+ * this destination */
+ uint8_t find_rtx_pseudo_cumack; /* CMT CUCv2 algorithm. Flag used to
+ * find a new rtx-pseudocumack. This
+ * flag is set after a new
+ * rtx-pseudo-cumack has been received
+ * and indicates that the sender
+ * should find the next
+ * rtx-pseudo-cumack expected for this
+ * destination */
+ uint8_t new_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * indicate if a new pseudo-cumack or
+ * rtx-pseudo-cumack has been received */
+ uint8_t window_probe; /* Doing a window probe? */
+ uint8_t RTO_measured; /* Have we done the first measure */
+ uint8_t last_hs_used; /* index into the last HS table entry we used */
+ /* JRS - struct used in HTCP algorithm */
+ struct htcp htcp_ca;
+};
+
+
+struct sctp_data_chunkrec {
+ uint32_t TSN_seq; /* the TSN of this transmit */
+ uint16_t stream_seq; /* the stream sequence number of this transmit */
+ uint16_t stream_number; /* the stream number of this guy */
+ uint32_t payloadtype;
+ uint32_t context; /* from send */
+
+ /* ECN Nonce: Nonce Value for this chunk */
+ uint8_t ect_nonce;
+
+ /*
+ * part of the Highest sacked algorithm to be able to stroke counts
+ * on ones that are FR'd.
+ */
+ uint32_t fast_retran_tsn; /* sending_seq at the time of FR */
+ struct timeval timetodrop; /* time we drop it from queue */
+ uint8_t doing_fast_retransmit;
+ uint8_t rcv_flags; /* flags pulled from data chunk on inbound for
+ * outbound holds sending flags for PR-SCTP. */
+ uint8_t state_flags;
+ uint8_t chunk_was_revoked;
+};
+
+TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk);
+
+/* The lower byte is used to enumerate PR_SCTP policies */
+#define CHUNK_FLAGS_PR_SCTP_TTL SCTP_PR_SCTP_TTL
+#define CHUNK_FLAGS_PR_SCTP_BUF SCTP_PR_SCTP_BUF
+#define CHUNK_FLAGS_PR_SCTP_RTX SCTP_PR_SCTP_RTX
+
+/* The upper byte is used as a bit mask */
+#define CHUNK_FLAGS_FRAGMENT_OK 0x0100
+
+struct chk_id {
+ uint16_t id;
+ uint16_t can_take_data;
+};
+
+
+struct sctp_tmit_chunk {
+ union {
+ struct sctp_data_chunkrec data;
+ struct chk_id chunk_id;
+ } rec;
+ struct sctp_association *asoc; /* bp to asoc this belongs to */
+ struct timeval sent_rcv_time; /* filled in if RTT being calculated */
+ struct mbuf *data; /* pointer to mbuf chain of data */
+ struct mbuf *last_mbuf; /* pointer to last mbuf in chain */
+ struct sctp_nets *whoTo;
+ TAILQ_ENTRY(sctp_tmit_chunk) sctp_next; /* next link */
+ int32_t sent; /* the send status */
+ uint16_t snd_count; /* number of times I sent */
+ uint16_t flags; /* flags, such as FRAGMENT_OK */
+ uint16_t send_size;
+ uint16_t book_size;
+ uint16_t mbcnt;
+ uint8_t pad_inplace;
+ uint8_t do_rtt;
+ uint8_t book_size_scale;
+ uint8_t addr_over; /* flag which is set if the dest address for
+ * this chunk is overridden by user. Used for
+ * CMT (iyengar at cis.udel.edu, 2005/06/21) */
+ uint8_t no_fr_allowed;
+ uint8_t pr_sctp_on;
+ uint8_t copy_by_ref;
+ uint8_t window_probe;
+};
+
+/*
+ * The first part of this structure MUST be the entire sinfo structure. Maybe
+ * I should have made it a sub structure... we can circle back later and do
+ * that if we want.
+ */
+struct sctp_queued_to_read { /* sinfo structure plus more */
+ uint16_t sinfo_stream; /* off the wire */
+ uint16_t sinfo_ssn; /* off the wire */
+ uint16_t sinfo_flags; /* SCTP_UNORDERED from wire use SCTP_EOF for
+ * EOR */
+ uint32_t sinfo_ppid; /* off the wire */
+ uint32_t sinfo_context; /* pick this up from assoc def context? */
+ uint32_t sinfo_timetolive; /* not used by kernel */
+ uint32_t sinfo_tsn; /* Use this in reassembly as first TSN */
+ uint32_t sinfo_cumtsn; /* Use this in reassembly as last TSN */
+ sctp_assoc_t sinfo_assoc_id; /* our assoc id */
+ /* Non sinfo stuff */
+ uint32_t length; /* length of data */
+ uint32_t held_length; /* length held in sb */
+ struct sctp_nets *whoFrom; /* where it came from */
+ struct mbuf *data; /* front of the mbuf chain of data with
+ * PKT_HDR */
+ struct mbuf *tail_mbuf; /* used for multi-part data */
+ struct mbuf *aux_data; /* used to hold/cache control if o/s does not
+ * take it from us */
+ struct sctp_tcb *stcb; /* assoc, used for window update */
+ TAILQ_ENTRY(sctp_queued_to_read) next;
+ uint16_t port_from;
+ uint16_t spec_flags; /* Flags to hold the notification field */
+ uint8_t do_not_ref_stcb;
+ uint8_t end_added;
+ uint8_t pdapi_aborted;
+ uint8_t some_taken;
+};
+
+/* This data structure will be on the outbound
+ * stream queues. Data will be pulled off from
+ * the front of the mbuf data and chunk-ified
+ * by the output routines. We will custom
+ * fit every chunk we pull to the send/sent
+ * queue to make up the next full packet
+ * if we can. An entry cannot be removed
+ * from the stream_out queue until
+ * the msg_is_complete flag is set. This
+ * means at times data/tail_mbuf MIGHT
+ * be NULL.. If that occurs it happens
+ * for one of two reasons. Either the user
+ * is blocked on a send() call and has not
+ * awoken to copy more data down... OR
+ * the user is in the explicit MSG_EOR mode
+ * and wrote some data, but has not completed
+ * sending.
+ */
+struct sctp_stream_queue_pending {
+ struct mbuf *data;
+ struct mbuf *tail_mbuf;
+ struct timeval ts;
+ struct sctp_nets *net;
+ TAILQ_ENTRY(sctp_stream_queue_pending) next;
+ uint32_t length;
+ uint32_t timetolive;
+ uint32_t ppid;
+ uint32_t context;
+ uint16_t sinfo_flags;
+ uint16_t stream;
+ uint16_t strseq;
+ uint16_t act_flags;
+ uint8_t msg_is_complete;
+ uint8_t some_taken;
+ uint8_t addr_over;
+ uint8_t pr_sctp_on;
+ uint8_t sender_all_done;
+ uint8_t put_last_out;
+};
+
+/*
+ * this struct contains info that is used to track inbound stream data and
+ * help with ordering.
+ */
+TAILQ_HEAD(sctpwheelunrel_listhead, sctp_stream_in);
+struct sctp_stream_in {
+ struct sctp_readhead inqueue;
+ uint16_t stream_no;
+ uint16_t last_sequence_delivered; /* used for re-order */
+ uint8_t delivery_started;
+};
+
+/* This struct is used to track the traffic on outbound streams */
+TAILQ_HEAD(sctpwheel_listhead, sctp_stream_out);
+struct sctp_stream_out {
+ struct sctp_streamhead outqueue;
+ TAILQ_ENTRY(sctp_stream_out) next_spoke; /* next link in wheel */
+ uint16_t stream_no;
+ uint16_t next_sequence_sent; /* next one I expect to send out */
+ uint8_t last_msg_incomplete;
+};
+
+/* used to keep track of the addresses yet to try to add/delete */
+TAILQ_HEAD(sctp_asconf_addrhead, sctp_asconf_addr);
+struct sctp_asconf_addr {
+ TAILQ_ENTRY(sctp_asconf_addr) next;
+ struct sctp_asconf_addr_param ap;
+ struct sctp_ifa *ifa; /* save the ifa for add/del ip */
+ uint8_t sent; /* has this been sent yet? */
+};
+
+struct sctp_scoping {
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ uint8_t loopback_scope;
+ uint8_t ipv4_local_scope;
+ uint8_t local_scope;
+ uint8_t site_scope;
+};
+
+#define SCTP_TSN_LOG_SIZE 40
+
+struct sctp_tsn_log {
+ void *stcb;
+ uint32_t tsn;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+ uint16_t in_pos;
+ uint16_t in_out;
+};
+
+#define SCTP_FS_SPEC_LOG_SIZE 200
+struct sctp_fs_spec_log {
+ uint32_t sent;
+ uint32_t total_flight;
+ uint32_t tsn;
+ uint16_t book;
+ uint8_t incr;
+ uint8_t decr;
+};
+
+/* This struct is here to cut out the compatibility
+ * pad that bulks up both the inp and stcb. The non
+ * pad portion MUST stay in complete sync with
+ * sctp_sndrcvinfo... i.e. if sinfo_xxxx is added
+ * this must be done here too.
+ */
+struct sctp_nonpad_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+};
+
+/*
+ * JRS - Structure to hold function pointers to the functions responsible
+ * for congestion control.
+ */
+
+struct sctp_cc_functions {
+ void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+ void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+ void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+ void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+ void (*sctp_cwnd_update_after_fr_timer) (struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+};
+
+/* used to save ASCONF-ACK chunks for retransmission */
+TAILQ_HEAD(sctp_asconf_ackhead, sctp_asconf_ack);
+struct sctp_asconf_ack {
+ TAILQ_ENTRY(sctp_asconf_ack) next;
+ uint32_t serial_number;
+ struct sctp_nets *last_sent_to;
+ struct mbuf *data;
+ uint16_t len;
+};
+
+/*
+ * Here we have information about each individual association that we track.
+ * In production we would probably be more dynamic, but for ease of
+ * implementation we will have a fixed array that we hunt for in a linear
+ * fashion.
+ */
+struct sctp_association {
+ /* association state */
+ int state;
+
+ /* queue of pending addrs to add/delete */
+ struct sctp_asconf_addrhead asconf_queue;
+
+ struct timeval time_entered; /* time we entered state */
+ struct timeval time_last_rcvd;
+ struct timeval time_last_sent;
+ struct timeval time_last_sat_advance;
+ struct sctp_nonpad_sndrcvinfo def_send;
+
+ /* timers and such */
+ struct sctp_timer hb_timer; /* hb timer */
+ struct sctp_timer dack_timer; /* Delayed ack timer */
+ struct sctp_timer asconf_timer; /* asconf */
+ struct sctp_timer strreset_timer; /* stream reset */
+ struct sctp_timer shut_guard_timer; /* shutdown guard */
+ struct sctp_timer autoclose_timer; /* automatic close timer */
+ struct sctp_timer delayed_event_timer; /* timer for delayed events */
+ struct sctp_timer delete_prim_timer; /* deleting primary dst */
+
+ /* list of restricted local addresses */
+ struct sctpladdr sctp_restricted_addrs;
+
+ /* last local address pending deletion (waiting for an address add) */
+ struct sctp_ifa *asconf_addr_del_pending;
+ /* Deleted primary destination (used to stop timer) */
+ struct sctp_nets *deleted_primary;
+
+ struct sctpnetlisthead nets; /* remote address list */
+
+ /* Free chunk list */
+ struct sctpchunk_listhead free_chunks;
+
+ /* Control chunk queue */
+ struct sctpchunk_listhead control_send_queue;
+
+ /*
+ * Once a TSN hits the wire it is moved to the sent_queue. We
+ * maintain two counts here (don't know if any but retran_cnt is
+ * needed). The idea is that the sent_queue_retran_cnt reflects how
+ * many chunks have been marked for retransmission by either T3-rxt
+ * or FR.
+ */
+ struct sctpchunk_listhead sent_queue;
+ struct sctpchunk_listhead send_queue;
+
+ /* re-assembly queue for fragmented chunks on the inbound path */
+ struct sctpchunk_listhead reasmqueue;
+
+ /*
+ * this queue is used when we reach a condition that we can NOT put
+ * data into the socket buffer. We track the size of this queue and
+ * set our rwnd to the space in the socket minus also the
+ * size_on_delivery_queue.
+ */
+ struct sctpwheel_listhead out_wheel;
+
+ /*
+ * This pointer will be set to NULL most of the time. But when we
+ * have a fragmented message, where we could not get out all of the
+ * message at the last send then this will point to the stream to go
+ * get data from.
+ */
+ struct sctp_stream_out *locked_on_sending;
+
+ /* If an iterator is looking at me, this is it */
+ struct sctp_iterator *stcb_starting_point_for_iterator;
+
+ /* ASCONF save the last ASCONF-ACK so we can resend it if necessary */
+ struct sctp_asconf_ackhead asconf_ack_sent;
+
+ /*
+ * pointer to last stream reset queued to control queue by us with
+ * requests.
+ */
+ struct sctp_tmit_chunk *str_reset;
+ /*
+ * if Source Address Selection happening, this will rotate through
+ * the link list.
+ */
+ struct sctp_laddr *last_used_address;
+
+ /* stream arrays */
+ struct sctp_stream_in *strmin;
+ struct sctp_stream_out *strmout;
+ uint8_t *mapping_array;
+ /* primary destination to use */
+ struct sctp_nets *primary_destination;
+ /* For CMT */
+ struct sctp_nets *last_net_data_came_from;
+ /* last place I got a data chunk from */
+ struct sctp_nets *last_data_chunk_from;
+ /* last place I got a control from */
+ struct sctp_nets *last_control_chunk_from;
+
+ /* circular looking for output selection */
+ struct sctp_stream_out *last_out_stream;
+
+ /*
+ * wait to the point the cum-ack passes req->send_reset_at_tsn for
+ * any req on the list.
+ */
+ struct sctp_resethead resetHead;
+
+ /* queue of chunks waiting to be sent into the local stack */
+ struct sctp_readhead pending_reply_queue;
+
+ /* JRS - the congestion control functions are in this struct */
+ struct sctp_cc_functions cc_functions;
+ /*
+ * JRS - value to store the currently loaded congestion control
+ * module
+ */
+ uint32_t congestion_control_module;
+
+ uint32_t vrf_id;
+
+ uint32_t cookie_preserve_req;
+ /* ASCONF next seq I am sending out, inits at init-tsn */
+ uint32_t asconf_seq_out;
+ /* ASCONF last received ASCONF from peer, starts at peer's TSN-1 */
+ uint32_t asconf_seq_in;
+
+ /* next seq I am sending in str reset messages */
+ uint32_t str_reset_seq_out;
+ /* next seq I am expecting in str reset messages */
+ uint32_t str_reset_seq_in;
+
+ /* various verification tag information */
+ uint32_t my_vtag; /* The tag to be used. if assoc is re-initited
+ * by remote end, and I have unlocked this
+ * will be regenerated to a new random value. */
+ uint32_t peer_vtag; /* The peers last tag */
+
+ uint32_t my_vtag_nonce;
+ uint32_t peer_vtag_nonce;
+
+ uint32_t assoc_id;
+
+ /* This is the SCTP fragmentation threshold */
+ uint32_t smallest_mtu;
+
+ /*
+ * Special hook for Fast retransmit, allows us to track the highest
+ * TSN that is NEW in this SACK if gap ack blocks are present.
+ */
+ uint32_t this_sack_highest_gap;
+
+ /*
+ * The highest consecutive TSN that has been acked by peer on my
+ * sends
+ */
+ uint32_t last_acked_seq;
+
+ /* The next TSN that I will use in sending. */
+ uint32_t sending_seq;
+
+ /* Original seq number I used ??questionable to keep?? */
+ uint32_t init_seq_number;
+
+
+ /* The Advanced Peer Ack Point, as required by the PR-SCTP */
+ /* (A1 in Section 4.2) */
+ uint32_t advanced_peer_ack_point;
+
+ /*
+ * The highest consecutive TSN at the bottom of the mapping array
+ * (for his sends).
+ */
+ uint32_t cumulative_tsn;
+ /*
+ * Used to track the mapping array and its offset bits. This MAY be
+ * lower than cumulative_tsn.
+ */
+ uint32_t mapping_array_base_tsn;
+ /*
+ * used to track highest TSN we have received and is listed in the
+ * mapping array.
+ */
+ uint32_t highest_tsn_inside_map;
+
+ uint32_t last_echo_tsn;
+ uint32_t last_cwr_tsn;
+ uint32_t fast_recovery_tsn;
+ uint32_t sat_t3_recovery_tsn;
+ uint32_t tsn_last_delivered;
+ /*
+ * For the pd-api we should rewrite this a bit more efficiently. We
+ * could have multiple sctp_queued_to_read's that we are building at
+ * once. Now we only do this when we get ready to deliver to the
+ * socket buffer. Note that we depend on the fact that the struct is
+ * "stuck" on the read queue until we finish all the pd-api.
+ */
+ struct sctp_queued_to_read *control_pdapi;
+
+ uint32_t tsn_of_pdapi_last_delivered;
+ uint32_t pdapi_ppid;
+ uint32_t context;
+ uint32_t last_reset_action[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_sending_seq[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_base_tsnsent[SCTP_MAX_RESET_PARAMS];
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ /*
+ * special log - This adds considerable size to the asoc, but
+ * provides a log that you can use to detect problems via kgdb.
+ */
+ struct sctp_tsn_log in_tsnlog[SCTP_TSN_LOG_SIZE];
+ struct sctp_tsn_log out_tsnlog[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_log[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_logsnt[SCTP_TSN_LOG_SIZE];
+ uint16_t tsn_in_at;
+ uint16_t tsn_out_at;
+ uint16_t tsn_in_wrapped;
+ uint16_t tsn_out_wrapped;
+ uint16_t cumack_log_at;
+ uint16_t cumack_log_atsnt;
+#endif /* SCTP_ASOCLOG_OF_TSNS */
+#ifdef SCTP_FS_SPEC_LOG
+ struct sctp_fs_spec_log fslog[SCTP_FS_SPEC_LOG_SIZE];
+ uint16_t fs_index;
+#endif
+
+ /*
+ * window state information and smallest MTU that I use to bound
+ * segmentation
+ */
+ uint32_t peers_rwnd;
+ uint32_t my_rwnd;
+ uint32_t my_last_reported_rwnd;
+ uint32_t sctp_frag_point;
+
+ uint32_t total_output_queue_size;
+
+ uint32_t sb_cc; /* shadow of sb_cc */
+ uint32_t sb_send_resv; /* amount reserved on a send */
+ uint32_t my_rwnd_control_len; /* shadow of sb_mbcnt used for rwnd
+ * control */
+ /* 32 bit nonce stuff */
+ uint32_t nonce_resync_tsn;
+ uint32_t nonce_wait_tsn;
+ uint32_t default_flowlabel;
+ uint32_t pr_sctp_cnt;
+ int ctrl_queue_cnt; /* could be removed REM */
+ /*
+ * All outbound datagrams queue into this list from the individual
+ * stream queue. Here they get assigned a TSN and then await
+ * sending. The stream seq comes when it is first put in the
+ * individual str queue
+ */
+ unsigned int stream_queue_cnt;
+ unsigned int send_queue_cnt;
+ unsigned int sent_queue_cnt;
+ unsigned int sent_queue_cnt_removeable;
+ /*
+ * Number on sent queue that are marked for retran until this value
+ * is 0 we only send one packet of retran'ed data.
+ */
+ unsigned int sent_queue_retran_cnt;
+
+ unsigned int size_on_reasm_queue;
+ unsigned int cnt_on_reasm_queue;
+ /* amount of data (bytes) currently in flight (on all destinations) */
+ unsigned int total_flight;
+ /* Total book size in flight */
+ unsigned int total_flight_count; /* count of chunks used with
+ * book total */
+ /* count of destination nets and list of destination nets */
+ unsigned int numnets;
+
+ /* Total error count on this association */
+ unsigned int overall_error_count;
+
+ unsigned int cnt_msg_on_sb;
+
+ /* All stream count of chunks for delivery */
+ unsigned int size_on_all_streams;
+ unsigned int cnt_on_all_streams;
+
+ /* Heart Beat delay in ticks */
+ unsigned int heart_beat_delay;
+
+ /* autoclose */
+ unsigned int sctp_autoclose_ticks;
+
+ /* how many preopen streams we have */
+ unsigned int pre_open_streams;
+
+ /* How many streams I support coming into me */
+ unsigned int max_inbound_streams;
+
+ /* the cookie life I award for any cookie, in seconds */
+ unsigned int cookie_life;
+ /* time to delay acks for */
+ unsigned int delayed_ack;
+ unsigned int old_delayed_ack;
+ unsigned int sack_freq;
+ unsigned int data_pkts_seen;
+
+ unsigned int numduptsns;
+ int dup_tsns[SCTP_MAX_DUP_TSNS];
+ unsigned int initial_init_rto_max; /* initial RTO for INIT's */
+ unsigned int initial_rto; /* initial send RTO */
+ unsigned int minrto; /* per assoc RTO-MIN */
+ unsigned int maxrto; /* per assoc RTO-MAX */
+
+ /* authentication fields */
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_auth_chklist_t *peer_auth_chunks;
+ sctp_hmaclist_t *local_hmacs; /* local HMACs supported */
+ sctp_hmaclist_t *peer_hmacs; /* peer HMACs supported */
+ struct sctp_keyhead shared_keys; /* assoc's shared keys */
+ sctp_authinfo_t authinfo; /* randoms, cached keys */
+ /*
+ * refcnt to block freeing when a sender or receiver is off copying
+ * user data in.
+ */
+ uint32_t refcnt;
+ uint32_t chunks_on_out_queue; /* total chunks floating around,
+ * locked by send socket buffer */
+ uint32_t peers_adaptation;
+ uint16_t peer_hmac_id; /* peer HMAC id to send */
+
+ /*
+ * Being that we have no bag to collect stale cookies, and that we
+ * really would not want to anyway.. we will count them in this
+ * counter. We of course feed them to the pigeons right away (I have
+ * always thought of pigeons as flying rats).
+ */
+ uint16_t stale_cookie_count;
+
+ /*
+ * For the partial delivery API: if it has been invoked, this is the
+ * stream and SSN of the last TSN I delivered.
+ */
+ uint16_t str_of_pdapi;
+ uint16_t ssn_of_pdapi;
+
+ /* counts of actual built streams. Allocation may be more however */
+ /* could re-arrange to optimize space here. */
+ uint16_t streamincnt;
+ uint16_t streamoutcnt;
+
+ /* my maximum number of retrans of INIT and SEND */
+ /* copied from SCTP but should be individually settable */
+ uint16_t max_init_times;
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+ /*
+ * lock flag: 0 is ok to send, 1+ (doubles as a retran count) is
+ * awaiting ACK
+ */
+ uint16_t asconf_sent;
+
+ uint16_t mapping_array_size;
+
+ uint16_t last_strm_seq_delivered;
+ uint16_t last_strm_no_delivered;
+
+ uint16_t last_revoke_count;
+ int16_t num_send_timers_up;
+
+ uint16_t stream_locked_on;
+ uint16_t ecn_echo_cnt_onq;
+
+ uint16_t free_chunk_cnt;
+
+ uint8_t stream_locked;
+ uint8_t authenticated; /* packet authenticated ok */
+ /*
+ * This flag indicates that a SACK needs to be sent. Initially this
+ * is 1 to send the first SACK immediately.
+ */
+ uint8_t send_sack;
+
+ /* max burst after fast retransmit completes */
+ uint8_t max_burst;
+
+ uint8_t sat_network; /* RTT is in range of sat net or greater */
+ uint8_t sat_network_lockout; /* lockout code */
+ uint8_t burst_limit_applied; /* Burst limit in effect at last send? */
+ /* flag goes on when we are doing a partial delivery api */
+ uint8_t hb_random_values[4];
+ uint8_t fragmented_delivery_inprogress;
+ uint8_t fragment_flags;
+ uint8_t last_flags_delivered;
+ uint8_t hb_ect_randombit;
+ uint8_t hb_random_idx;
+ uint8_t hb_is_disabled; /* is the hb disabled? */
+ uint8_t default_tos;
+ uint8_t asconf_del_pending; /* asconf delete last addr pending */
+
+ /* ECN Nonce stuff */
+ uint8_t receiver_nonce_sum; /* nonce I sum and put in my sack */
+ uint8_t ecn_nonce_allowed; /* Tells us if ECN nonce is on */
+ uint8_t nonce_sum_check;/* On off switch used during re-sync */
+ uint8_t nonce_wait_for_ecne; /* flag when we expect a ECN */
+ uint8_t peer_supports_ecn_nonce;
+
+ /*
+ * This value, plus all other ack'd but above cum-ack is added
+ * together to cross check against the bit that we have yet to
+ * define (probably in the SACK). When the cum-ack is updated, this
+ * sum is updated as well.
+ */
+ uint8_t nonce_sum_expect_base;
+ /* Flag to tell if ECN is allowed */
+ uint8_t ecn_allowed;
+
+ /* flag to indicate if peer can do asconf */
+ uint8_t peer_supports_asconf;
+ /* pr-sctp support flag */
+ uint8_t peer_supports_prsctp;
+ /* peer authentication support flag */
+ uint8_t peer_supports_auth;
+ /* stream resets are supported by the peer */
+ uint8_t peer_supports_strreset;
+
+ /*
+ * packet drop's are supported by the peer, we don't really care
+ * about this but we bookkeep it anyway.
+ */
+ uint8_t peer_supports_pktdrop;
+
+ /* Do we allow V6/V4? */
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ /* Address scoping flags */
+ /* scope value for IPv4 */
+ uint8_t ipv4_local_scope;
+ /* scope values for IPv6 */
+ uint8_t local_scope;
+ uint8_t site_scope;
+ /* loopback scope */
+ uint8_t loopback_scope;
+ /* flags to handle send alternate net tracking */
+ uint8_t used_alt_onsack;
+ uint8_t used_alt_asconfack;
+ uint8_t fast_retran_loss_recovery;
+ uint8_t sat_t3_loss_recovery;
+ uint8_t dropped_special_cnt;
+ uint8_t seen_a_sack_this_pkt;
+ uint8_t stream_reset_outstanding;
+ uint8_t stream_reset_out_is_outstanding;
+ uint8_t delayed_connection;
+ uint8_t ifp_had_enobuf;
+ uint8_t saw_sack_with_frags;
+ uint8_t in_restart_hash;
+ uint8_t assoc_up_sent;
+ uint8_t adaptation_needed;
+ uint8_t adaptation_sent;
+ /* CMT variables */
+ uint8_t cmt_dac_pkts_rcvd;
+ uint8_t sctp_cmt_on_off;
+ uint8_t iam_blocking;
+ uint8_t cookie_how[8];
+ /* JRS 5/21/07 - CMT PF variable */
+ uint8_t sctp_cmt_pf;
+ /*
+ * The mapping array is used to track out of order sequences above
+ * last_acked_seq. 0 indicates packet missing, 1 indicates packet
+ * rec'd. We slide it up every time we raise last_acked_seq and 0
+ * trailing locations out. If I get a TSN above the array
+ * mappingArraySz, I discard the datagram and let retransmit happen.
+ * (A small bit-addressing sketch follows at the end of this header.)
+ */
+ uint32_t marked_retrans;
+ uint32_t timoinit;
+ uint32_t timodata;
+ uint32_t timosack;
+ uint32_t timoshutdown;
+ uint32_t timoheartbeat;
+ uint32_t timocookie;
+ uint32_t timoshutdownack;
+ struct timeval start_time;
+ struct timeval discontinuity_time;
+};
+
+#endif
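As noted in the mapping array comment above, a small sketch of the usual bit addressing: the TSN minus mapping_array_base_tsn gives the bit offset into mapping_array. The helper name is hypothetical, only the addressing scheme is taken from the comments:

#include <stdint.h>

/*
 * Hypothetical helper: gap 0 is mapping_array_base_tsn itself, each later
 * TSN is one bit further along in the array.
 */
int
mapping_array_bit_is_set(const uint8_t *mapping_array, uint32_t base_tsn,
    uint32_t tsn)
{
	uint32_t gap = tsn - base_tsn;	/* serial arithmetic handles wrap */

	return ((mapping_array[gap >> 3] >> (gap & 0x07)) & 0x01);
}
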
Index: igmp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp.h -L sys/netinet/igmp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp.h
+++ sys/netinet/igmp.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)igmp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/igmp.h,v 1.13 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/igmp.h,v 1.15 2007/06/15 18:59:10 bms Exp $
*/
#ifndef _NETINET_IGMP_H_
@@ -55,7 +55,44 @@
struct in_addr igmp_group; /* group address being reported */
}; /* (zero for queries) */
-#define IGMP_MINLEN 8
+struct igmpv3 {
+ u_char igmp_type; /* version & type of IGMP message */
+ u_char igmp_code; /* subtype for routing msgs */
+ u_short igmp_cksum; /* IP-style checksum */
+ struct in_addr igmp_group; /* group address being reported */
+ /* (zero for queries) */
+ u_char igmp_misc; /* reserved/suppress/robustness */
+ u_char igmp_qqi; /* querier's query interval */
+ u_short igmp_numsrc; /* number of sources */
+ /*struct in_addr igmp_sources[1];*/ /* source addresses */
+};
+
+struct igmp_grouprec {
+ u_char ig_type; /* record type */
+ u_char ig_datalen; /* length of auxiliary data */
+ u_short ig_numsrc; /* number of sources */
+ struct in_addr ig_group; /* group address being reported */
+ /*struct in_addr ig_sources[1];*/ /* source addresses */
+};
+
+struct igmp_report {
+ u_char ir_type; /* record type */
+ u_char ir_rsv1; /* reserved */
+ u_short ir_cksum; /* checksum */
+ u_short ir_rsv2; /* reserved */
+ u_short ir_numgrps; /* number of group records */
+ struct igmp_grouprec ir_groups[1]; /* group records */
+};
+
+#define IGMP_MINLEN 8
+#define IGMP_HDRLEN 8
+#define IGMP_GRPREC_HDRLEN 8
+#define IGMP_PREPEND 0
+
+#if 0
+#define IGMP_QRV(pigmp) ((pigmp)->igmp_misc & (0x07)) /* XXX */
+#define IGMP_MAXSOURCES(len) (((len) - 12) >> 2) /* XXX */
+#endif
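The disabled macros above follow directly from struct igmpv3: the querier's robustness variable sits in the low three bits of igmp_misc, and the fixed header is 12 bytes followed by 4-byte source addresses. A small worked example with invented sample values:

#include <stdio.h>

int
main(void)
{
	unsigned char igmp_misc = 0x12;	/* sample value off the wire */
	int len = 20;			/* 12-byte fixed header + 2 sources */

	printf("QRV = %d\n", igmp_misc & 0x07);	       /* -> 2 */
	printf("max sources = %d\n", (len - 12) >> 2); /* -> 2 */
	return (0);
}
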
/*
* Message types, including version number.
@@ -71,6 +108,8 @@
#define IGMP_MTRACE_RESP 0x1e /* traceroute resp.(to sender)*/
#define IGMP_MTRACE 0x1f /* mcast traceroute messages */
+#define IGMP_V3_MEMBERSHIP_REPORT 0x22 /* Ver. 3 membership report */
+
#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */
/* query (in seconds) according */
/* to RFC1112 */
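The new struct igmpv3 above is only the fixed 12-byte header; the per-source
addresses follow on the wire, and the disabled IGMP_MAXSOURCES macro shows how
the source count is bounded by the message length. A small userland sketch
(hypothetical wire bytes, with a local struct mirroring the diff) that pulls
the robustness value and source count out of a v3 query:

    #include <stdint.h>
    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Mirrors the fixed part of struct igmpv3 from the diff (12 bytes). */
    struct igmpv3_hdr {
        uint8_t  igmp_type;
        uint8_t  igmp_code;
        uint16_t igmp_cksum;
        struct in_addr igmp_group;
        uint8_t  igmp_misc;         /* reserved/suppress/robustness */
        uint8_t  igmp_qqi;
        uint16_t igmp_numsrc;
    } __attribute__((packed));

    int
    main(void)
    {
        /* Hypothetical wire bytes: a v3 query for 224.0.0.1 with 2 sources. */
        unsigned char wire[12] = { 0x11, 0x64, 0x00, 0x00,
                                   0xe0, 0x00, 0x00, 0x01,
                                   0x02, 0x7d, 0x00, 0x02 };
        const struct igmpv3_hdr *q = (const void *)wire;
        unsigned int qrv = q->igmp_misc & 0x07;     /* as the #if 0 macro hints */
        unsigned int numsrc = ntohs(q->igmp_numsrc);
        unsigned int payload_len = 12 + numsrc * 4;

        printf("QRV=%u numsrc=%u IGMP_MAXSOURCES(%u)=%u\n",
            qrv, numsrc, payload_len, (payload_len - 12) >> 2);
        return (0);
    }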
--- /dev/null
+++ sys/netinet/sctp_bsd_addr.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_bsd_addr.h,v 1.6 2007/05/29 09:29:02 rrs Exp $");
+
+#ifndef __sctp_bsd_addr_h__
+#define __sctp_bsd_addr_h__
+#include <netinet/sctp_pcb.h>
+
+#if defined(_KERNEL)
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+void sctp_wakeup_iterator(void);
+
+void sctp_startup_iterator(void);
+
+#endif
+
+void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa);
+
+#ifdef SCTP_PACKET_LOGGING
+
+void sctp_packet_log(struct mbuf *m, int length);
+int sctp_copy_out_packet_log(uint8_t * target, int length);
+
+#endif
+
+void sctp_addr_change(struct ifaddr *ifa, int cmd);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/sctp_crc32.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.h,v 1.5 2004/08/17 04:06:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_crc32.h,v 1.3 2007/05/08 17:01:10 rrs Exp $");
+
+#ifndef __crc32c_h__
+#define __crc32c_h__
+
+#ifndef SCTP_USE_ADLER32
+
+#if defined(_KERNEL)
+uint32_t update_crc32(uint32_t, unsigned char *, unsigned int);
+
+uint32_t old_update_crc32(uint32_t, unsigned char *, unsigned int);
+
+uint32_t sctp_csum_finalize(uint32_t);
+
+
+#endif /* _KERNEL */
+#endif /* !SCTP_USE_ADLER32 */
+#endif /* __crc32c_h__ */
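The header above declares update_crc32() and sctp_csum_finalize() for SCTP's
CRC32c checksum when SCTP_USE_ADLER32 is not defined. A standalone sketch of
the same calculation, where crc32c_update() is a plain bitwise stand-in for the
kernel's table-driven update_crc32() and the final complement plays the role of
sctp_csum_finalize():

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustrative stand-in for update_crc32(): bitwise, reflected CRC32c
     * (polynomial 0x1EDC6F41, reflected 0x82F63B78).
     */
    static uint32_t
    crc32c_update(uint32_t crc, const unsigned char *buf, unsigned int len)
    {
        unsigned int i;
        int bit;

        for (i = 0; i < len; i++) {
            crc ^= buf[i];
            for (bit = 0; bit < 8; bit++)
                crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78 : 0);
        }
        return (crc);
    }

    int
    main(void)
    {
        const char pkt[] = "123456789";     /* standard CRC check vector */
        uint32_t crc = 0xFFFFFFFF;          /* seed, as the SCTP code does */

        crc = crc32c_update(crc, (const unsigned char *)pkt, 9);
        /* sctp_csum_finalize() complements (and byte-swaps) the result. */
        printf("crc32c = 0x%08x\n", ~crc);  /* expect 0xe3069283 */
        return (0);
    }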
Index: udp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/udp.h -L sys/netinet/udp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/udp.h
+++ sys/netinet/udp.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,14 +28,14 @@
* SUCH DAMAGE.
*
* @(#)udp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/udp.h,v 1.9 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/udp.h,v 1.10 2007/02/20 10:13:11 rwatson Exp $
*/
#ifndef _NETINET_UDP_H_
-#define _NETINET_UDP_H_
+#define _NETINET_UDP_H_
/*
- * Udp protocol header.
+ * UDP protocol header.
* Per RFC 768, September, 1981.
*/
struct udphdr {
Index: tcp_seq.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_seq.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_seq.h -L sys/netinet/tcp_seq.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_seq.h
+++ sys/netinet/tcp_seq.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
- * $FreeBSD: src/sys/netinet/tcp_seq.h,v 1.25 2005/04/10 05:24:59 ps Exp $
+ * $FreeBSD: src/sys/netinet/tcp_seq.h,v 1.26 2006/06/18 14:24:12 andre Exp $
*/
#ifndef _NETINET_TCP_SEQ_H_
@@ -51,19 +51,6 @@
#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
/*
- * TCP connection counts are 32 bit integers operated
- * on with modular arithmetic. These macros can be
- * used to compare such integers.
- */
-#define CC_LT(a,b) ((int)((a)-(b)) < 0)
-#define CC_LEQ(a,b) ((int)((a)-(b)) <= 0)
-#define CC_GT(a,b) ((int)((a)-(b)) > 0)
-#define CC_GEQ(a,b) ((int)((a)-(b)) >= 0)
-
-/* Macro to increment a CC: skip 0 which has a special meaning */
-#define CC_INC(c) (++(c) == 0 ? ++(c) : (c))
-
-/*
* Macros to initialize tcp sequence numbers for
* send and receive from initial send and receive
* sequence numbers.
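The surviving TSTMP_* macros (and the TCP SEQ_* macros elsewhere in this
header) use the same comparison trick the removed CC_* macros did: subtract two
32-bit counters and look at the sign of the result as a signed int, which stays
correct across wraparound as long as the two values are within 2^31 of each
other. A tiny standalone check of that property:

    #include <stdint.h>
    #include <stdio.h>

    /* Same comparison trick used by TSTMP_GEQ()/SEQ_LT() in tcp_seq.h. */
    #define SEQ32_LT(a, b)  ((int32_t)((a) - (b)) < 0)

    int
    main(void)
    {
        uint32_t a = 0xfffffff0;    /* just before wrap */
        uint32_t b = 0x00000010;    /* just after wrap  */

        /* Naive "<" gets the order wrong; the modular compare does not. */
        printf("naive: %d  modular: %d\n", a < b, SEQ32_LT(a, b));
        return (0);
    }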
Index: in.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in.c -L sys/netinet/in.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in.c
+++ sys/netinet/in.c
@@ -28,15 +28,18 @@
* SUCH DAMAGE.
*
* @(#)in.c 8.4 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/in.c,v 1.85.2.6 2006/01/31 16:11:37 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in.c,v 1.102 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
+#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -48,10 +51,7 @@
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
-
-#include <netinet/igmp_var.h>
-
-static MALLOC_DEFINE(M_IPMADDR, "in_multi", "internet multicast address");
+#include <netinet/ip_var.h>
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
@@ -63,6 +63,7 @@
static void in_socktrim(struct sockaddr_in *);
static int in_ifinit(struct ifnet *,
struct in_ifaddr *, struct sockaddr_in *, int);
+static void in_purgemaddrs(struct ifnet *);
static int subnetsarelocal = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
@@ -72,17 +73,6 @@
&sameprefixcarponly, 0,
"Refuse to create same prefixes on different interfaces");
-/*
- * The IPv4 multicast list (in_multihead and associated structures) are
- * protected by the global in_multi_mtx. See in_var.h for more details. For
- * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
- * ip_output() to send IGMP packets while holding the lock; this probably is
- * not quite desirable.
- */
-struct in_multihead in_multihead; /* XXX BSS initialization */
-struct mtx in_multi_mtx;
-MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
-
extern struct inpcbinfo ripcbinfo;
extern struct inpcbinfo udbinfo;
@@ -93,8 +83,7 @@
* Otherwise, it includes only the directly-connected (sub)nets.
*/
int
-in_localaddr(in)
- struct in_addr in;
+in_localaddr(struct in_addr in)
{
register u_long i = ntohl(in.s_addr);
register struct in_ifaddr *ia;
@@ -116,8 +105,7 @@
* on one of its interfaces.
*/
int
-in_localip(in)
- struct in_addr in;
+in_localip(struct in_addr in)
{
struct in_ifaddr *ia;
@@ -134,13 +122,12 @@
* may be forwarded.
*/
int
-in_canforward(in)
- struct in_addr in;
+in_canforward(struct in_addr in)
{
register u_long i = ntohl(in.s_addr);
register u_long net;
- if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i))
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
return (0);
if (IN_CLASSA(i)) {
net = i & IN_CLASSA_NET;
@@ -154,8 +141,7 @@
* Trim a mask in a sockaddr
*/
static void
-in_socktrim(ap)
-struct sockaddr_in *ap;
+in_socktrim(struct sockaddr_in *ap)
{
register char *cplim = (char *) &ap->sin_addr;
register char *cp = (char *) (&ap->sin_addr + 1);
@@ -191,9 +177,7 @@
}
static void
-in_len2mask(mask, len)
- struct in_addr *mask;
- int len;
+in_len2mask(struct in_addr *mask, int len)
{
int i;
u_char *p;
@@ -212,30 +196,45 @@
*/
/* ARGSUSED */
int
-in_control(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- register struct ifnet *ifp;
- struct thread *td;
+in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
+ struct thread *td)
{
register struct ifreq *ifr = (struct ifreq *)data;
register struct in_ifaddr *ia = 0, *iap;
register struct ifaddr *ifa;
+ struct in_addr allhosts_addr;
struct in_addr dst;
struct in_ifaddr *oia;
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
struct sockaddr_in oldaddr;
int error, hostIsNew, iaIsNew, maskIsNew, s;
+ int iaIsFirst;
+ iaIsFirst = 0;
iaIsNew = 0;
+ allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
switch (cmd) {
case SIOCALIFADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
+ if (!ifp)
+ return EINVAL;
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
case SIOCDLIFADDR:
- if (td && (error = suser(td)) != 0)
- return error;
- /*fall through*/
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_DELIFADDR);
+ if (error)
+ return (error);
+ }
+ if (!ifp)
+ return EINVAL;
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
case SIOCGLIFADDR:
if (!ifp)
return EINVAL;
@@ -264,6 +263,8 @@
break;
}
}
+ if (ia == NULL)
+ iaIsFirst = 1;
}
switch (cmd) {
@@ -292,8 +293,11 @@
case SIOCSIFADDR:
case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
- if (td && (error = suser(td)) != 0)
- return error;
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
if (ifp == 0)
return (EADDRNOTAVAIL);
@@ -330,8 +334,11 @@
break;
case SIOCSIFBRDADDR:
- if (td && (error = suser(td)) != 0)
- return error;
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case SIOCGIFADDR:
@@ -399,8 +406,11 @@
(struct sockaddr_in *) &ifr->ifr_addr, 1);
if (error != 0 && iaIsNew)
break;
- if (error == 0)
+ if (error == 0) {
+ if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
+ in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
return (0);
case SIOCSIFNETMASK:
@@ -443,8 +453,11 @@
if ((ifp->if_flags & IFF_BROADCAST) &&
(ifra->ifra_broadaddr.sin_family == AF_INET))
ia->ia_broadaddr = ifra->ifra_broadaddr;
- if (error == 0)
+ if (error == 0) {
+ if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
+ in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
return (error);
case SIOCDIFADDR:
@@ -479,8 +492,27 @@
s = splnet();
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
- if (ia->ia_addr.sin_family == AF_INET)
+ if (ia->ia_addr.sin_family == AF_INET) {
LIST_REMOVE(ia, ia_hash);
+ /*
+ * If this is the last IPv4 address configured on this
+ * interface, leave the all-hosts group.
+ * XXX: This is quite ugly because of locking and structure.
+ */
+ oia = NULL;
+ IFP_TO_IA(ifp, oia);
+ if (oia == NULL) {
+ struct in_multi *inm;
+
+ IFF_LOCKGIANT(ifp);
+ IN_MULTI_LOCK();
+ IN_LOOKUP_MULTI(allhosts_addr, ifp, inm);
+ if (inm != NULL)
+ in_delmulti_locked(inm);
+ IN_MULTI_UNLOCK();
+ IFF_UNLOCKGIANT(ifp);
+ }
+ }
IFAFREE(&ia->ia_ifa);
splx(s);
@@ -504,12 +536,8 @@
* other values may be returned from in_ioctl()
*/
static int
-in_lifaddr_ioctl(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct if_laddrreq *iflr = (struct if_laddrreq *)data;
struct ifaddr *ifa;
@@ -579,9 +607,9 @@
struct in_ifaddr *ia;
struct in_addr mask, candidate, match;
struct sockaddr_in *sin;
- int cmp;
bzero(&mask, sizeof(mask));
+ bzero(&match, sizeof(match));
if (iflr->flags & IFLR_PREFIX) {
/* lookup a prefix rather than address. */
in_len2mask(&mask, iflr->prefixlen);
@@ -594,25 +622,20 @@
if (match.s_addr != sin->sin_addr.s_addr)
return EINVAL;
- cmp = 1;
} else {
- if (cmd == SIOCGLIFADDR) {
- /* on getting an address, take the 1st match */
- cmp = 0; /*XXX*/
- } else {
- /* on deleting an address, do exact match */
+ /* on getting an address, take the 1st match */
+ /* on deleting an address, do exact match */
+ if (cmd != SIOCGLIFADDR) {
in_len2mask(&mask, 32);
sin = (struct sockaddr_in *)&iflr->addr;
match.s_addr = sin->sin_addr.s_addr;
-
- cmp = 1;
}
}
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- if (!cmp)
+ if (match.s_addr == 0)
break;
candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
candidate.s_addr &= mask.s_addr;
@@ -669,10 +692,9 @@
* Delete any existing route for an interface.
*/
void
-in_ifscrub(ifp, ia)
- register struct ifnet *ifp;
- register struct in_ifaddr *ia;
+in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
{
+
in_scrubprefix(ia);
}
@@ -681,11 +703,8 @@
* and routing table entry.
*/
static int
-in_ifinit(ifp, ia, sin, scrub)
- register struct ifnet *ifp;
- register struct in_ifaddr *ia;
- struct sockaddr_in *sin;
- int scrub;
+in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
+ int scrub)
{
register u_long i = ntohl(sin->sin_addr.s_addr);
struct sockaddr_in oldaddr;
@@ -769,16 +788,6 @@
if ((error = in_addprefix(ia, flags)) != 0)
return (error);
- /*
- * If the interface supports multicast, join the "all hosts"
- * multicast group on that interface.
- */
- if (ifp->if_flags & IFF_MULTICAST) {
- struct in_addr addr;
-
- addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
- in_addmulti(&addr, ifp);
- }
return (error);
}
@@ -786,21 +795,19 @@
((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
? RTF_HOST : 0)
/*
- * Check if we have a route for the given prefix already or add a one
- * accordingly.
+ * Check if we have a route for the given prefix already or add one accordingly.
*/
static int
-in_addprefix(target, flags)
- struct in_ifaddr *target;
- int flags;
+in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p, m;
int error;
- if ((flags & RTF_HOST) != 0)
+ if ((flags & RTF_HOST) != 0) {
prefix = target->ia_dstaddr.sin_addr;
- else {
+ mask.s_addr = 0;
+ } else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
@@ -851,8 +858,7 @@
* otherwise.
*/
static int
-in_scrubprefix(target)
- struct in_ifaddr *target;
+in_scrubprefix(struct in_ifaddr *target)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p;
@@ -918,9 +924,7 @@
* Return 1 if the address might be a local broadcast address.
*/
int
-in_broadcast(in, ifp)
- struct in_addr in;
- struct ifnet *ifp;
+in_broadcast(struct in_addr in, struct ifnet *ifp)
{
register struct ifaddr *ifa;
u_long t;
@@ -954,102 +958,26 @@
return (0);
#undef ia
}
-/*
- * Add an address to the list of IP multicast addresses for a given interface.
- */
-struct in_multi *
-in_addmulti(ap, ifp)
- register struct in_addr *ap;
- register struct ifnet *ifp;
-{
- register struct in_multi *inm;
- int error;
- struct sockaddr_in sin;
- struct ifmultiaddr *ifma;
-
- IFF_LOCKGIANT(ifp);
- IN_MULTI_LOCK();
- /*
- * Call generic routine to add membership or increment
- * refcount. It wants addresses in the form of a sockaddr,
- * so we build one here (being careful to zero the unused bytes).
- */
- bzero(&sin, sizeof sin);
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof sin;
- sin.sin_addr = *ap;
- error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
- if (error) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return 0;
- }
-
- /*
- * If ifma->ifma_protospec is null, then if_addmulti() created
- * a new record. Otherwise, we are done.
- */
- if (ifma->ifma_protospec != NULL) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return ifma->ifma_protospec;
- }
-
- inm = (struct in_multi *)malloc(sizeof(*inm), M_IPMADDR,
- M_NOWAIT | M_ZERO);
- if (inm == NULL) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return (NULL);
- }
-
- inm->inm_addr = *ap;
- inm->inm_ifp = ifp;
- inm->inm_ifma = ifma;
- ifma->ifma_protospec = inm;
- LIST_INSERT_HEAD(&in_multihead, inm, inm_link);
-
- /*
- * Let IGMP know that we have joined a new IP multicast group.
- */
- igmp_joingroup(inm);
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return (inm);
-}
/*
- * Delete a multicast address record.
+ * Delete all IPv4 multicast address records, and associated link-layer
+ * multicast address records, associated with ifp.
*/
-void
-in_delmulti(inm)
- register struct in_multi *inm;
+static void
+in_purgemaddrs(struct ifnet *ifp)
{
- struct ifmultiaddr *ifma;
- struct in_multi my_inm;
- struct ifnet *ifp;
+ struct in_multi *inm;
+ struct in_multi *oinm;
- ifp = inm->inm_ifp;
+#ifdef DIAGNOSTIC
+ printf("%s: purging ifp %p\n", __func__, ifp);
+#endif
IFF_LOCKGIANT(ifp);
IN_MULTI_LOCK();
- ifma = inm->inm_ifma;
- my_inm.inm_ifp = NULL ; /* don't send the leave msg */
- if (ifma->ifma_refcount == 1) {
- /*
- * No remaining claims to this record; let IGMP know that
- * we are leaving the multicast group.
- * But do it after the if_delmulti() which might reset
- * the interface and nuke the packet.
- */
- my_inm = *inm ;
- ifma->ifma_protospec = NULL;
- LIST_REMOVE(inm, inm_link);
- free(inm, M_IPMADDR);
- }
- /* XXX - should be separate API for when we have an ifma? */
- if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
- if (my_inm.inm_ifp != NULL)
- igmp_leavegroup(&my_inm);
+ LIST_FOREACH_SAFE(inm, &in_multihead, inm_link, oinm) {
+ if (inm->inm_ifp == ifp)
+ in_delmulti_locked(inm);
+ }
IN_MULTI_UNLOCK();
IFF_UNLOCKGIANT(ifp);
}
@@ -1058,10 +986,10 @@
* On interface removal, clean up IPv4 data structures hung off of the ifnet.
*/
void
-in_ifdetach(ifp)
- struct ifnet *ifp;
+in_ifdetach(struct ifnet *ifp)
{
in_pcbpurgeif0(&ripcbinfo, ifp);
in_pcbpurgeif0(&udbinfo, ifp);
+ in_purgemaddrs(ifp);
}
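The in.c changes above replace the suser() checks with per-operation
priv_check() calls and move the all-hosts group membership out of in_ifinit():
the group is now joined only when the first IPv4 address is configured on a
multicast-capable interface, and left when the last address is removed. For
reference, the group address those paths build is the usual 224.0.0.1, as this
small userland snippet shows:

    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    int
    main(void)
    {
        /* Same construction as the new in_control() code above. */
        struct in_addr allhosts_addr;

        allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
        printf("all-hosts group: %s\n", inet_ntoa(allhosts_addr)); /* 224.0.0.1 */
        return (0);
    }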
Index: ip_dummynet.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_dummynet.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_dummynet.h -L sys/netinet/ip_dummynet.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_dummynet.h
+++ sys/netinet/ip_dummynet.h
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.36.2.2 2006/02/17 16:46:47 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40 2007/06/17 00:33:34 mjacob Exp $
*/
#ifndef _IP_DUMMYNET_H
@@ -70,11 +70,6 @@
* virtual time wraps every 15 days.
*/
-/*
- * The OFFSET_OF macro is used to return the offset of a field within
- * a structure. It is used by the heap management routines.
- */
-#define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) )
/*
* The maximum hash table size for queues. This value must be a power
@@ -121,7 +116,7 @@
int dn_dir; /* action when packet comes out. */
#define DN_TO_IP_OUT 1
#define DN_TO_IP_IN 2
-#define DN_TO_BDG_FWD 3
+/* Obsolete: #define DN_TO_BDG_FWD 3 */
#define DN_TO_ETH_DEMUX 4
#define DN_TO_ETH_OUT 5
#define DN_TO_IP6_IN 6
Index: ip.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip.h -L sys/netinet/ip.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip.h
+++ sys/netinet/ip.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,16 +28,17 @@
* SUCH DAMAGE.
*
* @(#)ip.h 8.2 (Berkeley) 6/1/94
- * $FreeBSD: src/sys/netinet/ip.h,v 1.29 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/ip.h,v 1.31 2007/05/11 11:00:48 rwatson Exp $
*/
#ifndef _NETINET_IP_H_
-#define _NETINET_IP_H_
+#define _NETINET_IP_H_
#include <sys/cdefs.h>
/*
* Definitions for internet protocol version 4.
+ *
* Per RFC 791, September 1981.
*/
#define IPVERSION 4
@@ -65,7 +67,7 @@
u_char ip_p; /* protocol */
u_short ip_sum; /* checksum */
struct in_addr ip_src,ip_dst; /* source and dest address */
-} __packed;
+} __packed __aligned(4);
#ifdef CTASSERT
CTASSERT(sizeof (struct ip) == 20);
@@ -74,7 +76,7 @@
#define IP_MAXPACKET 65535 /* maximum packet size */
/*
- * Definitions for IP type of service (ip_tos)
+ * Definitions for IP type of service (ip_tos).
*/
#define IPTOS_LOWDELAY 0x10
#define IPTOS_THROUGHPUT 0x08
@@ -87,7 +89,7 @@
#endif
/*
- * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused).
*/
#define IPTOS_PREC_NETCONTROL 0xe0
#define IPTOS_PREC_INTERNETCONTROL 0xc0
@@ -99,8 +101,8 @@
#define IPTOS_PREC_ROUTINE 0x00
/*
- * ECN (Explicit Congestion Notification) codepoints in RFC3168
- * mapped to the lower 2 bits of the TOS field.
+ * ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the
+ * lower 2 bits of the TOS field.
*/
#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */
#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */
@@ -138,7 +140,7 @@
*/
#define IPOPT_OPTVAL 0 /* option ID */
#define IPOPT_OLEN 1 /* option length */
-#define IPOPT_OFFSET 2 /* offset within option */
+#define IPOPT_OFFSET 2 /* offset within option */
#define IPOPT_MINOFF 4 /* min value of above */
/*
@@ -165,12 +167,12 @@
} ipt_timestamp;
};
-/* flag bits for ipt_flg */
+/* Flag bits for ipt_flg. */
#define IPOPT_TS_TSONLY 0 /* timestamps only */
#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
#define IPOPT_TS_PRESPEC 3 /* specified modules only */
-/* bits for security (not byte swapped) */
+/* Bits for security (not byte swapped). */
#define IPOPT_SECUR_UNCLASS 0x0000
#define IPOPT_SECUR_CONFID 0xf135
#define IPOPT_SECUR_EFTO 0x789a
@@ -186,7 +188,6 @@
#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
#define IPFRAGTTL 60 /* time to live for frags, slowhz */
#define IPTTLDEC 1 /* subtracted when forwarding */
-
#define IP_MSS 576 /* default maximum segment size */
/*
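The ip header struct above gains __aligned(4) alongside __packed, so the
CTASSERT that sizeof(struct ip) is 20 keeps holding while the compiler may
still assume 4-byte alignment for member access. A quick userland check of the
same two properties (on platforms whose netinet/ip.h differs, the alignment
printed may not be 4):

    #include <stdio.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>

    int
    main(void)
    {
        /* Mirrors the CTASSERT(sizeof(struct ip) == 20) in the header. */
        printf("sizeof(struct ip) = %zu, alignment = %zu\n",
            sizeof(struct ip), _Alignof(struct ip));
        return (0);
    }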
--- /dev/null
+++ sys/netinet/sctp_sysctl.c
@@ -0,0 +1,809 @@
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_sysctl.c,v 1.16 2007/09/13 14:43:54 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_constants.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+/*
+ * sysctl tunable variables
+ */
+uint32_t sctp_sendspace = SCTPCTL_MAXDGRAM_DEFAULT;
+uint32_t sctp_recvspace = SCTPCTL_RECVSPACE_DEFAULT;
+uint32_t sctp_auto_asconf = SCTPCTL_AUTOASCONF_DEFAULT;
+uint32_t sctp_ecn_enable = SCTPCTL_ECN_ENABLE_DEFAULT;
+uint32_t sctp_ecn_nonce = SCTPCTL_ECN_NONCE_DEFAULT;
+uint32_t sctp_strict_sacks = SCTPCTL_STRICT_SACKS_DEFAULT;
+uint32_t sctp_no_csum_on_loopback = SCTPCTL_LOOPBACK_NOCSUM_DEFAULT;
+uint32_t sctp_strict_init = SCTPCTL_STRICT_INIT_DEFAULT;
+uint32_t sctp_peer_chunk_oh = SCTPCTL_PEER_CHKOH_DEFAULT;
+uint32_t sctp_max_burst_default = SCTPCTL_MAXBURST_DEFAULT;
+uint32_t sctp_max_chunks_on_queue = SCTPCTL_MAXCHUNKS_DEFAULT;
+uint32_t sctp_hashtblsize = SCTPCTL_TCBHASHSIZE_DEFAULT;
+uint32_t sctp_pcbtblsize = SCTPCTL_PCBHASHSIZE_DEFAULT;
+uint32_t sctp_min_split_point = SCTPCTL_MIN_SPLIT_POINT_DEFAULT;
+uint32_t sctp_chunkscale = SCTPCTL_CHUNKSCALE_DEFAULT;
+uint32_t sctp_delayed_sack_time_default = SCTPCTL_DELAYED_SACK_TIME_DEFAULT;
+uint32_t sctp_sack_freq_default = SCTPCTL_SACK_FREQ_DEFAULT;
+uint32_t sctp_system_free_resc_limit = SCTPCTL_SYS_RESOURCE_DEFAULT;
+uint32_t sctp_asoc_free_resc_limit = SCTPCTL_ASOC_RESOURCE_DEFAULT;
+uint32_t sctp_heartbeat_interval_default = SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT;
+uint32_t sctp_pmtu_raise_time_default = SCTPCTL_PMTU_RAISE_TIME_DEFAULT;
+uint32_t sctp_shutdown_guard_time_default = SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT;
+uint32_t sctp_secret_lifetime_default = SCTPCTL_SECRET_LIFETIME_DEFAULT;
+uint32_t sctp_rto_max_default = SCTPCTL_RTO_MAX_DEFAULT;
+uint32_t sctp_rto_min_default = SCTPCTL_RTO_MIN_DEFAULT;
+uint32_t sctp_rto_initial_default = SCTPCTL_RTO_INITIAL_DEFAULT;
+uint32_t sctp_init_rto_max_default = SCTPCTL_INIT_RTO_MAX_DEFAULT;
+uint32_t sctp_valid_cookie_life_default = SCTPCTL_VALID_COOKIE_LIFE_DEFAULT;
+uint32_t sctp_init_rtx_max_default = SCTPCTL_INIT_RTX_MAX_DEFAULT;
+uint32_t sctp_assoc_rtx_max_default = SCTPCTL_ASSOC_RTX_MAX_DEFAULT;
+uint32_t sctp_path_rtx_max_default = SCTPCTL_PATH_RTX_MAX_DEFAULT;
+uint32_t sctp_add_more_threshold = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+uint32_t sctp_nr_outgoing_streams_default = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
+uint32_t sctp_cmt_on_off = SCTPCTL_CMT_ON_OFF_DEFAULT;
+uint32_t sctp_cmt_use_dac = SCTPCTL_CMT_USE_DAC_DEFAULT;
+uint32_t sctp_cmt_pf = SCTPCTL_CMT_PF_DEFAULT;
+uint32_t sctp_use_cwnd_based_maxburst = SCTPCTL_CWND_MAXBURST_DEFAULT;
+uint32_t sctp_early_fr = SCTPCTL_EARLY_FAST_RETRAN_DEFAULT;
+uint32_t sctp_early_fr_msec = SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT;
+uint32_t sctp_asconf_auth_nochk = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
+uint32_t sctp_auth_disable = SCTPCTL_AUTH_DISABLE_DEFAULT;
+uint32_t sctp_nat_friendly = SCTPCTL_NAT_FRIENDLY_DEFAULT;
+uint32_t sctp_L2_abc_variable = SCTPCTL_ABC_L_VAR_DEFAULT;
+uint32_t sctp_mbuf_threshold_count = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
+uint32_t sctp_do_drain = SCTPCTL_DO_SCTP_DRAIN_DEFAULT;
+uint32_t sctp_hb_maxburst = SCTPCTL_HB_MAX_BURST_DEFAULT;
+uint32_t sctp_abort_if_one_2_one_hits_limit = SCTPCTL_ABORT_AT_LIMIT_DEFAULT;
+uint32_t sctp_strict_data_order = SCTPCTL_STRICT_DATA_ORDER_DEFAULT;
+uint32_t sctp_min_residual = SCTPCTL_MIN_RESIDUAL_DEFAULT;
+uint32_t sctp_max_retran_chunk = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
+uint32_t sctp_logging_level = SCTPCTL_LOGGING_LEVEL_DEFAULT;
+
+/* JRS - Variable for default congestion control module */
+uint32_t sctp_default_cc_module = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
+uint32_t sctp_default_frag_interleave = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
+uint32_t sctp_mobility_base = SCTPCTL_MOBILITY_BASE_DEFAULT;
+uint32_t sctp_mobility_fasthandoff = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT;
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+struct sctp_log sctp_log;
+
+#endif
+#ifdef SCTP_DEBUG
+uint32_t sctp_debug_on = SCTPCTL_DEBUG_DEFAULT;
+
+#endif
+struct sctpstat sctpstat;
+
+
+/* It returns an upper limit. No filtering is done here */
+static unsigned int
+number_of_addresses(struct sctp_inpcb *inp)
+{
+ int cnt;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_laddr *laddr;
+
+ cnt = 0;
+	/* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) ||
+ (sctp_ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if ((laddr->ifa->address.sa.sa_family == AF_INET) ||
+ (laddr->ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ return (cnt);
+}
+
+static int
+copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct xsctp_laddr xladdr;
+ struct sctp_laddr *laddr;
+ int error;
+
+ /* Turn on all the appropriate scope */
+ if (stcb) {
+ /* use association specific values */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* use generic values for endpoints */
+ loopback_scope = 1;
+ ipv4_local_scope = 1;
+ local_scope = 1;
+ site_scope = 1;
+ }
+
+ /* use only address families of interest */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ ipv4_addr_legal = 0;
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ ipv6_addr_legal = 0;
+ }
+
+ error = 0;
+
+	/* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (-1);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn))
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * ignore if blacklisted at
+ * association level
+ */
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ }
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) && (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0)
+ continue;
+ if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
+ continue;
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ continue;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ /*
+ * bad link local
+ * address
+ */
+ if (sa6_recoverscope(sin6) != 0)
+ continue;
+ }
+ }
+ if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)))
+ continue;
+ } else
+ continue;
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&sctp_ifa->address, sizeof(union sctp_sockstore));
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ /* ignore if blacklisted at association level */
+ if (stcb && sctp_is_addr_restricted(stcb, laddr->ifa))
+ continue;
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&laddr->ifa->address, sizeof(union sctp_sockstore));
+ xladdr.start_time.tv_sec = (uint32_t) laddr->start_time.tv_sec;
+ xladdr.start_time.tv_usec = (uint32_t) laddr->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ xladdr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ return (0);
+ }
+}
+
+/*
+ * sysctl functions
+ */
+static int
+sctp_assoclist(SYSCTL_HANDLER_ARGS)
+{
+ unsigned int number_of_endpoints;
+ unsigned int number_of_local_addresses;
+ unsigned int number_of_associations;
+ unsigned int number_of_remote_addresses;
+ unsigned int n;
+ int error;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct xsctp_inpcb xinpcb;
+ struct xsctp_tcb xstcb;
+ struct xsctp_raddr xraddr;
+
+ number_of_endpoints = 0;
+ number_of_local_addresses = 0;
+ number_of_associations = 0;
+ number_of_remote_addresses = 0;
+
+ SCTP_INP_INFO_RLOCK();
+ if (req->oldptr == USER_ADDR_NULL) {
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ number_of_endpoints++;
+ number_of_local_addresses += number_of_addresses(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ number_of_associations++;
+ number_of_local_addresses += number_of_addresses(inp);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ number_of_remote_addresses++;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) +
+ (number_of_local_addresses + number_of_endpoints + number_of_associations) * sizeof(struct xsctp_laddr) +
+ (number_of_associations + number_of_endpoints) * sizeof(struct xsctp_tcb) +
+ (number_of_remote_addresses + number_of_associations) * sizeof(struct xsctp_raddr);
+
+ /* request some more memory than needed */
+ req->oldidx = (n + n / 8);
+ return 0;
+ }
+ if (req->newptr != USER_ADDR_NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
+ return EPERM;
+ }
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ xinpcb.last = 0;
+ xinpcb.local_port = ntohs(inp->sctp_lport);
+ xinpcb.flags = inp->sctp_flags;
+ xinpcb.features = inp->sctp_features;
+ xinpcb.total_sends = inp->total_sends;
+ xinpcb.total_recvs = inp->total_recvs;
+ xinpcb.total_nospaces = inp->total_nospaces;
+ xinpcb.fragmentation_point = inp->sctp_frag_point;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ xinpcb.qlen = 0;
+ xinpcb.maxqlen = 0;
+ } else {
+ xinpcb.qlen = inp->sctp_socket->so_qlen;
+ xinpcb.maxqlen = inp->sctp_socket->so_qlimit;
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, NULL, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ xstcb.last = 0;
+ xstcb.local_port = ntohs(inp->sctp_lport);
+ xstcb.remote_port = ntohs(stcb->rport);
+ if (stcb->asoc.primary_destination != NULL)
+ xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
+ xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
+ xstcb.state = SCTP_GET_STATE(&stcb->asoc); /* FIXME */
+ xstcb.in_streams = stcb->asoc.streamincnt;
+ xstcb.out_streams = stcb->asoc.streamoutcnt;
+ xstcb.max_nr_retrans = stcb->asoc.overall_error_count;
+ xstcb.primary_process = 0; /* not really supported
+ * yet */
+ xstcb.T1_expireries = stcb->asoc.timoinit + stcb->asoc.timocookie;
+ xstcb.T2_expireries = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack;
+ xstcb.retransmitted_tsns = stcb->asoc.marked_retrans;
+ xstcb.start_time.tv_sec = (uint32_t) stcb->asoc.start_time.tv_sec;
+ xstcb.start_time.tv_usec = (uint32_t) stcb->asoc.start_time.tv_usec;
+ xstcb.discontinuity_time.tv_sec = (uint32_t) stcb->asoc.discontinuity_time.tv_sec;
+ xstcb.discontinuity_time.tv_usec = (uint32_t) stcb->asoc.discontinuity_time.tv_usec;
+ xstcb.total_sends = stcb->total_sends;
+ xstcb.total_recvs = stcb->total_recvs;
+ xstcb.local_tag = stcb->asoc.my_vtag;
+ xstcb.remote_tag = stcb->asoc.peer_vtag;
+ xstcb.initial_tsn = stcb->asoc.init_seq_number;
+ xstcb.highest_tsn = stcb->asoc.sending_seq - 1;
+ xstcb.cumulative_tsn = stcb->asoc.last_acked_seq;
+ xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn;
+ xstcb.mtu = stcb->asoc.smallest_mtu;
+ xstcb.refcnt = stcb->asoc.refcnt;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, stcb, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ xraddr.last = 0;
+ xraddr.address = net->ro._l_addr;
+ xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE);
+ xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0);
+ xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0);
+ xraddr.rto = net->RTO;
+ xraddr.max_path_rtx = net->failure_threshold;
+ xraddr.rtx = net->marked_retrans;
+ xraddr.error_counter = net->error_count;
+ xraddr.cwnd = net->cwnd;
+ xraddr.flight_size = net->flight_size;
+ xraddr.mtu = net->mtu;
+ xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
+ xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ memset((void *)&xraddr, 0, sizeof(struct xsctp_raddr));
+ xraddr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ memset((void *)&xstcb, 0, sizeof(struct xsctp_tcb));
+ xstcb.last = 1;
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb));
+ xinpcb.last = 1;
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ return error;
+}
+
+#define RANGECHK(var, min, max) \
+ if ((var) < (min)) { (var) = (min); } \
+ else if ((var) > (max)) { (var) = (max); }
+
+static int
+sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+
+ error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if (error == 0) {
+ RANGECHK(sctp_sendspace, SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
+ RANGECHK(sctp_recvspace, SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_AUTO_ASCONF)
+ RANGECHK(sctp_auto_asconf, SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
+#endif
+ RANGECHK(sctp_ecn_enable, SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
+ RANGECHK(sctp_ecn_nonce, SCTPCTL_ECN_NONCE_MIN, SCTPCTL_ECN_NONCE_MAX);
+ RANGECHK(sctp_strict_sacks, SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
+ RANGECHK(sctp_no_csum_on_loopback, SCTPCTL_LOOPBACK_NOCSUM_MIN, SCTPCTL_LOOPBACK_NOCSUM_MAX);
+ RANGECHK(sctp_strict_init, SCTPCTL_STRICT_INIT_MIN, SCTPCTL_STRICT_INIT_MAX);
+ RANGECHK(sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
+ RANGECHK(sctp_max_burst_default, SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
+ RANGECHK(sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
+ RANGECHK(sctp_hashtblsize, SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
+ RANGECHK(sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
+ RANGECHK(sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
+ RANGECHK(sctp_chunkscale, SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
+ RANGECHK(sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
+ RANGECHK(sctp_sack_freq_default, SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
+ RANGECHK(sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
+ RANGECHK(sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
+ RANGECHK(sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
+ RANGECHK(sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
+ RANGECHK(sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
+ RANGECHK(sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
+ RANGECHK(sctp_rto_max_default, SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
+ RANGECHK(sctp_rto_min_default, SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
+ RANGECHK(sctp_rto_initial_default, SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
+ RANGECHK(sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
+ RANGECHK(sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
+ RANGECHK(sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
+ RANGECHK(sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
+ RANGECHK(sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
+ RANGECHK(sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
+ RANGECHK(sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
+ RANGECHK(sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
+ RANGECHK(sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
+ RANGECHK(sctp_cmt_pf, SCTPCTL_CMT_PF_MIN, SCTPCTL_CMT_PF_MAX);
+ RANGECHK(sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
+ RANGECHK(sctp_early_fr, SCTPCTL_EARLY_FAST_RETRAN_MIN, SCTPCTL_EARLY_FAST_RETRAN_MAX);
+ RANGECHK(sctp_early_fr_msec, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX);
+ RANGECHK(sctp_asconf_auth_nochk, SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
+ RANGECHK(sctp_auth_disable, SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
+ RANGECHK(sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
+ RANGECHK(sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
+ RANGECHK(sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
+ RANGECHK(sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
+ RANGECHK(sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
+ RANGECHK(sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
+ RANGECHK(sctp_strict_data_order, SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
+ RANGECHK(sctp_min_residual, SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
+ RANGECHK(sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
+ RANGECHK(sctp_logging_level, SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
+ RANGECHK(sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
+ RANGECHK(sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_BASE)
+ RANGECHK(sctp_mobility_base, SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
+#endif
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+ RANGECHK(sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
+#endif
+#ifdef SCTP_DEBUG
+ RANGECHK(sctp_debug_on, SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
+#endif
+ }
+ return (error);
+}
+
+/*
+ * sysctl definitions
+ */
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_sendspace, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXDGRAM_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_recvspace, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RECVSPACE_DESC);
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_AUTO_ASCONF)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_auto_asconf, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTOASCONF_DESC);
+#endif
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_ecn_enable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_ENABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_ecn_nonce, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_NONCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_sacks, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_SACKS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_no_csum_on_loopback, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOOPBACK_NOCSUM_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_init, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_init, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_INIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_peer_chunk_oh, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PEER_CHKOH_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_burst_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_chunks_on_queue, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXCHUNKS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_hashtblsize, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_TCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_pcbtblsize, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_min_split_point, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_SPLIT_POINT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_chunkscale, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CHUNKSCALE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_delayed_sack_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DELAYED_SACK_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_sack_freq_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SACK_FREQ_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_system_free_resc_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SYS_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_asoc_free_resc_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASOC_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_heartbeat_interval_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HEARTBEAT_INTERVAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_pmtu_raise_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PMTU_RAISE_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_shutdown_guard_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_secret_lifetime_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SECRET_LIFETIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_min_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_initial_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_INITIAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_init_rto_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_valid_cookie_life_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_VALID_COOKIE_LIFE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_init_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_assoc_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASSOC_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_path_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PATH_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_add_more_threshold, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_nr_outgoing_streams_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_OUTGOING_STREAMS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_on_off, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_ON_OFF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_use_dac, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_USE_DAC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_pf, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_pf, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_PF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_use_cwnd_based_maxburst, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CWND_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_early_fr, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_early_fr_msec, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_asconf_auth_nochk, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_auth_disable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTH_DISABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_nat_friendly, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_NAT_FRIENDLY_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_L2_abc_variable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABC_L_VAR_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mbuf_threshold_count, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_CHAINED_MBUFS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_do_drain, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DO_SCTP_DRAIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_hb_maxburst, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HB_MAX_BURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_abort_if_one_2_one_hits_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABORT_AT_LIMIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_data_order, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_DATA_ORDER_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_min_residual, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_RESIDUAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_retran_chunk, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_RETRAN_CHUNK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_logging_level, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOGGING_LEVEL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_default_cc_module, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_CC_MODULE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_default_frag_interleave, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_BASE)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mobility_base, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_BASE_DESC);
+#endif
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mobility_fasthandoff, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
+ &sctp_log, sctp_log,
+ "SCTP logging (struct sctp_log)");
+#endif
+
+#ifdef SCTP_DEBUG
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_debug_on, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEBUG_DESC);
+#endif /* SCTP_DEBUG */
+
+
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
+ &sctpstat, sctpstat,
+ "SCTP statistics (struct sctp_stat)");
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD,
+ 0, 0, sctp_assoclist,
+ "S,xassoc", "List of active SCTP associations");
Index: ip_gre.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_gre.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_gre.h -L sys/netinet/ip_gre.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_gre.h
+++ sys/netinet/ip_gre.h
@@ -1,5 +1,5 @@
/* $NetBSD: ip_gre.h,v 1.5 2002/06/09 16:33:40 itojun Exp $ */
-/* $FreeBSD: src/sys/netinet/ip_gre.h,v 1.3.2.1 2006/01/27 21:50:10 bz Exp $ */
+/* $FreeBSD: src/sys/netinet/ip_gre.h,v 1.4 2006/01/21 10:44:34 bz Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
Index: tcp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp.h -L sys/netinet/tcp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp.h,v 1.31.2.1 2005/10/09 03:22:51 delphij Exp $
+ * $FreeBSD: src/sys/netinet/tcp.h,v 1.40 2007/05/25 21:28:49 andre Exp $
*/
#ifndef _NETINET_TCP_H_
@@ -68,7 +68,8 @@
#define TH_URG 0x20
#define TH_ECE 0x40
#define TH_CWR 0x80
-#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG|TH_ECE|TH_CWR)
+#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
+#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"
u_short th_win; /* window */
u_short th_sum; /* checksum */
@@ -76,30 +77,24 @@
};
#define TCPOPT_EOL 0
+#define TCPOLEN_EOL 1
#define TCPOPT_NOP 1
+#define TCPOLEN_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOLEN_MAXSEG 4
#define TCPOPT_WINDOW 3
#define TCPOLEN_WINDOW 3
-#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
+#define TCPOPT_SACK_PERMITTED 4
#define TCPOLEN_SACK_PERMITTED 2
-#define TCPOPT_SACK 5 /* Experimental */
+#define TCPOPT_SACK 5
+#define TCPOLEN_SACKHDR 2
#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */
#define TCPOPT_TIMESTAMP 8
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
-#define TCPOPT_TSTAMP_HDR \
- (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
-
-#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
-
-#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
+#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
-/* Option definitions */
-#define TCPOPT_SACK_PERMIT_HDR \
-(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED)
-#define TCPOPT_SACK_HDR (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK<<8)
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */
@@ -121,14 +116,6 @@
* Setting this to "0" disables the minmss check.
*/
#define TCP_MINMSS 216
-/*
- * TCP_MINMSSOVERLOAD is defined to be 1000 which should cover any type
- * of interactive TCP session.
- * See tcp_subr.c tcp_minmssoverload SYSCTL declaration and tcp_input.c
- * for more comments.
- * Setting this to "0" disables the minmssoverload check.
- */
-#define TCP_MINMSSOVERLOAD 0 /* XXX: Disabled until refined */
/*
* Default maximum segment size for TCP6.
@@ -208,8 +195,8 @@
/* Metrics; variable units. */
u_int32_t __tcpi_pmtu;
u_int32_t __tcpi_rcv_ssthresh;
- u_int32_t __tcpi_rtt;
- u_int32_t __tcpi_rttvar;
+ u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. */
+ u_int32_t tcpi_rttvar; /* RTT variance in usecs. */
u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */
u_int32_t tcpi_snd_cwnd; /* Send congestion window. */
u_int32_t __tcpi_advmss;
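[Editor's note] The new PRINT_TH_FLAGS string added above follows the kernel's "%b" bit-decoding convention: the leading \20 selects base 16 for printing the raw value, and each following octal \N names bit N (counting from 1 at the least significant bit). A hedged sketch of a debug printout using it with printf(9)'s %b specifier - not code from this commit, and "th" is assumed to point at a struct tcphdr:

    /* Decodes the flag byte, e.g. 0x12 prints roughly as "12<SYN,ACK>". */
    printf("tcp flags %b\n", th->th_flags, PRINT_TH_FLAGS);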
--- /dev/null
+++ sys/netinet/sctp_pcb.c
@@ -0,0 +1,6163 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_pcb.c,v 1.62.2.2.2.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_bsd_addr.h>
+
+
+struct sctp_epinfo sctppcbinfo;
+
+/* FIX: we don't handle multiple link local scopes */
+/* "scopeless" replacement IN6_ARE_ADDR_EQUAL */
+int
+SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b)
+{
+ struct in6_addr tmp_a, tmp_b;
+
+ /* use a copy of a and b */
+ tmp_a = *a;
+ tmp_b = *b;
+ in6_clearscope(&tmp_a);
+ in6_clearscope(&tmp_b);
+ return (IN6_ARE_ADDR_EQUAL(&tmp_a, &tmp_b));
+}
+
+void
+sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb)
+{
+ /*
+ * We really don't need to lock this, but I will just because it
+ * does not hurt.
+ */
+ SCTP_INP_INFO_RLOCK();
+ spcb->ep_count = sctppcbinfo.ipi_count_ep;
+ spcb->asoc_count = sctppcbinfo.ipi_count_asoc;
+ spcb->laddr_count = sctppcbinfo.ipi_count_laddr;
+ spcb->raddr_count = sctppcbinfo.ipi_count_raddr;
+ spcb->chk_count = sctppcbinfo.ipi_count_chunk;
+ spcb->readq_count = sctppcbinfo.ipi_count_readq;
+ spcb->stream_oque = sctppcbinfo.ipi_count_strmoq;
+ spcb->free_chunks = sctppcbinfo.ipi_free_chunks;
+
+ SCTP_INP_INFO_RUNLOCK();
+}
+
+/*
+ * Addresses are added to VRF's (Virtual Router's). For BSD we
+ * have only the default VRF 0. We maintain a hash list of
+ * VRF's. Each VRF has its own list of sctp_ifn's. Each of
+ * these has a list of addresses. When we add a new address
+ * to a VRF we lookup the ifn/ifn_index, if the ifn does
+ * not exist we create it and add it to the list of IFN's
+ * within the VRF. Once we have the sctp_ifn, we add the
+ * address to the list. So we look something like:
+ *
+ * hash-vrf-table
+ * vrf-> ifn-> ifn -> ifn
+ * vrf |
+ * ... +--ifa-> ifa -> ifa
+ * vrf
+ *
+ * We keep these separate lists since the SCTP subsystem will
+ * point to these from its source address selection nets structure.
+ * When an address is deleted it does not happen right away on
+ * the SCTP side, it gets scheduled. What we do when a
+ * delete happens is immediately remove the address from
+ * the master list and decrement the refcount. As our
+ * addip iterator works through and frees the src address
+ * selection pointing to the sctp_ifa, eventually the refcount
+ * will reach 0 and we will delete it. Note that it is assumed
+ * that any locking on system level ifn/ifa is done at the
+ * caller of these functions and these routines will only
+ * lock the SCTP structures as they add or delete things.
+ *
+ * Other notes on VRF concepts.
+ * - An endpoint can be in multiple VRF's
+ * - An association lives within a VRF and only one VRF.
+ * - Any incoming packet we can deduce the VRF for by
+ * looking at the mbuf/pak inbound (for BSD its VRF=0 :D)
+ * - Any downward send call or connect call must supply the
+ * VRF via ancillary data or via some sort of set default
+ * VRF socket option call (again for BSD no brainer since
+ * the VRF is always 0).
+ * - An endpoint may add multiple VRF's to it.
+ * - Listening sockets can accept associations in any
+ * of the VRF's they are in but the assoc will end up
+ * in only one VRF (gotten from the packet or connect/send).
+ *
+ */
+
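[Editor's note] The comment above describes the containment hierarchy (VRF -> sctp_ifn -> sctp_ifa) and the refcount-deferred deletion scheme. A minimal sketch, not part of the commit, of walking that hierarchy for the default VRF (0 on BSD, per the comment), using only structures and list links defined in this file; the caller is assumed to hold the address read lock:

    struct sctp_vrf *vrf;
    struct sctp_ifn *ifn;
    struct sctp_ifa *ifa;

    vrf = sctp_find_vrf(0);         /* BSD only has the default VRF 0 */
    if (vrf != NULL) {
            LIST_FOREACH(ifn, &vrf->ifnlist, next_ifn) {
                    LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
                            /* each sctp_ifa carries one local address plus a refcount */
                    }
            }
    }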
+struct sctp_vrf *
+sctp_allocate_vrf(int vrf_id)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_vrflist *bucket;
+
+ /* First allocate the VRF structure */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf) {
+ /* Already allocated */
+ return (vrf);
+ }
+ SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf),
+ SCTP_M_VRF);
+ if (vrf == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ return (NULL);
+ }
+ /* setup the VRF */
+ memset(vrf, 0, sizeof(struct sctp_vrf));
+ vrf->vrf_id = vrf_id;
+ LIST_INIT(&vrf->ifnlist);
+ vrf->total_ifa_count = 0;
+ vrf->refcount = 0;
+ /* now also setup table ids */
+ SCTP_INIT_VRF_TABLEID(vrf);
+ /* Init the HASH of addresses */
+ vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE,
+ &vrf->vrf_addr_hashmark);
+ if (vrf->vrf_addr_hash == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ return (NULL);
+ }
+ /* Add it to the hash table */
+ bucket = &sctppcbinfo.sctp_vrfhash[(vrf_id & sctppcbinfo.hashvrfmark)];
+ LIST_INSERT_HEAD(bucket, vrf, next_vrf);
+ atomic_add_int(&sctppcbinfo.ipi_count_vrfs, 1);
+ return (vrf);
+}
+
+
+struct sctp_ifn *
+sctp_find_ifn(void *ifn, uint32_t ifn_index)
+{
+ struct sctp_ifn *sctp_ifnp;
+ struct sctp_ifnlist *hash_ifn_head;
+
+ /*
+	 * We assume the lock is held for the addresses; if that's wrong,
+	 * problems could occur :-)
+ */
+ hash_ifn_head = &sctppcbinfo.vrf_ifn_hash[(ifn_index & sctppcbinfo.vrf_ifn_hashmark)];
+ LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) {
+ if (sctp_ifnp->ifn_index == ifn_index) {
+ return (sctp_ifnp);
+ }
+ if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) {
+ return (sctp_ifnp);
+ }
+ }
+ return (NULL);
+}
+
+
+
+struct sctp_vrf *
+sctp_find_vrf(uint32_t vrf_id)
+{
+ struct sctp_vrflist *bucket;
+ struct sctp_vrf *liste;
+
+ bucket = &sctppcbinfo.sctp_vrfhash[(vrf_id & sctppcbinfo.hashvrfmark)];
+ LIST_FOREACH(liste, bucket, next_vrf) {
+ if (vrf_id == liste->vrf_id) {
+ return (liste);
+ }
+ }
+ return (NULL);
+}
+
+void
+sctp_free_vrf(struct sctp_vrf *vrf)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&vrf->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ LIST_REMOVE(vrf, next_vrf);
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_vrfs, 1);
+ }
+}
+
+void
+sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&sctp_ifnp->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ if (sctp_ifnp->vrf) {
+ sctp_free_vrf(sctp_ifnp->vrf);
+ }
+ SCTP_FREE(sctp_ifnp, SCTP_M_IFN);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ifns, 1);
+ }
+}
+
+void
+sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
+{
+ struct sctp_ifn *sctp_ifnp;
+
+ sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index);
+ if (sctp_ifnp != NULL) {
+ sctp_ifnp->ifn_mtu = mtu;
+ }
+}
+
+
+void
+sctp_free_ifa(struct sctp_ifa *sctp_ifap)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&sctp_ifap->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ if (sctp_ifap->ifn_p) {
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ SCTP_FREE(sctp_ifap, SCTP_M_IFA);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ifas, 1);
+ }
+}
+
+static void
+sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
+{
+ struct sctp_ifn *found;
+
+ found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index);
+ if (found == NULL) {
+ /* Not in the list.. sorry */
+ return;
+ }
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_REMOVE(sctp_ifnp, next_bucket);
+ LIST_REMOVE(sctp_ifnp, next_ifn);
+ SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index,
+ sctp_ifnp->registered_af);
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WUNLOCK();
+ /* Take away the reference, and possibly free it */
+ sctp_free_ifn(sctp_ifnp);
+}
+
+void
+sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different lenght %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+void
+sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different lenght %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_VALID;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*-
+ * Add an ifa to an ifn.
+ * Register the interface as necessary.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap)
+{
+ int ifa_af;
+
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ /* update address counts */
+ sctp_ifnp->ifa_count++;
+ ifa_af = sctp_ifap->address.sa.sa_family;
+ if (ifa_af == AF_INET)
+ sctp_ifnp->num_v4++;
+ else
+ sctp_ifnp->num_v6++;
+ if (sctp_ifnp->ifa_count == 1) {
+ /* register the new interface */
+ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af);
+ sctp_ifnp->registered_af = ifa_af;
+ }
+}
+
+/*-
+ * Remove an ifa from its ifn.
+ * If no more addresses exist, remove the ifn too. Otherwise, re-register
+ * the interface based on the remaining address families left.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap)
+{
+ uint32_t ifn_index;
+
+ LIST_REMOVE(sctp_ifap, next_ifa);
+ if (sctp_ifap->ifn_p) {
+ /* update address counts */
+ sctp_ifap->ifn_p->ifa_count--;
+ if (sctp_ifap->address.sa.sa_family == AF_INET6)
+ sctp_ifap->ifn_p->num_v6--;
+ else if (sctp_ifap->address.sa.sa_family == AF_INET)
+ sctp_ifap->ifn_p->num_v4--;
+
+ ifn_index = sctp_ifap->ifn_p->ifn_index;
+ if (SCTP_LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) {
+ /* remove the ifn, possibly freeing it */
+ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED);
+ } else {
+ /* re-register address family type, if needed */
+ if ((sctp_ifap->ifn_p->num_v6 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET6)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET);
+ sctp_ifap->ifn_p->registered_af = AF_INET;
+ } else if ((sctp_ifap->ifn_p->num_v4 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6);
+ sctp_ifap->ifn_p->registered_af = AF_INET6;
+ }
+ /* free the ifn refcount */
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ sctp_ifap->ifn_p = NULL;
+ }
+}
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
+ uint32_t ifn_type, const char *if_name, void *ifa,
+ struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifnp = NULL;
+ struct sctp_ifa *sctp_ifap = NULL;
+ struct sctp_ifalist *hash_addr_head;
+ struct sctp_ifnlist *hash_ifn_head;
+ uint32_t hash_of_addr;
+ int new_ifn_af = 0;
+
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ SCTP_IPI_ADDR_WLOCK();
+ sctp_ifnp = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifnp) {
+ vrf = sctp_ifnp->vrf;
+ } else {
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ vrf = sctp_allocate_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (NULL);
+ }
+ }
+ }
+ if (sctp_ifnp == NULL) {
+ /*
+		 * build one and add it; we can't hold the lock across the
+		 * malloc, though.
+ */
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *,
+ sizeof(struct sctp_ifn), SCTP_M_IFN);
+ if (sctp_ifnp == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFN:%u", sctp_ifnp->ifn_index);
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifnp, 0, sizeof(struct sctp_ifn));
+ sctp_ifnp->ifn_index = ifn_index;
+ sctp_ifnp->ifn_p = ifn;
+ sctp_ifnp->ifn_type = ifn_type;
+ sctp_ifnp->refcount = 0;
+ sctp_ifnp->vrf = vrf;
+ atomic_add_int(&vrf->refcount, 1);
+ sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family);
+ if (if_name != NULL) {
+ memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ);
+ } else {
+ memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ));
+ }
+ hash_ifn_head = &sctppcbinfo.vrf_ifn_hash[(ifn_index & sctppcbinfo.vrf_ifn_hashmark)];
+ LIST_INIT(&sctp_ifnp->ifalist);
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket);
+ LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn);
+ atomic_add_int(&sctppcbinfo.ipi_count_ifns, 1);
+ new_ifn_af = 1;
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Hmm, it already exists? */
+ if ((sctp_ifap->ifn_p) &&
+ (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
+ sctp_ifap->ifn_p->ifn_name, ifn_index,
+ sctp_ifap);
+ if (new_ifn_af) {
+ /* Remove the created one that we don't want */
+ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
+ }
+ if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) {
+ /* easy to solve, just switch back to active */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n");
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ }
+ exit_stage_left:
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (sctp_ifap);
+ } else {
+ if (sctp_ifap->ifn_p) {
+ /*
+				 * The last IFN gets the address, remove
+ * the old one
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
+ sctp_ifap, sctp_ifap->ifn_p->ifn_name,
+ sctp_ifap->ifn_p->ifn_index, if_name,
+ ifn_index);
+ /* remove the address from the old ifn */
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ /* move the address over to the new ifn */
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ goto exit_stage_left;
+ } else {
+ /* repair ifnp which was NULL ? */
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
+ sctp_ifnp, sctp_ifap);
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ }
+ goto exit_stage_left;
+ }
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA);
+ if (sctp_ifap == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFA");
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifap, 0, sizeof(struct sctp_ifa));
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifnp->refcount, 1);
+ sctp_ifap->vrf_id = vrf_id;
+ sctp_ifap->ifa = ifa;
+ memcpy(&sctp_ifap->address, addr, addr->sa_len);
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE;
+ sctp_ifap->flags = ifa_flags;
+ /* Set scope */
+ if (sctp_ifap->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v4++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET;
+ } else if (sctp_ifap->address.sa.sa_family == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v6++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET6;
+ } else {
+ new_ifn_af = 0;
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa);
+
+ if ((sctp_ifap->src_is_priv == 0) &&
+ (sctp_ifap->src_is_loop == 0)) {
+ sctp_ifap->src_is_glob = 1;
+ }
+ SCTP_IPI_ADDR_WLOCK();
+ hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket);
+ sctp_ifap->refcount = 1;
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifnp->ifa_count++;
+ vrf->total_ifa_count++;
+ atomic_add_int(&sctppcbinfo.ipi_count_ifas, 1);
+ if (new_ifn_af) {
+ SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af);
+ sctp_ifnp->registered_af = new_ifn_af;
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (dynamic_add) {
+ /*
+ * Bump up the refcount so that when the timer completes it
+ * will drop back down.
+ */
+ struct sctp_laddr *wi;
+
+ atomic_add_int(&sctp_ifap->refcount, 1);
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+			 * change; can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+			/* Oops, must decrement the count */
+ sctp_del_addr_from_vrf(vrf_id, addr, ifn_index,
+ if_name);
+ return (NULL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_ADD_IP_ADDRESS;
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process
+ * the newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ } else {
+ /* it's ready for use */
+ sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ return (sctp_ifap);
+}
+
+void
+sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_WLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out_now;
+ }
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Validate the delete */
+ if (sctp_ifap->ifn_p) {
+ int valid = 0;
+
+ /*-
+ * The name has priority over the ifn_index
+			 * if it's given. We do this especially for
+ * panda who might recycle indexes fast.
+ */
+ if (if_name) {
+ int len1, len2;
+
+ len1 = min(SCTP_IFNAMSIZ, strlen(if_name));
+ len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name));
+ if (len1 && len2 && (len1 == len2)) {
+ /* we can compare them */
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) {
+ /*
+ * They match its a correct
+ * delete
+ */
+ valid = 1;
+ }
+ }
+ }
+ if (!valid) {
+ /* last ditch check ifn_index */
+ if (ifn_index == sctp_ifap->ifn_p->ifn_index) {
+ valid = 1;
+ }
+ }
+ if (!valid) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n",
+ ifn_index, ((if_name == NULL) ? "NULL" : if_name));
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n",
+ sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name);
+ SCTP_IPI_ADDR_WUNLOCK();
+ return;
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
+ sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
+ sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+ vrf->total_ifa_count--;
+ LIST_REMOVE(sctp_ifap, next_bucket);
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ }
+#ifdef SCTP_DEBUG
+ else {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:",
+ ifn_index);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+
+out_now:
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (sctp_ifap) {
+ struct sctp_laddr *wi;
+
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+			 * change; can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+
+ /* Oops, must decrement the count */
+ sctp_free_ifa(sctp_ifap);
+ return;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_DEL_IP_ADDRESS;
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process
+ * the newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ }
+ return;
+}
+
+
+static struct sctp_tcb *
+sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
+ struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ /**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */
+ /*
+ * If we support the TCP model, then we must now dig through to see
+ * if we can find our endpoint in the list of tcp ep's.
+ */
+ uint16_t lport, rport;
+ struct sctppcbhead *ephead;
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ if ((to == NULL) || (from == NULL)) {
+ return (NULL);
+ }
+ if (to->sa_family == AF_INET && from->sa_family == AF_INET) {
+ lport = ((struct sockaddr_in *)to)->sin_port;
+ rport = ((struct sockaddr_in *)from)->sin_port;
+ } else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) {
+ lport = ((struct sockaddr_in6 *)to)->sin6_port;
+ rport = ((struct sockaddr_in6 *)from)->sin6_port;
+ } else {
+ return NULL;
+ }
+ ephead = &sctppcbinfo.sctp_tcpephash[SCTP_PCBHASH_ALLADDR(
+ (lport + rport), sctppcbinfo.hashtcpmark)];
+ /*
+ * Ok now for each of the guys in this bucket we must look and see:
+ * - Does the remote port match. - Does there single association's
+ * addresses match this address (to). If so we update p_ep to point
+ * to this ep and return the tcb from it.
+ */
+ LIST_FOREACH(inp, ephead, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (lport != inp->sctp_lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (inp->def_vrf_id != vrf_id) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* check to see if the ep has one of the addresses */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* We are NOT bound all, so look further */
+ int match = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family ==
+ to->sa_family) {
+ /* see if it matches */
+ struct sockaddr_in *intf_addr, *sin;
+
+ intf_addr = &laddr->ifa->address.sin;
+ sin = (struct sockaddr_in *)to;
+ if (from->sa_family == AF_INET) {
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ match = 1;
+ break;
+ }
+ } else {
+ struct sockaddr_in6 *intf_addr6;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ to;
+ intf_addr6 = &laddr->ifa->address.sin6;
+
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &intf_addr6->sin6_addr)) {
+ match = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (match == 0) {
+ /* This endpoint does not have this address */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ }
+ /*
+ * Ok if we hit here the ep has the address, does it hold
+ * the tcb?
+ */
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* Does this TCB have a matching address? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+
+ if (net->ro._l_addr.sa.sa_family != from->sa_family) {
+ /* not the same family, can't be a match */
+ continue;
+ }
+ if (from->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ rsin = (struct sockaddr_in *)from;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /* Update the endpoint pointer */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ } else {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)from;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /* Update the endpoint pointer */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+/*
+ * rules for use
+ *
+ * 1) If I return a NULL you must decrement any INP ref cnt. 2) If I find an
+ * stcb, both will be locked (locked_tcb and stcb) but decrement will be done
+ * (if locked == NULL). 3) Decrement happens on return ONLY if locked ==
+ * NULL.
+ */
+
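[Editor's note] A hedged sketch, not part of this commit, of a caller honoring the rules above in the locked_tcb == NULL case; "inp" is assumed to already carry a reference, e.g. obtained from sctp_pcb_findep() defined later in this file:

    static void
    example_lookup(struct sctp_inpcb *inp, struct sockaddr *remote)
    {
            struct sctp_tcb *stcb;
            struct sctp_nets *net;

            stcb = sctp_findassociation_ep_addr(&inp, remote, &net, NULL, NULL);
            if (stcb == NULL) {
                    /* Rule 1: nothing found, so this caller drops the inp ref. */
                    SCTP_INP_DECR_REF(inp);
                    return;
            }
            /* Rules 2/3: stcb comes back locked and the ref was already dropped. */
            SCTP_TCB_UNLOCK(stcb);
    }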
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
+ struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb)
+{
+ struct sctpasochead *head;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net;
+ uint16_t rport;
+
+ inp = *inp_p;
+ if (remote->sa_family == AF_INET) {
+ rport = (((struct sockaddr_in *)remote)->sin_port);
+ } else if (remote->sa_family == AF_INET6) {
+ rport = (((struct sockaddr_in6 *)remote)->sin6_port);
+ } else {
+ return (NULL);
+ }
+ if (locked_tcb) {
+ /*
+ * UN-lock so we can do proper locking here this occurs when
+ * called from load_addresses_from_init.
+ */
+ atomic_add_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTP_INP_INFO_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ /*-
+ * Now either this guy is our listener or it's the
+ * connector. If it is the one that issued the connect, then
+		 * its only chance is to be the first TCB in the list. If
+ * it is the acceptor, then do the special_lookup to hash
+ * and find the real inp.
+ */
+ if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
+ /* to is peer addr, from is my addr */
+ stcb = sctp_tcb_special_locate(inp_p, remote, local,
+ netp, inp->def_vrf_id);
+ if ((stcb != NULL) && (locked_tcb == NULL)) {
+ /* we have a locked tcb, lower refcount */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
+ SCTP_INP_RLOCK(locked_tcb->sctp_ep);
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_INP_RUNLOCK(locked_tcb->sctp_ep);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto null_return;
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ if (remote->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ } else if (remote->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
+ inp->sctp_hashmark)];
+ if (head == NULL) {
+ goto null_return;
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbhash) {
+ if (stcb->rport != rport) {
+ /* remote port does not match */
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ if (remote->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ } else if (remote->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ &net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+null_return:
+ /* clean up for returning null */
+ if (locked_tcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ /* not found */
+ return (NULL);
+}
+
+/*
+ * Find an association for a specific endpoint using the association id given
+ * out in the COMM_UP notification
+ */
+
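[Editor's note] For context, the association id consumed here is the same sctp_assoc_t a userland application receives in the SCTP_ASSOC_CHANGE (SCTP_COMM_UP) notification and later hands back to the stack through socket options or sendmsg() ancillary data. A hedged userland sketch of extracting it - not part of this file, and real code would use recvmsg() and check MSG_NOTIFICATION before treating the buffer as a notification:

    union sctp_notification notif;
    sctp_assoc_t id;
    ssize_t n;

    n = recv(sd, &notif, sizeof(notif), 0);
    if (n > 0 && notif.sn_header.sn_type == SCTP_ASSOC_CHANGE &&
        notif.sn_assoc_change.sac_state == SCTP_COMM_UP) {
            /* This id names the association in later calls on the socket. */
            id = notif.sn_assoc_change.sac_assoc_id;
    }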
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
+{
+ /*
+	 * Use the assoc_id to find an endpoint
+ */
+ struct sctpasochead *head;
+ struct sctp_tcb *stcb;
+ uint32_t id;
+
+ if (asoc_id == 0 || inp == NULL) {
+ return (NULL);
+ }
+ SCTP_INP_INFO_RLOCK();
+ id = (uint32_t) asoc_id;
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(id,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ /* invalid id TSNH */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ if (stcb->asoc.assoc_id == id) {
+ /* candidate */
+ if (inp != stcb->sctp_ep) {
+ /*
+ * some other guy has the same id active (id
+ * collision ??).
+ */
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ continue;
+ }
+ if (want_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ /* Ok if we missed here, lets try the restart hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(id, sctppcbinfo.hashrestartmark)];
+ if (head == NULL) {
+ /* invalid id TSNH */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbrestarhash) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ continue;
+ }
+ if (want_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (stcb->asoc.assoc_id == id) {
+ /* candidate */
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ if (inp != stcb->sctp_ep) {
+ /*
+ * some other guy has the same id active (id
+ * collision ??).
+ */
+ if (want_lock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ continue;
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ if (want_lock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+}
+
+
+static struct sctp_inpcb *
+sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
+ uint16_t lport, uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_laddr *laddr;
+ int fnd;
+
+ /*
+	 * Endpoint probe expects that the INP_INFO is locked.
+ */
+ if (nam->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)nam;
+ sin6 = NULL;
+ } else if (nam->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ sin = NULL;
+ } else {
+ /* unsupported family */
+ return (NULL);
+ }
+ if (head == NULL)
+ return (NULL);
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
+ (inp->sctp_lport == lport)) {
+ /* got it */
+ if ((nam->sa_family == AF_INET) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* IPv4 on a IPv6 socket with ONLY IPv6 set */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* A V6 address and the endpoint is NOT bound V6 */
+ if (nam->sa_family == AF_INET6 &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ SCTP_INP_RUNLOCK(inp);
+ if (!fnd)
+ continue;
+ return (inp);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+
+ if ((nam->sa_family == AF_INET) &&
+ (sin->sin_addr.s_addr == INADDR_ANY)) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ } else if ((nam->sa_family == AF_INET6) &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ }
+ /*
+	 * ok, not bound to all so see if we can find an EP bound to this
+ * address.
+ */
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /*
+ * Ok this could be a likely candidate, look at all of its
+ * addresses
+ */
+ if (inp->sctp_lport != lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ if (!fnd) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
+ laddr->ifa);
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == nam->sa_family) {
+ /* possible, see if it matches */
+ struct sockaddr_in *intf_addr;
+
+ intf_addr = &laddr->ifa->address.sin;
+ if (nam->sa_family == AF_INET) {
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ } else if (nam->sa_family == AF_INET6) {
+ struct sockaddr_in6 *intf_addr6;
+
+ intf_addr6 = &laddr->ifa->address.sin6;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &intf_addr6->sin6_addr)) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+struct sctp_inpcb *
+sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock,
+ uint32_t vrf_id)
+{
+ /*
+ * First we check the hash table to see if someone has this port
+ * bound with just the port.
+ */
+ struct sctp_inpcb *inp;
+ struct sctppcbhead *head;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ int lport;
+
+ if (nam->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)nam;
+ lport = ((struct sockaddr_in *)nam)->sin_port;
+ } else if (nam->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ lport = ((struct sockaddr_in6 *)nam)->sin6_port;
+ } else {
+ /* unsupported family */
+ return (NULL);
+ }
+ /*
+ * I could cheat here and just cast to one of the types but we will
+ * do it right. It also provides the check against an Unsupported
+ * type too.
+ */
+ /* Find the head of the ALLADDR chain */
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RLOCK();
+ }
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+
+ /*
+ * If the TCP model exists it could be that the main listening
+ * endpoint is gone but there exists a connected socket for this guy
+ * yet. If so we can return the first one that we find. This may NOT
+ * be the correct one but the sctp_findassociation_ep_addr has
+ * further code to look at all TCP models.
+ */
+ if (inp == NULL && find_tcp_pool) {
+ unsigned int i;
+
+ for (i = 0; i < sctppcbinfo.hashtblsize; i++) {
+ /*
+ * This is real gross, but we do NOT have a remote
+ * port at this point depending on who is calling.
+ * We must therefore look for ANY one that matches
+ * our local port :/
+ */
+ head = &sctppcbinfo.sctp_tcpephash[i];
+ if (LIST_FIRST(head)) {
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+ if (inp) {
+ /* Found one */
+ break;
+ }
+ }
+ }
+ }
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ }
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ return (inp);
+}
+
+/*
+ * Find an association for an endpoint with the pointer to whom you want to
+ * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may
+ * need to change the *to to some other struct like an mbuf...
+ */
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
+ uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *retval;
+
+ SCTP_INP_INFO_RLOCK();
+ if (find_tcp_pool) {
+ if (inp_p != NULL) {
+ retval = sctp_tcb_special_locate(inp_p, from, to, netp,
+ vrf_id);
+ } else {
+ retval = sctp_tcb_special_locate(&inp, from, to, netp,
+ vrf_id);
+ }
+ if (retval != NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ return (retval);
+ }
+ }
+ inp = sctp_pcb_findep(to, 0, 1, vrf_id);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ if (inp == NULL) {
+ return (NULL);
+ }
+ /*
+	 * ok, we have an endpoint; now let's find the assoc for it (if any).
+	 * We place the source address ("from") in the "to" of the find
+	 * endpoint call, since in reality this chain is used from the
+	 * inbound packet side.
+ */
+ if (inp_p != NULL) {
+ retval = sctp_findassociation_ep_addr(inp_p, from, netp, to,
+ NULL);
+ } else {
+ retval = sctp_findassociation_ep_addr(&inp, from, netp, to,
+ NULL);
+ }
+ return retval;
+}
+
+
+/*
+ * This routine will grub through the mbuf that is an INIT or INIT-ACK and
+ * find all addresses that the sender has specified in any address list. Each
+ * address will be used to look up the TCB and see if one exists.
+ */
+static struct sctp_tcb *
+sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
+ struct sockaddr *dest)
+{
+ struct sockaddr_in sin4;
+ struct sockaddr_in6 sin6;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *retval;
+ uint32_t ptype, plen;
+
+ memset(&sin4, 0, sizeof(sin4));
+ memset(&sin6, 0, sizeof(sin6));
+ sin4.sin_len = sizeof(sin4);
+ sin4.sin_family = AF_INET;
+ sin4.sin_port = sh->src_port;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = sh->src_port;
+
+ retval = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr != NULL) {
+ /* now we must see if we want the parameter */
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ plen == sizeof(struct sctp_ipv4addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv4addr_param ip4_parm, *p4;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin4, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS &&
+ plen == sizeof(struct sctp_ipv6addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv6addr_param ip6_parm, *p6;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin6, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ return (NULL);
+}
+
+
+static struct sctp_tcb *
+sctp_findassoc_by_vtag(struct sockaddr *from, uint32_t vtag,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport,
+ uint16_t lport, int skip_src_check)
+{
+ /*
+ * Use my vtag to hash. If we find it we then verify the source addr
+	 * is in the assoc. If all goes well we save a bit on receipt of a
+ * packet.
+ */
+ struct sctpasochead *head;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+
+ *netp = NULL;
+ *inp_p = NULL;
+ SCTP_INP_INFO_RLOCK();
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(vtag,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ /* invalid vtag */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ if (stcb->asoc.my_vtag == vtag) {
+ /* candidate */
+ if (stcb->rport != rport) {
+ /*
+ * we could remove this if vtags are unique
+ * across the system.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->sctp_ep->sctp_lport != lport) {
+ /*
+ * we could remove this if vtags are unique
+ * across the system.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (skip_src_check) {
+ *netp = NULL; /* unknown */
+ if (inp_p)
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ net = sctp_findnet(stcb, from);
+ if (net) {
+ /* yep its him. */
+ *netp = net;
+ SCTP_STAT_INCR(sctps_vtagexpress);
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ /*
+ * not him, this should only happen in rare
+ * cases so I peg it.
+ */
+ SCTP_STAT_INCR(sctps_vtagbogus);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+}
+
+/*
+ * Find an association with the pointer to the inbound IP packet. This can be
+ * an IPv4 or an IPv6 packet.
+ */
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ int find_tcp_pool;
+ struct ip *iph;
+ struct sctp_tcb *retval;
+ struct sockaddr_storage to_store, from_store;
+ struct sockaddr *to = (struct sockaddr *)&to_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+ struct sctp_inpcb *inp;
+
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6, ip6_use_defzone);
+ } else {
+ /* Currently not supported. */
+ return (NULL);
+ }
+ if (sh->v_tag) {
+ /* we only go down this path if vtag is non-zero */
+ retval = sctp_findassoc_by_vtag(from, ntohl(sh->v_tag),
+ inp_p, netp, sh->src_port, sh->dest_port, 0);
+ if (retval) {
+ return (retval);
+ }
+ }
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *to4;
+
+ to4 = (struct sockaddr_in *)&to_store;
+ bzero(to4, sizeof(*to4));
+ to4->sin_family = AF_INET;
+ to4->sin_len = sizeof(struct sockaddr_in);
+ to4->sin_addr.s_addr = iph->ip_dst.s_addr;
+ to4->sin_port = sh->dest_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *to6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ to6 = (struct sockaddr_in6 *)&to_store;
+ bzero(to6, sizeof(*to6));
+ to6->sin6_family = AF_INET6;
+ to6->sin6_len = sizeof(struct sockaddr_in6);
+ to6->sin6_addr = ip6->ip6_dst;
+ to6->sin6_port = sh->dest_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(to6);
+ sa6_embedscope(to6, ip6_use_defzone);
+ }
+ find_tcp_pool = 0;
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (ch->chunk_type != SCTP_INITIATION_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ECHO)) {
+ /* Other chunk types go to the tcp pool. */
+ find_tcp_pool = 1;
+ }
+ if (inp_p) {
+ retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
+ find_tcp_pool, vrf_id);
+ inp = *inp_p;
+ } else {
+ retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
+ find_tcp_pool, vrf_id);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
+ if (retval == NULL && inp) {
+ /* Found a EP but not this address */
+ if ((ch->chunk_type == SCTP_INITIATION) ||
+ (ch->chunk_type == SCTP_INITIATION_ACK)) {
+ /*-
+ * special hook, we do NOT return linp or an
+ * association that is linked to an existing
+ * association that is under the TCP pool (i.e. no
+ * listener exists). The endpoint finding routine
+			 * will always find a listener before examining the
+ * TCP pool.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ if (inp_p) {
+ *inp_p = NULL;
+ }
+ return (NULL);
+ }
+ retval = sctp_findassociation_special_addr(m, iphlen,
+ offset, sh, &inp, netp, to);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
+ return (retval);
+}
+
+/*
+ * Look up an association by an ASCONF lookup address.
+ * If the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup.
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp)
+{
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_storage local_store, remote_store;
+ struct ip *iph;
+ struct sctp_paramhdr parm_buf, *phdr;
+ int ptype;
+ int zero_address = 0;
+
+
+ memset(&local_store, 0, sizeof(local_store));
+ memset(&remote_store, 0, sizeof(remote_store));
+
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ sin = (struct sockaddr_in *)&local_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->dest_port;
+ sin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6 = (struct sockaddr_in6 *)&local_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->dest_port;
+ sin6->sin6_addr = ip6->ip6_dst;
+ } else {
+ return NULL;
+ }
+
+ phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
+ &parm_buf, sizeof(struct sctp_paramhdr));
+ if (phdr == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ ptype = (int)((uint32_t) ntohs(phdr->param_type));
+ /* get the correlation address */
+ if (ptype == SCTP_IPV6_ADDRESS) {
+ /* ipv6 address param */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv6addr_param)) {
+ return NULL;
+ }
+ p6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p6_buf.ph, sizeof(*p6));
+ if (p6 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin6 = (struct sockaddr_in6 *)&remote_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->src_port;
+ memcpy(&sin6->sin6_addr, &p6->addr, sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ } else if (ptype == SCTP_IPV4_ADDRESS) {
+ /* ipv4 address param */
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv4addr_param)) {
+ return NULL;
+ }
+ p4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p4_buf.ph, sizeof(*p4));
+ if (p4 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin = (struct sockaddr_in *)&remote_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->src_port;
+ memcpy(&sin->sin_addr, &p4->addr, sizeof(struct in_addr));
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ } else {
+ /* invalid address param type */
+ return NULL;
+ }
+
+ if (zero_address) {
+ stcb = sctp_findassoc_by_vtag(NULL, ntohl(sh->v_tag), inp_p,
+ netp, sh->src_port, sh->dest_port, 1);
+ /*
+ * printf("findassociation_ep_asconf: zero lookup address
+ * finds stcb 0x%x\n", (uint32_t)stcb);
+ */
+ } else {
+ stcb = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&remote_store, netp,
+ (struct sockaddr *)&local_store, NULL);
+ }
+ return (stcb);
+}
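The ASCONF lookup above walks the chunk's parameters with sctp_get_next_param(), which follows the standard SCTP parameter TLV layout (16-bit type, 16-bit length including the header, value padded to a 4-byte boundary). A minimal sketch of that walk over a contiguous buffer, assuming nothing beyond the RFC 4960 parameter format; the buffer, length, and function names here are illustrative only, not kernel identifiers:

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

struct param_hdr {             /* mirrors the on-wire parameter header */
	uint16_t param_type;   /* network byte order */
	uint16_t param_length; /* includes this header */
};

/* Walk TLV parameters in buf[0..len); return the offset of the first
 * parameter whose (host-order) type matches 'want', or -1 if none. */
static int
find_param(const uint8_t *buf, size_t len, uint16_t want)
{
	size_t off = 0;

	while (off + sizeof(struct param_hdr) <= len) {
		const struct param_hdr *ph = (const void *)(buf + off);
		uint16_t ptype = ntohs(ph->param_type);
		uint16_t plen = ntohs(ph->param_length);

		if (plen < sizeof(*ph) || off + plen > len)
			return (-1);            /* malformed parameter */
		if (ptype == want)
			return ((int)off);
		off += (plen + 3) & ~3;         /* parameters are 4-byte aligned */
	}
	return (-1);
}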
+
+
+/*
+ * allocate an sctp_inpcb and set up a temporary binding to a port/all
+ * addresses. This way, if we don't get a bind, we by default pick an
+ * ephemeral port with all addresses bound.
+ */
+int
+sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
+{
+ /*
+ * we get called when a new endpoint starts up. We need to allocate
+ * the sctp_inpcb structure from the zone and init it. Mark it as
+ * unbound and find a port that we can use as an ephemeral with
+ * INADDR_ANY. If the user binds later no problem we can then add in
+ * the specific addresses. And setup the default parameters for the
+ * EP.
+ */
+ int i, error;
+ struct sctp_inpcb *inp;
+ struct sctp_pcb *m;
+ struct timeval time;
+ sctp_sharedkey_t *null_key;
+
+ error = 0;
+
+ SCTP_INP_INFO_WLOCK();
+ inp = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_ep, struct sctp_inpcb);
+ if (inp == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n");
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ return (ENOBUFS);
+ }
+ /* zap it */
+ bzero(inp, sizeof(*inp));
+
+ /* bump generations */
+ /* setup socket pointers */
+ inp->sctp_socket = so;
+ inp->ip_inp.inp.inp_socket = so;
+
+ inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+
+#ifdef IPSEC
+ {
+ struct inpcbpolicy *pcb_sp = NULL;
+
+ error = ipsec_init_policy(so, &pcb_sp);
+ /* Arrange to share the policy */
+ inp->ip_inp.inp.inp_sp = pcb_sp;
+ ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
+ }
+ if (error != 0) {
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INCR_EP_COUNT();
+ inp->ip_inp.inp.inp_ip_ttl = ip_defttl;
+ SCTP_INP_INFO_WUNLOCK();
+
+ so->so_pcb = (caddr_t)inp;
+
+ if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) ||
+ (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) {
+ /* UDP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure it is NON-BLOCKING IO for UDP */
+ /* SCTP_SET_SO_NBIO(so); */
+ } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) {
+ /* TCP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure we have blocking IO by default */
+ SCTP_CLEAR_SO_NBIO(so);
+ } else {
+ /*
+ * unsupported socket type (RAW, etc.) - in case we missed it
+ * in protosw
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ return (EOPNOTSUPP);
+ }
+ if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ }
+ inp->sctp_tcbhash = SCTP_HASH_INIT(sctp_pcbtblsize,
+ &inp->sctp_hashmark);
+ if (inp->sctp_tcbhash == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ return (ENOBUFS);
+ }
+ inp->def_vrf_id = vrf_id;
+
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_LOCK_INIT(inp);
+ INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp");
+ SCTP_INP_READ_INIT(inp);
+ SCTP_ASOC_CREATE_LOCK_INIT(inp);
+ /* lock the new ep */
+ SCTP_INP_WLOCK(inp);
+
+ /* add it to the info area */
+ LIST_INSERT_HEAD(&sctppcbinfo.listhead, inp, sctp_list);
+ SCTP_INP_INFO_WUNLOCK();
+
+ TAILQ_INIT(&inp->read_queue);
+ LIST_INIT(&inp->sctp_addr_list);
+
+ LIST_INIT(&inp->sctp_asoc_list);
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ LIST_INIT(&inp->sctp_asoc_free_list);
+#endif
+ /* Init the timer structure for signature change */
+ SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE;
+
+ /* now init the actual endpoint default data */
+ m = &inp->sctp_ep;
+
+ /* setup the base timeout information */
+ m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sctp_delayed_sack_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(sctp_heartbeat_interval_default);
+ m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(sctp_pmtu_raise_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(sctp_shutdown_guard_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(sctp_secret_lifetime_default);
+ /* all max/min max are in ms */
+ m->sctp_maxrto = sctp_rto_max_default;
+ m->sctp_minrto = sctp_rto_min_default;
+ m->initial_rto = sctp_rto_initial_default;
+ m->initial_init_rto_max = sctp_init_rto_max_default;
+ m->sctp_sack_freq = sctp_sack_freq_default;
+
+ m->max_open_streams_intome = MAX_SCTP_STREAMS;
+
+ m->max_init_times = sctp_init_rtx_max_default;
+ m->max_send_times = sctp_assoc_rtx_max_default;
+ m->def_net_failure = sctp_path_rtx_max_default;
+ m->sctp_sws_sender = SCTP_SWS_SENDER_DEF;
+ m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF;
+ m->max_burst = sctp_max_burst_default;
+ if ((sctp_default_cc_module >= SCTP_CC_RFC2581) &&
+ (sctp_default_cc_module <= SCTP_CC_HTCP)) {
+ m->sctp_default_cc_module = sctp_default_cc_module;
+ } else {
+ /* sysctl done with invalid value, set to 2581 */
+ m->sctp_default_cc_module = SCTP_CC_RFC2581;
+ }
+ /* number of streams to pre-open on an association */
+ m->pre_open_stream_count = sctp_nr_outgoing_streams_default;
+
+ /* Add adaptation cookie */
+ m->adaptation_layer_indicator = 0x504C5253;
+
+ /* seed random number generator */
+ m->random_counter = 1;
+ m->store_at = SCTP_SIGNATURE_SIZE;
+ SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers));
+ sctp_fill_random_store(m);
+
+ /* Minimum cookie size */
+ m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) +
+ sizeof(struct sctp_state_cookie);
+ m->size_of_a_cookie += SCTP_SIGNATURE_SIZE;
+
+ /* Setup the initial secret */
+ (void)SCTP_GETTIME_TIMEVAL(&time);
+ m->time_of_secret_change = time.tv_sec;
+
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ m->secret_key[0][i] = sctp_select_initial_TSN(m);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL);
+
+ /* How long is a cookie good for? */
+ m->def_cookie_life = MSEC_TO_TICKS(sctp_valid_cookie_life_default);
+ /*
+ * Initialize authentication parameters
+ */
+ m->local_hmacs = sctp_default_supported_hmaclist();
+ m->local_auth_chunks = sctp_alloc_chunklist();
+ sctp_auth_set_default_chunks(m->local_auth_chunks);
+ LIST_INIT(&m->shared_keys);
+ /* add default NULL key as key id 0 */
+ null_key = sctp_alloc_sharedkey();
+ sctp_insert_sharedkey(&m->shared_keys, null_key);
+ SCTP_INP_WUNLOCK(inp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 12);
+#endif
+ return (error);
+}
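For reference, the UDP-style/TCP-style split made above is driven entirely by the socket type the application asked for; the RFC 6458 socket API maps SOCK_SEQPACKET to a one-to-many endpoint and SOCK_STREAM to a one-to-one endpoint. A small user-space sketch (standard calls only, nothing specific to this diff):

#include <sys/socket.h>
#include <netinet/in.h>

int
open_sctp_endpoints(void)
{
	/* One-to-many ("UDP style") socket: takes the
	 * SCTP_PCB_FLAGS_UDPTYPE branch in sctp_inpcb_alloc(). */
	int s_many = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);

	/* One-to-one ("TCP style") socket: takes the
	 * SCTP_PCB_FLAGS_TCPTYPE branch. */
	int s_one = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	return ((s_many >= 0 && s_one >= 0) ? 0 : -1);
}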
+
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ uint16_t lport, rport;
+ struct sctppcbhead *head;
+ struct sctp_laddr *laddr, *oladdr;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(old_inp);
+ SCTP_INP_WLOCK(new_inp);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ new_inp->sctp_ep.time_of_secret_change =
+ old_inp->sctp_ep.time_of_secret_change;
+ memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key,
+ sizeof(old_inp->sctp_ep.secret_key));
+ new_inp->sctp_ep.current_secret_number =
+ old_inp->sctp_ep.current_secret_number;
+ new_inp->sctp_ep.last_secret_number =
+ old_inp->sctp_ep.last_secret_number;
+ new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie;
+
+ /* make it so new data pours into the new socket */
+ stcb->sctp_socket = new_inp->sctp_socket;
+ stcb->sctp_ep = new_inp;
+
+ /* Copy the port across */
+ lport = new_inp->sctp_lport = old_inp->sctp_lport;
+ rport = stcb->rport;
+ /* Pull the tcb from the old association */
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ LIST_REMOVE(stcb, sctp_tcblist);
+
+ /* Now insert the new_inp into the TCP connected hash */
+ head = &sctppcbinfo.sctp_tcpephash[SCTP_PCBHASH_ALLADDR((lport + rport),
+ sctppcbinfo.hashtcpmark)];
+
+ LIST_INSERT_HEAD(head, new_inp, sctp_hash);
+ /* It's safe to access */
+ new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+
+ /* Now move the tcb into the endpoint list */
+ LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /*
+ * Question, do we even need to worry about the ep-hash since we
+ * only have one connection? Probably not :> so let's get rid of it
+ * and not use up any kernel memory for it.
+ */
+
+ /* Ok. Let's restart timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp,
+ stcb, net);
+ }
+
+ SCTP_INP_INFO_WUNLOCK();
+ if (new_inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark);
+ new_inp->sctp_tcbhash = NULL;
+ }
+ if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* Subset bound, so copy in the laddr list from the old_inp */
+ LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) {
+ laddr = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (laddr == NULL) {
+ /*
+ * Gak, what can we do? This assoc is really
+ * HOSED. We probably should send an abort
+ * here.
+ */
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n");
+ continue;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = oladdr->ifa;
+ atomic_add_int(&laddr->ifa->refcount, 1);
+ LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr,
+ sctp_nxt_addr);
+ new_inp->laddr_count++;
+ }
+ }
+ /*
+ * Now any running timers need to be adjusted; since we really don't
+ * care whether they are running or not, just blast the new_inp into
+ * all of them.
+ */
+
+ stcb->asoc.hb_timer.ep = (void *)new_inp;
+ stcb->asoc.dack_timer.ep = (void *)new_inp;
+ stcb->asoc.asconf_timer.ep = (void *)new_inp;
+ stcb->asoc.strreset_timer.ep = (void *)new_inp;
+ stcb->asoc.shut_guard_timer.ep = (void *)new_inp;
+ stcb->asoc.autoclose_timer.ep = (void *)new_inp;
+ stcb->asoc.delayed_event_timer.ep = (void *)new_inp;
+ stcb->asoc.delete_prim_timer.ep = (void *)new_inp;
+ /* now what about the nets? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ net->pmtu_timer.ep = (void *)new_inp;
+ net->rxt_timer.ep = (void *)new_inp;
+ net->fr_timer.ep = (void *)new_inp;
+ }
+ SCTP_INP_WUNLOCK(new_inp);
+ SCTP_INP_WUNLOCK(old_inp);
+}
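sctp_move_pcb_and_assoc() is what makes the usual listen/accept flow work on a one-to-one SCTP socket: the association established on the listening endpoint is migrated onto the inpcb of the socket that accept() returns. A hedged user-space sketch of the sequence that exercises it (error handling omitted; the function name is illustrative):

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>

int
sctp_accept_one(uint16_t port)
{
	struct sockaddr_in sin;
	int lfd, cfd;

	lfd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);	/* TCP-style */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = INADDR_ANY;
	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
	listen(lfd, 1);
	/* accept() hands back a new socket; the kernel moves the
	 * already-established association onto its inpcb. */
	cfd = accept(lfd, NULL, NULL);
	return (cfd);
}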
+
+static int
+sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id)
+{
+ struct sctppcbhead *head;
+ struct sctp_inpcb *t_inp;
+ int fnd;
+
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ LIST_FOREACH(t_inp, head, sctp_hash) {
+ if (t_inp->sctp_lport != lport) {
+ continue;
+ }
+ /* is it in the VRF in question */
+ fnd = 0;
+ if (t_inp->def_vrf_id == vrf_id)
+ fnd = 1;
+ if (!fnd)
+ continue;
+
+ /* This one is in use. */
+ /* check the v6/v4 binding issue */
+ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(t_inp)) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* collision in V6 space */
+ return (1);
+ } else {
+ /* inp is BOUND_V4 no conflict */
+ continue;
+ }
+ } else if (t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* t_inp is bound v4 and v6, conflict always */
+ return (1);
+ } else {
+ /* t_inp is bound only V4 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* no conflict */
+ continue;
+ }
+ /* else fall through to conflict */
+ }
+ return (1);
+ }
+ return (0);
+}
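The v4/v6 collision rules checked above reduce to a small predicate on how the two endpoints are bound. A condensed sketch of the same decision with the flag tests folded into booleans; the names here are illustrative, not kernel identifiers:

/* Does a new endpoint conflict with an existing one on the same port?
 * t_v6     - existing endpoint is bound for IPv6
 * t_v6only - existing endpoint is IPv6-only
 * n_v6     - new endpoint is bound for IPv6
 * n_v6only - new endpoint is IPv6-only
 */
static int
port_conflict(int t_v6, int t_v6only, int n_v6, int n_v6only)
{
	if (t_v6 && t_v6only)
		return (n_v6);		/* collide only in the v6 space */
	if (t_v6)
		return (1);		/* existing is v4+v6: always a conflict */
	/* existing is v4-only: only a v6-only newcomer avoids it */
	return (!(n_v6 && n_v6only));
}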
+
+
+
+/* sctp_ifap is used to bypass normal local address validation checks */
+int
+sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
+ struct sctp_ifa *sctp_ifap, struct thread *p)
+{
+ /* bind a ep to a socket address */
+ struct sctppcbhead *head;
+ struct sctp_inpcb *inp, *inp_tmp;
+ struct inpcb *ip_inp;
+ int bindall;
+ int prison = 0;
+ uint16_t lport;
+ int error;
+ uint32_t vrf_id;
+
+ lport = 0;
+ error = 0;
+ bindall = 1;
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ ip_inp = (struct inpcb *)so->so_pcb;
+#ifdef SCTP_DEBUG
+ if (addr) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
+ ntohs(((struct sockaddr_in *)addr)->sin_port));
+ SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
+ /* already did a bind, subsequent binds NOT allowed ! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+#ifdef INVARIANTS
+ if (p == NULL)
+ panic("null proc/thread");
+#endif
+ if (p && jailed(p->td_ucred)) {
+ prison = 1;
+ }
+ if (addr != NULL) {
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ /* IPV6_V6ONLY socket? */
+ if (SCTP_IPV6_V6ONLY(ip_inp)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ if (addr->sa_len != sizeof(*sin)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ sin = (struct sockaddr_in *)addr;
+ lport = sin->sin_port;
+ if (prison) {
+ /*
+ * For INADDR_ANY and LOOPBACK the
+ * prison_ip() call will transmute the IP
+ * address to the proper value (i.e. the IP
+ * address owned by the jail).
+ */
+ if (prison_ip(p->td_ucred, 0, &sin->sin_addr.s_addr)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
+ bindall = 0;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ /* Only for pure IPv6 Address. (No IPv4 Mapped!) */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+
+ if (addr->sa_len != sizeof(*sin6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ lport = sin6->sin6_port;
+ /*
+ * Jail checks for IPv6 should go HERE! i.e. add the
+ * prison_ip() equivalent in this position to
+ * transmute the addresses to the proper jailed one.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ bindall = 0;
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, ip6_use_defzone) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* this must be cleared for ifa_ifwithaddr() */
+ sin6->sin6_scope_id = 0;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT);
+ return (EAFNOSUPPORT);
+ }
+ }
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ /* Setup a vrf_id to be the default for the non-bind-all case. */
+ vrf_id = inp->def_vrf_id;
+
+ /* increase our count due to the unlock we do */
+ SCTP_INP_INCR_REF(inp);
+ if (lport) {
+ /*
+ * Did the caller specify a port? If so, we must see if an EP
+ * already has this one bound.
+ */
+ /* got to be root to get at low ports */
+ if (ntohs(lport) < IPPORT_RESERVED) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ }
+ if (p == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ if (bindall) {
+ vrf_id = inp->def_vrf_id;
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+ * The lock guy returned; lower the count. Note
+ * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ SCTP_INP_DECR_REF(inp);
+ /* unlock info */
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ } else {
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+ * The lock guy returned; lower the count. Note
+ * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ SCTP_INP_DECR_REF(inp);
+ /* unlock info */
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+ SCTP_INP_WLOCK(inp);
+ if (bindall) {
+ /* verify that the lport is not already in use by a singleton */
+ if (sctp_isport_inuse(inp, lport, vrf_id)) {
+ /* Sorry someone already has this one bound */
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+ } else {
+ uint16_t first, last, candidate;
+ uint16_t count;
+ int done;
+
+ if (ip_inp->inp_flags & INP_HIGHPORT) {
+ first = ipport_hifirstauto;
+ last = ipport_hilastauto;
+ } else if (ip_inp->inp_flags & INP_LOWPORT) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ first = ipport_lowfirstauto;
+ last = ipport_lowlastauto;
+ } else {
+ first = ipport_firstauto;
+ last = ipport_lastauto;
+ }
+ if (first > last) {
+ uint16_t temp;
+
+ temp = first;
+ first = last;
+ last = temp;
+ }
+ count = last - first + 1; /* number of candidates */
+ candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count);
+
+ done = 0;
+ while (!done) {
+ if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == 0) {
+ done = 1;
+ }
+ if (!done) {
+ if (--count == 0) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (candidate == last)
+ candidate = first;
+ else
+ candidate = candidate + 1;
+ }
+ }
+ lport = htons(candidate);
+ }
+ SCTP_INP_DECR_REF(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE |
+ SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /*
+ * this really should not happen. The guy did a non-blocking
+ * bind and then did a close at the same time.
+ */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ /* ok, we look clear to give out this port, so let's set up the binding */
+ if (bindall) {
+ /* binding to all addresses, so just set in the proper flags */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL;
+ /* set the automatic addr changes from kernel flag */
+ if (sctp_auto_asconf == 0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ }
+ /*
+ * set the automatic mobility_base from kernel flag (by
+ * micchie)
+ */
+ if (sctp_mobility_base == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ /*
+ * set the automatic mobility_fasthandoff from kernel flag
+ * (by micchie)
+ */
+ if (sctp_mobility_fasthandoff == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ } else {
+ /*
+ * bind-specific, make sure the flag is off and add a new
+ * address structure to the sctp_addr_list inside the ep
+ * structure.
+ *
+ * We will need to allocate one and insert it at the head. The
+ * socketopt call can just insert new addresses in there as
+ * well. It will also have to do the embedded-scope KAME hack
+ * too (before adding).
+ */
+ struct sctp_ifa *ifa;
+ struct sockaddr_storage store_sa;
+
+ memset(&store_sa, 0, sizeof(store_sa));
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&store_sa;
+ memcpy(sin, addr, sizeof(struct sockaddr_in));
+ sin->sin_port = 0;
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&store_sa;
+ memcpy(sin6, addr, sizeof(struct sockaddr_in6));
+ sin6->sin6_port = 0;
+ }
+ /*
+ * first find the interface with the bound address; we need to
+ * zero out the port to find the address (yuck!). Can't do
+ * this earlier since we need the port for sctp_pcb_findep()
+ */
+ if (sctp_ifap != NULL)
+ ifa = sctp_ifap;
+ else {
+ /*
+ * Note: for BSD we always hit here; other OSes will
+ * pass things in via the sctp_ifap argument
+ * (Panda).
+ */
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+ vrf_id, SCTP_ADDR_NOT_LOCKED);
+ }
+ if (ifa == NULL) {
+ /* Can't find an interface with that address */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ if (addr->sa_family == AF_INET6) {
+ /* GAK, more FIXME IFA lock? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* we're not bound all */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL;
+ /* allow bindx() to send ASCONF's for binding changes */
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ /* clear automatic addr changes from kernel flag */
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+
+ /* add this address to the endpoint list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0);
+ if (error != 0) {
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ inp->laddr_count++;
+ }
+ /* find the bucket */
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ /* put it in the bucket */
+ LIST_INSERT_HEAD(head, inp, sctp_hash);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d\n",
+ head, ntohs(lport));
+ /* set in the port */
+ inp->sctp_lport = lport;
+
+ /* turn off just the unbound flag */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+}
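When no port was supplied, the code above picks a random candidate in [first, last] and walks forward with wrap-around until a free port is found or the whole range has been exhausted. A standalone sketch of that scan; port_in_use() is a stand-in for sctp_isport_inuse() and the other names are illustrative:

#include <stdint.h>

/* Return a free port in [first, last] (host order), or 0 if every
 * candidate is in use. */
static uint16_t
pick_ephemeral(uint16_t first, uint16_t last, uint32_t seed,
    int (*port_in_use)(uint16_t))
{
	uint16_t count, candidate;

	if (first > last) {			/* normalize the range */
		uint16_t tmp = first;

		first = last;
		last = tmp;
	}
	count = last - first + 1;
	candidate = first + (seed % count);	/* random starting point */

	while (count-- > 0) {
		if (!port_in_use(candidate))
			return (candidate);
		candidate = (candidate == last) ? first : candidate + 1;
	}
	return (0);				/* exhausted: EADDRINUSE in the kernel */
}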
+
+
+static void
+sctp_iterator_inp_being_freed(struct sctp_inpcb *inp, struct sctp_inpcb *inp_next)
+{
+ struct sctp_iterator *it;
+
+ /*
+ * We enter with only the ITERATOR_LOCK in place and a write
+ * lock on the inp_info stuff.
+ */
+
+ /*
+ * Go through all iterators, we must do this since it is possible
+ * that some iterator does NOT have the lock, but is waiting for it.
+ * And the one that had the lock has either moved in the last
+ * iteration or we just cleared it above. We need to find all of
+ * those guys. The list of iterators should never be very big
+ * though.
+ */
+ TAILQ_FOREACH(it, &sctppcbinfo.iteratorhead, sctp_nxt_itr) {
+ if (it == inp->inp_starting_point_for_iterator)
+ /* skip this guy, he's special */
+ continue;
+ if (it->inp == inp) {
+ /*
+ * This is tricky and we DON'T lock the iterator.
+ * Reason is he's running but waiting for me since
+ * inp->inp_starting_point_for_iterator has the lock
+ * on me (the guy above we skipped). This tells us
+ * it is not running but waiting for
+ * inp->inp_starting_point_for_iterator to be
+ * released by the guy that does have our INP in a
+ * lock.
+ */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ it->stcb = NULL;
+ } else {
+ /* set him up to do the next guy not me */
+ it->inp = inp_next;
+ it->stcb = NULL;
+ }
+ }
+ }
+ it = inp->inp_starting_point_for_iterator;
+ if (it) {
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ it->inp = inp_next;
+ }
+ it->stcb = NULL;
+ }
+}
+
+/* release sctp_inpcb unbind the port */
+void
+sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
+{
+ /*
+ * Here we free an endpoint. We must find it (if it is in the Hash
+ * table) and remove it from there. Then we must also find it in the
+ * overall list and remove it from there. After all removals are
+ * complete then any timer has to be stopped. Then start the actual
+ * freeing. a) Any local lists. b) Any associations. c) The hash of
+ * all associations. d) finally the ep itself.
+ */
+ struct sctp_pcb *m;
+ struct sctp_inpcb *inp_save;
+ struct sctp_tcb *asoc, *nasoc;
+ struct sctp_laddr *laddr, *nladdr;
+ struct inpcb *ip_pcb;
+ struct socket *so;
+
+ struct sctp_queued_to_read *sq;
+
+
+ int cnt;
+ sctp_sharedkey_t *shared_key;
+
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 0);
+#endif
+ SCTP_ITERATOR_LOCK();
+ so = inp->sctp_socket;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ /* been here before.. eeks.. get out of here */
+ SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate);
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 1);
+#endif
+ return;
+ }
+ SCTP_ASOC_CREATE_LOCK(inp);
+ SCTP_INP_INFO_WLOCK();
+
+ SCTP_INP_WLOCK(inp);
+ /* First time through we have the socket lock, after that no more. */
+ if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) {
+ /*
+ * Once we are in we can remove the flag; this value of 'from'
+ * is only passed from the actual closing routines that are called
+ * via the sockets layer.
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP;
+ /* socket is gone, so no more wakeups allowed */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_1);
+
+ if (inp->control) {
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ if (inp->pkt) {
+ sctp_m_freem(inp->pkt);
+ inp->pkt = NULL;
+ }
+ m = &inp->sctp_ep;
+ ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer
+ * here but I will be nice :> (i.e.
+ * ip_pcb = ep;) */
+ if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) {
+ int cnt_in_sd;
+
+ cnt_in_sd = 0;
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ SCTP_TCB_LOCK(asoc);
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* Skip guys being freed */
+ /* asoc->sctp_socket = NULL; FIXME MT */
+ cnt_in_sd++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /*
+ * If we have data in queue, we don't want
+ * to just free since the app may have done
+ * send()/close or connect()/send()/close. And
+ * it wants the data to get across first.
+ */
+ /* Just abandon things in the front states */
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ }
+ /* Disconnect the socket please */
+ asoc->sctp_socket = NULL;
+ asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
+ if ((asoc->asoc.size_on_reasm_queue > 0) ||
+ (asoc->asoc.control_pdapi) ||
+ (asoc->asoc.size_on_all_streams > 0) ||
+ (so && (so->so_rcv.sb_cc > 0))
+ ) {
+ /* Left with Data unread */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.stream_queue_cnt == 0)
+ ) {
+ if (asoc->asoc.locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * there is nothing queued to send,
+ * so I send shutdown
+ */
+ sctp_send_shutdown(asoc, asoc->asoc.primary_destination);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED);
+ }
+ } else {
+ /* mark into shutdown pending */
+ struct sctp_stream_queue_pending *sp;
+
+ asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ if (asoc->asoc.locked_on_sending) {
+ sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
+ asoc->asoc.locked_on_sending,
+ asoc->asoc.locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else {
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ cnt_in_sd++;
+ SCTP_TCB_UNLOCK(asoc);
+ }
+ /* now is there some left in our SHUTDOWN state? */
+ if (cnt_in_sd) {
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 2);
+#endif
+ return;
+ }
+ }
+ inp->sctp_socket = NULL;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) !=
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * ok, this guy has been bound. Its port is somewhere in
+ * the sctppcbinfo hash table. Remove it!
+ */
+ LIST_REMOVE(inp, sctp_hash);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
+ }
+ /*
+ * If there is a timer running to kill us, forget it, since it may
+ * be contending for the INP lock... which would cause us to die ...
+ */
+ cnt = 0;
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ cnt++;
+ continue;
+ }
+ /* Free associations that are NOT killing us */
+ SCTP_TCB_LOCK(asoc);
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
+ struct mbuf *op_err;
+ uint32_t *ippp;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
+
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ cnt++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* Ok we have someone out there that will kill us */
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 3);
+#endif
+ return;
+ }
+ if ((inp->refcount) || (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) {
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 4);
+#endif
+ return;
+ }
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = 0;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE;
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 5);
+#endif
+
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NONE;
+ /* Clear the read queue */
+ /* sa_ignore FREED_MEMORY */
+ while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) {
+ /* It's only abandoned if it had data left */
+ if (sq->length)
+ SCTP_STAT_INCR(sctps_left_abandon);
+
+ TAILQ_REMOVE(&inp->read_queue, sq, next);
+ sctp_free_remote_addr(sq->whoFrom);
+ if (so)
+ so->so_rcv.sb_cc -= sq->length;
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ /*
+ * no need to free the net count, since at this point all
+ * assoc's are gone.
+ */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, sq);
+ SCTP_DECR_READQ_COUNT();
+ }
+ /* Now the sctp_pcb things */
+ /*
+ * free each asoc if it is not already closed/free. We can't use the
+ * macro here since le_next will get freed as part of the
+ * sctp_free_assoc() call.
+ */
+ cnt = 0;
+ if (so) {
+#ifdef IPSEC
+ ipsec4_delete_pcbpolicy(ip_pcb);
+#endif /* IPSEC */
+
+ /* Unlocks not needed since the socket is gone now */
+ }
+ if (ip_pcb->inp_options) {
+ (void)sctp_m_free(ip_pcb->inp_options);
+ ip_pcb->inp_options = 0;
+ }
+ if (ip_pcb->inp_moptions) {
+ inp_freemoptions(ip_pcb->inp_moptions);
+ ip_pcb->inp_moptions = 0;
+ }
+#ifdef INET6
+ if (ip_pcb->inp_vflag & INP_IPV6) {
+ struct in6pcb *in6p;
+
+ in6p = (struct in6pcb *)inp;
+ ip6_freepcbopts(in6p->in6p_outputopts);
+ }
+#endif /* INET6 */
+ ip_pcb->inp_vflag = 0;
+ /* free up authentication fields */
+ if (inp->sctp_ep.local_auth_chunks != NULL)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (inp->sctp_ep.local_hmacs != NULL)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ }
+
+ inp_save = LIST_NEXT(inp, sctp_list);
+ LIST_REMOVE(inp, sctp_list);
+
+ /* fix any iterators only after out of the list */
+ sctp_iterator_inp_being_freed(inp, inp_save);
+ /*
+ * if we have an address list the following will free the list of
+ * ifaddr's that are set into this ep. Again macro limitations here,
+ * since the LIST_FOREACH could be a bad idea.
+ */
+ for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL;
+ laddr = nladdr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ sctp_remove_laddr(laddr);
+ }
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL;
+ asoc = nasoc) {
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ LIST_REMOVE(asoc, sctp_tcblist);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, asoc);
+ SCTP_DECR_ASOC_COUNT();
+ }
+ /* *** END TEMP CODE *** */
+#endif
+ /* Now lets see about freeing the EP hash table. */
+ if (inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark);
+ inp->sctp_tcbhash = NULL;
+ }
+ /* Now we must put the ep memory back into the zone pool */
+ INP_LOCK_DESTROY(&inp->ip_inp.inp);
+ SCTP_INP_LOCK_DESTROY(inp);
+ SCTP_INP_READ_DESTROY(inp);
+ SCTP_ASOC_CREATE_LOCK_DESTROY(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ SCTP_DECR_EP_COUNT();
+}
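From user space, the two paths through sctp_inpcb_free() roughly correspond to a plain close(), which lets pending associations drain and shut down gracefully, versus a close() with zero linger, which aborts them. A hedged sketch of forcing the abortive path with standard socket options only (the helper name is illustrative):

#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

/* Close an SCTP socket, aborting any associations instead of
 * shutting them down gracefully. */
void
sctp_abortive_close(int fd)
{
	struct linger lg;

	memset(&lg, 0, sizeof(lg));
	lg.l_onoff = 1;		/* linger enabled... */
	lg.l_linger = 0;	/* ...with zero timeout: abort on close */
	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(fd);
}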
+
+
+struct sctp_nets *
+sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr)
+{
+ struct sctp_nets *net;
+
+ /* locate the address */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr))
+ return (net);
+ }
+ return (NULL);
+}
+
+
+int
+sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id)
+{
+ struct sctp_ifa *sctp_ifa;
+
+ sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa) {
+ return (1);
+ } else {
+ return (0);
+ }
+}
+
+/*
+ * adds a remote endpoint address, done with the INIT/INIT-ACK as well as
+ * when an ASCONF arrives that adds it. It will also initialize all the cwnd
+ * stats and such.
+ */
+int
+sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
+ int set_scope, int from)
+{
+ /*
+ * The following is redundant to the same lines in the
+ * sctp_aloc_assoc() but is needed since others call the add
+ * address function
+ */
+ struct sctp_nets *net, *netfirst;
+ int addr_inscope;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ",
+ from);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr);
+
+ netfirst = sctp_findnet(stcb, newaddr);
+ if (netfirst) {
+ /*
+ * Lie and return ok, we don't want to make the association
+ * go away for this behavior. It will happen in the TCP
+ * model in a connected socket. It does not reach the hash
+ * table until after the association is built so it can't be
+ * found. Mark as reachable, since the initial creation will
+ * have been cleared and the NOT_IN_ASSOC flag will have
+ * been added... and we don't want to end up removing it
+ * back out.
+ */
+ if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netfirst->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED);
+ } else {
+ netfirst->dest_state = SCTP_ADDR_REACHABLE;
+ }
+
+ return (0);
+ }
+ addr_inscope = 1;
+ if (newaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)newaddr;
+ if (sin->sin_addr.s_addr == 0) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* zero out the bzero area */
+ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+ /* assure len is set */
+ sin->sin_len = sizeof(struct sockaddr_in);
+ if (set_scope) {
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stcb->ipv4_local_scope = 1;
+#else
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stcb->asoc.ipv4_local_scope = 1;
+ }
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ } else {
+ /* Validate the address is in scope */
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
+ (stcb->asoc.ipv4_local_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+ } else if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)newaddr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* assure len is set */
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ if (set_scope) {
+ if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is a LINK_LOCAL we
+ * must have common site scope. Don't set
+ * the local scope since we may not share
+ * all links, only loopback can do this.
+ * Links on the local network would also be
+ * on our private network for v4 too.
+ */
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is SITE_LOCAL then
+ * we must have site scope in common.
+ */
+ stcb->asoc.site_scope = 1;
+ }
+ } else {
+ /* Validate the address is in scope */
+ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
+ (stcb->asoc.loopback_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.local_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.site_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+ } else {
+ /* not supported family type */
+ return (-1);
+ }
+ net = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_net, struct sctp_nets);
+ if (net == NULL) {
+ return (-1);
+ }
+ SCTP_INCR_RADDR_COUNT();
+ bzero(net, sizeof(*net));
+ (void)SCTP_GETTIME_TIMEVAL(&net->start_time);
+ memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len);
+ if (newaddr->sa_family == AF_INET) {
+ ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport;
+ } else if (newaddr->sa_family == AF_INET6) {
+ ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport;
+ }
+ net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
+ if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.site_scope = 1;
+ addr_inscope = 1;
+ }
+ net->failure_threshold = stcb->asoc.def_net_failure;
+ if (addr_inscope == 0) {
+ net->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_OUT_OF_SCOPE);
+ } else {
+ if (from == SCTP_ADDR_IS_CONFIRMED)
+ /* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */
+ net->dest_state = SCTP_ADDR_REACHABLE;
+ else
+ net->dest_state = SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED;
+ }
+ /*
+ * We set this to 0; the timer code knows that this means it's an
+ * initial value
+ */
+ net->RTO = 0;
+ net->RTO_measured = 0;
+ stcb->asoc.numnets++;
+ *(&net->ref_count) = 1;
+ net->tos_flowlabel = 0;
+#ifdef INET
+ if (newaddr->sa_family == AF_INET)
+ net->tos_flowlabel = stcb->asoc.default_tos;
+#endif
+#ifdef INET6
+ if (newaddr->sa_family == AF_INET6)
+ net->tos_flowlabel = stcb->asoc.default_flowlabel;
+#endif
+ /* Init the timer structure */
+ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->fr_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer);
+
+ /* Now generate a route for this guy */
+ /* KAME hack: embed scopeid */
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_embedscope(sin6, ip6_use_defzone);
+ sin6->sin6_scope_id = 0;
+ }
+ SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_recoverscope(sin6);
+ }
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
+ /* Get source address */
+ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net,
+ 0,
+ stcb->asoc.vrf_id);
+ /* Now get the interface MTU */
+ if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
+ net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ } else {
+ net->mtu = 0;
+ }
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("We have found an interface mtu of %d\n", net->mtu);
+#endif
+ if (net->mtu == 0) {
+ /* Huh ?? */
+ net->mtu = SCTP_DEFAULT_MTU;
+ } else {
+ uint32_t rmtu;
+
+ rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("The route mtu is %d\n", rmtu);
+#endif
+ if (rmtu == 0) {
+ /*
+ * Start things off to match mtu of
+ * interface please.
+ */
+ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
+ net->ro.ro_rt, net->mtu);
+ } else {
+ /*
+ * we take the route mtu over the interface,
+ * since the route may be leading out the
+ * loopback, or a different interface.
+ */
+ net->mtu = rmtu;
+ }
+ }
+ if (from == SCTP_ALLOC_ASOC) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("New assoc sets mtu to :%d\n", net->mtu);
+#endif
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ } else {
+ net->mtu = stcb->asoc.smallest_mtu;
+ }
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("new address mtu:%d smaller than smallest:%d\n",
+ net->mtu, stcb->asoc.smallest_mtu);
+#endif
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+
+ /*
+ * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning
+ * of assoc (2005/06/27, iyengar at cis.udel.edu)
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ net->src_addr_selected = 0;
+ netfirst = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net->ro.ro_rt == NULL) {
+ /* Since we have no route put it at the back */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst == NULL) {
+ /* We are the first one in the pool. */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst->ro.ro_rt == NULL) {
+ /*
+ * First one has NO route. Place this one ahead of the first
+ * one.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) {
+ /*
+ * This one has a different interface than the one at the
+ * top of the list. Place it ahead.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else {
+ /*
+ * Ok we have the same interface as the first one. Move
+ * forward until we find either a) one with a NULL route...
+ * insert ahead of that b) one with a different ifp.. insert
+ * after that. c) end of the list.. insert at the tail.
+ */
+ struct sctp_nets *netlook;
+
+ do {
+ netlook = TAILQ_NEXT(netfirst, sctp_next);
+ if (netlook == NULL) {
+ /* End of the list */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt == NULL) {
+ /* next one has NO route */
+ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) {
+ TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook,
+ net, sctp_next);
+ break;
+ }
+ /* Shift forward */
+ netfirst = netlook;
+ } while (netlook != NULL);
+ }
+
+ /* got to have a primary set */
+ if (stcb->asoc.primary_destination == 0) {
+ stcb->asoc.primary_destination = net;
+ } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) &&
+ (net->ro.ro_rt) &&
+ ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) {
+ /* No route to current primary adopt new primary */
+ stcb->asoc.primary_destination = net;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb,
+ net);
+ /* Validate primary is first */
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if ((net != stcb->asoc.primary_destination) &&
+ (stcb->asoc.primary_destination)) {
+ /*
+ * The first one on the list is NOT the primary. sctp_cmpaddr()
+ * is much more efficient if the primary is the first on the
+ * list, so make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+}
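The MTU handling in sctp_add_remote_addr() boils down to: prefer the route MTU when the route has one, fall back to the interface MTU (or a default when even that is unknown), and keep the association's smallest_mtu as the running minimum across destinations. A compact sketch of that decision under assumed names; SCTP_DEFAULT_MTU_SKETCH and pick_net_mtu() are stand-ins, not kernel identifiers:

#include <stdint.h>

#define SCTP_DEFAULT_MTU_SKETCH	1500	/* stand-in for SCTP_DEFAULT_MTU */

/* Pick the MTU for a new destination and fold it into the
 * association-wide minimum (*smallest). */
static uint32_t
pick_net_mtu(uint32_t if_mtu, uint32_t route_mtu, uint32_t *smallest)
{
	uint32_t mtu = if_mtu;

	if (mtu == 0)
		mtu = SCTP_DEFAULT_MTU_SKETCH;	/* no interface MTU known */
	else if (route_mtu != 0)
		mtu = route_mtu;		/* route MTU wins when present */

	if (*smallest == 0 || mtu < *smallest)
		*smallest = mtu;		/* track the smallest path MTU */
	return (mtu);
}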
+
+
+/*
+ * allocate an association and add it to the endpoint. The caller must be
+ * careful to add all additional addresses right away, once they are known,
+ * or else the assoc may experience a blackout scenario.
+ */
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
+ int for_a_init, int *error, uint32_t override_tag, uint32_t vrf_id,
+ struct thread *p
+)
+{
+ /* note the p argument is only valid in unbound sockets */
+
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+ struct sctpasochead *head;
+ uint16_t rport;
+ int err;
+
+ /*
+ * Assumption made here: Caller has done a
+ * sctp_findassociation_ep_addr(ep, addr's); to make sure the
+ * address does not exist already.
+ */
+ if (sctppcbinfo.ipi_count_asoc >= SCTP_MAX_NUM_OF_ASOC) {
+ /* Hit max assoc, sorry no more */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ if (firstaddr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ /*
+ * If it's in the TCP pool, it's NOT allowed to create an
+ * association. The parent listener needs to call
+ * sctp_aloc_assoc... or the one-to-many socket. If a peeled-off
+ * or connected one does this, it's an error.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:");
+#ifdef SCTP_DEBUG
+ if (firstaddr) {
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr);
+ SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n",
+ ntohs(((struct sockaddr_in *)firstaddr)->sin_port));
+ } else {
+ SCTPDBG(SCTP_DEBUG_PCB3, "None\n");
+ }
+#endif /* SCTP_DEBUG */
+ if (firstaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)firstaddr;
+ if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin->sin_port;
+ } else if (firstaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)firstaddr;
+ if ((sin6->sin6_port == 0) ||
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin6->sin6_port;
+ } else {
+ /* not supported family type */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * If you have not performed a bind, then we need to do the
+ * ephemeral bind for you.
+ */
+ if ((err = sctp_inpcb_bind(inp->sctp_socket,
+ (struct sockaddr *)NULL,
+ (struct sctp_ifa *)NULL,
+ p
+ ))) {
+ /* bind error, probably perm */
+ *error = err;
+ return (NULL);
+ }
+ }
+ stcb = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_asoc, struct sctp_tcb);
+ if (stcb == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ *error = ENOMEM;
+ return (NULL);
+ }
+ SCTP_INCR_ASOC_COUNT();
+
+ bzero(stcb, sizeof(*stcb));
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_INIT(stcb);
+ SCTP_TCB_SEND_LOCK_INIT(stcb);
+ /* setup back pointer's */
+ stcb->sctp_ep = inp;
+ stcb->sctp_socket = inp->sctp_socket;
+ if ((err = sctp_init_asoc(inp, stcb, for_a_init, override_tag, vrf_id))) {
+ /* failed */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+ *error = err;
+ return (NULL);
+ }
+ /* and the port */
+ stcb->rport = rport;
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* inpcb freed while alloc going on */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ /* now that my_vtag is set, add it to the hash */
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
+ sctppcbinfo.hashasocmark)];
+ /* put it in the bucket in the vtag hash of assoc's for the system */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+ sctp_delete_from_timewait(stcb->asoc.my_vtag);
+
+ SCTP_INP_INFO_WUNLOCK();
+
+ if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
+ /* failure.. memory error? */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ /* Init all the timers */
+ SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer);
+
+ LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /* now file the port under the hash as well */
+ if (inp->sctp_tcbhash != NULL) {
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport,
+ inp->sctp_hashmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
+ return (stcb);
+}
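
For reference, the vtag hashing used just above: SCTP_PCBHASH_ASOC() maps the association's verification tag into a bucket of sctp_asochash using the hash mark handed back when the table was created. A minimal userland sketch of that bucket selection, assuming (as is conventional for such marks) that the mark is a power-of-two table size minus one:

    #include <stdint.h>
    #include <stdio.h>

    /* hypothetical stand-in for SCTP_PCBHASH_ASOC(vtag, mark) */
    static uint32_t
    pcbhash_asoc(uint32_t vtag, uint32_t hashmark)
    {
            /* hashmark is assumed to be (nbuckets - 1), nbuckets a power of two */
            return (vtag & hashmark);
    }

    int
    main(void)
    {
            uint32_t mark = 1024 - 1;       /* e.g. a 1024-bucket table */

            printf("vtag 0x12345678 -> bucket %u\n", pcbhash_asoc(0x12345678, mark));
            return (0);
    }
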
+
+
+void
+sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+ asoc = &stcb->asoc;
+ asoc->numnets--;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ if (net == asoc->primary_destination) {
+ /* Reset primary */
+ struct sctp_nets *lnet;
+
+ lnet = TAILQ_FIRST(&asoc->nets);
+ /*
+ * Mobility adaptation: ideally, if the deleted destination is
+ * the primary, it becomes a fast retransmission trigger for
+ * the subsequent SET PRIMARY. (by micchie)
+ */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n");
+ if (asoc->deleted_primary != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n");
+ goto leave;
+ }
+ asoc->deleted_primary = net;
+ atomic_add_int(&net->ref_count, 1);
+ memset(&net->lastsa, 0, sizeof(net->lastsa));
+ memset(&net->lastsv, 0, sizeof(net->lastsv));
+ sctp_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED);
+ sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL);
+ }
+leave:
+ /* Try to find a confirmed primary */
+ asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0);
+ }
+ if (net == asoc->last_data_chunk_from) {
+ /* Reset primary */
+ asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets);
+ }
+ if (net == asoc->last_control_chunk_from) {
+ /* Clear net */
+ asoc->last_control_chunk_from = NULL;
+ }
+ sctp_free_remote_addr(net);
+}
+
+/*
+ * remove a remote endpoint address from an association, it will fail if the
+ * address does not exist.
+ */
+int
+sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr)
+{
+ /*
+ * Here we need to remove a remote address. This is quite simple: we
+ * first find it in the list of addresses for the association
+ * (asoc->nets) and, if it is there, remove it from that list. Note
+ * we do not allow it to be removed if there are no other addresses.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *net_tmp;
+
+ asoc = &stcb->asoc;
+
+ /* locate the address */
+ for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) {
+ continue;
+ }
+ if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr,
+ remaddr)) {
+ /* we found the guy */
+ if (asoc->numnets < 2) {
+ /* Must have at LEAST two remote addresses */
+ return (-1);
+ } else {
+ sctp_remove_net(stcb, net);
+ return (0);
+ }
+ }
+ }
+ /* not found. */
+ return (-2);
+}
+
+void
+sctp_delete_from_timewait(uint32_t tag)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!SCTP_LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == tag) {
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+}
+
+int
+sctp_is_in_timewait(uint32_t tag)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!SCTP_LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == tag) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+ return (found);
+}
+
+
+void
+sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct timeval now;
+ int set, i;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ set = 0;
+ if (!SCTP_LIST_EMPTY(chain)) {
+ /* Block(s) present, lets find space, and expire on the fly */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == 0) &&
+ !set) {
+ twait_block->vtag_block[i].tv_sec_at_expire =
+ now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ set = 1;
+ } else if ((twait_block->vtag_block[i].v_tag) &&
+ ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ if (set == 0) {
+ /* Reuse it for my new tag */
+ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ set = 1;
+ }
+ }
+ }
+ if (set) {
+ /*
+ * We only do up to the block where we can
+ * place our tag for audits
+ */
+ break;
+ }
+ }
+ }
+ /* Need to add a new block to chain */
+ if (!set) {
+ SCTP_MALLOC(twait_block, struct sctp_tagblock *,
+ sizeof(struct sctp_tagblock), SCTP_M_TIMW);
+ if (twait_block == NULL) {
+ return;
+ }
+ memset(twait_block, 0, sizeof(struct sctp_tagblock));
+ LIST_INSERT_HEAD(chain, twait_block, sctp_nxt_tagblock);
+ twait_block->vtag_block[0].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[0].v_tag = tag;
+ }
+}
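
The time-wait table that sctp_add_vtag_to_timewait(), sctp_is_in_timewait() and sctp_delete_from_timewait() manipulate is a hash of chains, each chain holding blocks of fixed-size tag slots that are expired lazily while scanning. A self-contained userland sketch of that bookkeeping, with hypothetical constants and no locking:

    #include <stddef.h>
    #include <stdint.h>
    #include <time.h>

    #define VTAG_HASH_SIZE    32    /* stand-in for SCTP_STACK_VTAG_HASH_SIZE */
    #define ENTRIES_PER_BLOCK 15    /* stand-in for SCTP_NUMBER_IN_VTAG_BLOCK */

    struct vtag_entry {
            uint32_t v_tag;         /* 0 means the slot is free */
            time_t   expire;        /* absolute expiry time */
    };

    struct vtag_block {
            struct vtag_entry slot[ENTRIES_PER_BLOCK];
            struct vtag_block *next;
    };

    /* one chain of blocks per hash bucket */
    static struct vtag_block *timewait[VTAG_HASH_SIZE];

    /* hold 'tag' for 'hold' seconds, expiring stale slots as we scan */
    static int
    add_to_timewait(uint32_t tag, time_t hold)
    {
            struct vtag_block *blk;
            time_t now = time(NULL);
            int i;

            for (blk = timewait[tag % VTAG_HASH_SIZE]; blk != NULL; blk = blk->next) {
                    for (i = 0; i < ENTRIES_PER_BLOCK; i++) {
                            if (blk->slot[i].v_tag != 0 && blk->slot[i].expire < now) {
                                    /* expired entry: free the slot on the fly */
                                    blk->slot[i].v_tag = 0;
                            }
                            if (blk->slot[i].v_tag == 0) {
                                    blk->slot[i].v_tag = tag;
                                    blk->slot[i].expire = now + hold;
                                    return (0);
                            }
                    }
            }
            return (-1);    /* the caller would allocate a fresh block here */
    }
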
+
+
+static void
+sctp_iterator_asoc_being_freed(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ struct sctp_iterator *it;
+
+ /*
+ * Unlock the tcb lock; we do this so we avoid a deadlock scenario
+ * where the iterator is waiting on the TCB lock and the TCB lock is
+ * waiting on the iterator lock.
+ */
+ it = stcb->asoc.stcb_starting_point_for_iterator;
+ if (it == NULL) {
+ return;
+ }
+ if (it->inp != stcb->sctp_ep) {
+ /* hmm, focused on the wrong one? */
+ return;
+ }
+ if (it->stcb != stcb) {
+ return;
+ }
+ it->stcb = LIST_NEXT(stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ /* done with all asocs in this endpoint */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ it->inp = LIST_NEXT(inp, sctp_list);
+ }
+ }
+}
+
+
+/*-
+ * Free the association after un-hashing the remote port. This
+ * function ALWAYS returns holding NO LOCK on the stcb. It DOES
+ * expect that the input to this function IS a locked TCB.
+ * It will return 0 if it did NOT destroy the association (instead
+ * it unlocks it). It will return NON-zero if it either destroyed the
+ * association OR the association is already destroyed.
+ */
+int
+sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location)
+{
+ int i;
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *prev;
+ struct sctp_laddr *laddr;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_addr *aparam;
+ struct sctp_asconf_ack *aack;
+ struct sctp_stream_reset_list *liste;
+ struct sctp_queued_to_read *sq;
+ struct sctp_stream_queue_pending *sp;
+ sctp_sharedkey_t *shared_key;
+ struct socket *so;
+ int ccnt = 0;
+ int cnt = 0;
+
+ /* first, let's purge the entry from the hash table. */
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 6);
+#endif
+ if (stcb->asoc.state == 0) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 7);
+#endif
+ /* there is no asoc, really TSNH :-0 */
+ return (1);
+ }
+ /* TEMP CODE */
+ if (stcb->freed_from_where == 0) {
+ /* Only record the first place free happened from */
+ stcb->freed_from_where = from_location;
+ }
+ /* TEMP CODE */
+
+ asoc = &stcb->asoc;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ else
+ so = inp->sctp_socket;
+
+ /*
+ * We use timer-based freeing if a reader or writer is in the way.
+ * So we first check if we are actually being called from a timer;
+ * if so, we abort early if a reader or writer is still in the way.
+ */
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) &&
+ (from_inpcbfree == SCTP_NORMAL_PROC)) {
+ /*
+ * is it the timer driving us? if so are the reader/writers
+ * gone?
+ */
+ if (stcb->asoc.refcnt) {
+ /* nope, reader or writer in the way */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ /* no asoc destroyed */
+ SCTP_TCB_UNLOCK(stcb);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 8);
+#endif
+ return (0);
+ }
+ }
+ /* now clean up any other timers */
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ asoc->hb_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ asoc->dack_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ /*-
+ * For stream reset we don't blast this unless
+ * it is a str-reset timer; it might be the
+ * free-asoc timer, which we DON'T want to
+ * disturb.
+ */
+ if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET)
+ asoc->strreset_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ asoc->asconf_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ asoc->autoclose_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ asoc->shut_guard_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ asoc->delayed_event_timer.self = NULL;
+ /* Mobility adaptation */
+ (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer);
+ asoc->delete_prim_timer.self = NULL;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ net->fr_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ net->rxt_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ net->pmtu_timer.self = NULL;
+ }
+ /* Now the read queue needs to be cleaned up (only once) */
+ cnt = 0;
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
+ stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
+ SCTP_INP_READ_LOCK(inp);
+ TAILQ_FOREACH(sq, &inp->read_queue, next) {
+ if (sq->stcb == stcb) {
+ sq->do_not_ref_stcb = 1;
+ sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * If there is no end, there never will be
+ * now.
+ */
+ if (sq->end_added == 0) {
+ /* Held for PD-API; clear that. */
+ sq->pdapi_aborted = 1;
+ sq->held_length = 0;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
+ /*
+ * Need to add a PD-API
+ * aborted indication.
+ * Setting the control_pdapi
+ * assures that it will be
+ * added right after this
+ * msg.
+ */
+ uint32_t strseq;
+
+ stcb->asoc.control_pdapi = sq;
+ strseq = (sq->sinfo_stream << 16) | sq->sinfo_ssn;
+ sctp_notify_partial_delivery_indication(stcb,
+ SCTP_PARTIAL_DELIVERY_ABORTED, 1, strseq);
+ stcb->asoc.control_pdapi = NULL;
+ }
+ }
+ /* Add an end to wake them */
+ sq->end_added = 1;
+ cnt++;
+ }
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ if (stcb->block_entry) {
+ cnt++;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET);
+ stcb->block_entry->error = ECONNRESET;
+ stcb->block_entry = NULL;
+ }
+ }
+ if (stcb->asoc.refcnt) {
+ /*
+ * reader or writer in the way, we have hopefully given him
+ * something to chew on above.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ SCTP_TCB_UNLOCK(stcb);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ if (so) {
+ /* Wake any reader/writers */
+ sctp_sorwakeup(inp, so);
+ sctp_sowwakeup(inp, so);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 9);
+#endif
+ /* no asoc destroyed */
+ return (0);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 10);
+#endif
+ /*
+ * When I reach here, no others want to kill the assoc yet.. and I
+ * own the lock. Now it's possible an abort comes in when I do the
+ * lock exchange below to grab all the locks to do the final take
+ * out. To prevent this we increment the count, which will start a
+ * timer and blow out above, thus assuring us that we hold exclusive
+ * killing of the asoc. Note that after getting back the TCB lock we
+ * will go ahead and drop the counter back down and stop any
+ * timer a passing stranger may have started :-S
+ */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ SCTP_TCB_UNLOCK(stcb);
+
+ SCTP_ITERATOR_LOCK();
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ SCTP_TCB_LOCK(stcb);
+ }
+ /* Double check the GONE flag */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * For TCP type we need special handling when we are
+ * connected. We also include the peeled-off ones too.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
+ if (so) {
+ SOCK_LOCK(so);
+ if (so->so_rcv.sb_cc == 0) {
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ }
+ SOCK_UNLOCK(so);
+ socantrcvmore(so);
+ sctp_sowwakeup(inp, so);
+ sctp_sorwakeup(inp, so);
+ SCTP_SOWAKEUP(so);
+ }
+ }
+ }
+ /*
+ * Make it invalid too; that way, if it's about to run it will abort
+ * and return.
+ */
+ sctp_iterator_asoc_being_freed(inp, stcb);
+ /* drop the reference we took above */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ asoc->state = 0;
+ if (inp->sctp_tcbhash) {
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ }
+ if (stcb->asoc.in_restart_hash) {
+ LIST_REMOVE(stcb, sctp_tcbrestarhash);
+ }
+ /* Now lets remove it from the list of ALL associations in the EP */
+ LIST_REMOVE(stcb, sctp_tcblist);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ITERATOR_UNLOCK();
+ }
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_TIME_WAIT);
+
+ /*
+ * Now re-stop the timers to be sure - this is paranoia at its finest!
+ */
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+
+ asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE;
+ prev = NULL;
+ /*
+ * The chunk lists and such SHOULD be empty but we check them just
+ * in case.
+ */
+ /* anything on the wheel needs to be removed */
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ struct sctp_stream_out *outs;
+
+ outs = &asoc->strmout[i];
+ /* now clean up any chunks here */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ }
+ sctp_free_remote_addr(sp->net);
+ sctp_free_spbufspace(stcb, asoc, sp);
+ /* Free the zone stuff */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_strmoq, sp);
+ SCTP_DECR_STRMOQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ /* sa_ignore FREED_MEMORY */
+ while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) {
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ }
+
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (sq) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next);
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ sctp_free_remote_addr(sq->whoFrom);
+ sq->whoFrom = NULL;
+ sq->stcb = NULL;
+ /* Free the ctl entry */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, sq);
+ SCTP_DECR_READQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ atomic_subtract_int(&sctppcbinfo.ipi_free_chunks, 1);
+ asoc->free_chunk_cnt--;
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from send_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from sent_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* control queue MAY not be empty */
+ if (!TAILQ_EMPTY(&asoc->control_send_queue)) {
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from ctrl_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_remote_addr(chk->whoTo);
+ ccnt++;
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from reasm_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ /* the stream outs */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ asoc->streamoutcnt = 0;
+ if (asoc->strmin) {
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) {
+ /* We have something on the stream-in queue */
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue,
+ ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ /*
+ * We don't free the address here
+ * since all the net's were freed
+ * above.
+ */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, ctl);
+ SCTP_DECR_READQ_COUNT();
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ asoc->strmin = NULL;
+ }
+ asoc->streamincnt = 0;
+ while (!TAILQ_EMPTY(&asoc->nets)) {
+ /* sa_ignore FREED_MEMORY */
+ net = TAILQ_FIRST(&asoc->nets);
+ /* pull from list */
+ if ((sctppcbinfo.ipi_count_raddr == 0) || (prev == net)) {
+#ifdef INVARIANTS
+ panic("no net's left alloc'ed, or list points to itself");
+#endif
+ break;
+ }
+ prev = net;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ }
+
+ while (!SCTP_LIST_EMPTY(&asoc->sctp_restricted_addrs)) {
+ /* sa_ignore FREED_MEMORY */
+ laddr = LIST_FIRST(&asoc->sctp_restricted_addrs);
+ sctp_remove_laddr(laddr);
+ }
+
+ /* pending asconf (address) parameters */
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) {
+ /* sa_ignore FREED_MEMORY */
+ aack = TAILQ_FIRST(&asoc->asconf_ack_sent);
+ TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next);
+ if (aack->data != NULL) {
+ sctp_m_freem(aack->data);
+ }
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asconf_ack, aack);
+ }
+ /* clean up auth stuff */
+ if (asoc->local_hmacs)
+ sctp_free_hmaclist(asoc->local_hmacs);
+ if (asoc->peer_hmacs)
+ sctp_free_hmaclist(asoc->peer_hmacs);
+
+ if (asoc->local_auth_chunks)
+ sctp_free_chunklist(asoc->local_auth_chunks);
+ if (asoc->peer_auth_chunks)
+ sctp_free_chunklist(asoc->peer_auth_chunks);
+
+ sctp_free_authinfo(&asoc->authinfo);
+
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ }
+
+ /* Insert new items here :> */
+
+ /* Get rid of LOCK */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+#ifdef SCTP_TRACK_FREED_ASOCS
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* now clean up the tasoc itself */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+ } else {
+ LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist);
+ }
+#else
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+#endif
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * If it's NOT inp_free calling us AND sctp_close
+ * has been called, we call back...
+ */
+ SCTP_INP_RUNLOCK(inp);
+ /*
+ * This will start the kill timer (if we are the
+ * last one) since we still hold a reference. But this
+ * is the only safe way to do this since otherwise
+ * if the socket closes at the same time we are here
+ * we might collide in the cleanup.
+ */
+ sctp_inpcb_free(inp,
+ SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ SCTP_INP_DECR_REF(inp);
+ goto out_of;
+ } else {
+ /* The socket is still open. */
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+out_of:
+ /* destroyed the asoc */
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 11);
+#endif
+ return (1);
+}
+
+
+
+/*
+ * determine if a destination is "reachable" based upon the addresses bound
+ * to the current endpoint (e.g. only v4 or v6 currently bound)
+ */
+/*
+ * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use
+ * assoc level v4/v6 flags, as the assoc *may* not have the same address
+ * types bound as its endpoint
+ */
+int
+sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr)
+{
+ struct sctp_inpcb *inp;
+ int answer;
+
+ /*
+ * No locks here: the TCB, in all cases, is already locked and an
+ * assoc is up. There is either an INP lock applied by the caller (in
+ * the asconf case, when deleting an address) or NOT in the HB case;
+ * however, if HB, then the INP refcount is up and the INP will not
+ * be removed (on top of the fact that we have a TCB lock). So we
+ * only want to read the sctp_flags, which is either bound-all or
+ * not.. no protection needed since once an assoc is up you can't be
+ * changing your binding.
+ */
+ inp = stcb->sctp_ep;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* if bound all, destination is not restricted */
+ /*
+ * RRS: Question during lock work: Is this correct? If you
+ * are bound-all you still might need to obey the V4--V6
+ * flags??? IMO this bound-all stuff needs to be removed!
+ */
+ return (1);
+ }
+ /* NOTE: all "scope" checks are done when local addresses are added */
+ if (destaddr->sa_family == AF_INET6) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV6;
+ } else if (destaddr->sa_family == AF_INET) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV4;
+ } else {
+ /* invalid family, so it's unreachable */
+ answer = 0;
+ }
+ return (answer);
+}
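
The non-bound-all branch above reduces to matching the destination's address family against the INP_IPV4/INP_IPV6 bits kept in inp_vflag. A condensed sketch of that check (the flag values below are illustrative, not the kernel's):

    #include <sys/socket.h>
    #include <stdint.h>

    #define MY_INP_IPV4 0x1         /* illustrative stand-ins for INP_IPV4/INP_IPV6 */
    #define MY_INP_IPV6 0x2

    /* non-zero if an endpoint whose vflag is 'vflag' can reach family 'af' */
    static int
    family_reachable(uint8_t vflag, sa_family_t af)
    {
            switch (af) {
            case AF_INET:
                    return ((vflag & MY_INP_IPV4) != 0);
            case AF_INET6:
                    return ((vflag & MY_INP_IPV6) != 0);
            default:
                    return (0);     /* unknown family: treat as unreachable */
            }
    }
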
+
+/*
+ * update the inp_vflags on an endpoint
+ */
+static void
+sctp_update_ep_vflag(struct sctp_inpcb *inp)
+{
+ struct sctp_laddr *laddr;
+
+ /* first clear the flag */
+ inp->ip_inp.inp.inp_vflag = 0;
+ /* set the flag based on addresses on the ep list */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+}
+
+/*
+ * Add the address to the endpoint local address list There is nothing to be
+ * done if we are bound to all addresses
+ */
+void
+sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action)
+{
+ struct sctp_laddr *laddr;
+ int fnd, error = 0;
+
+ fnd = 0;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-usable addr. */
+ return;
+ }
+ }
+ /* first, is it already present? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+
+ if (fnd == 0) {
+ /* Not in the ep list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action);
+ if (error != 0)
+ return;
+ inp->laddr_count++;
+ /* update inp_vflag flags */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+ return;
+}
+
+
+/*
+ * select a new (hopefully reachable) destination net (should only be used
+ * when we deleted an ep addr that is the only usable source address to reach
+ * the destination net)
+ */
+static void
+sctp_select_primary_destination(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* for now, we'll just pick the first reachable one we find */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED)
+ continue;
+ if (sctp_destination_is_reachable(stcb,
+ (struct sockaddr *)&net->ro._l_addr)) {
+ /* found a reachable destination */
+ stcb->asoc.primary_destination = net;
+ }
+ }
+ /* I can't get there from here! ...we're gonna die shortly... */
+}
+
+
+/*
+ * Delete the address from the endpoint local address list There is nothing
+ * to be done if we are bound to all addresses
+ */
+void
+sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+ int fnd;
+
+ fnd = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+ if (fnd && (inp->laddr_count < 2)) {
+ /* can't delete unless there are at LEAST 2 addresses */
+ return;
+ }
+ if (fnd) {
+ /*
+ * clean up any use of this address: go through our
+ * associations and clear any last_used_address that matches
+ * this one; for each assoc, see if a new primary_destination
+ * is needed
+ */
+ struct sctp_tcb *stcb;
+
+ /* clean up "next_addr_touse" */
+ if (inp->next_addr_touse == laddr)
+ /* delete this address */
+ inp->next_addr_touse = NULL;
+
+ /* clean up "last_used_address" */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ struct sctp_nets *net;
+
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.last_used_address == laddr)
+ /* delete this address */
+ stcb->asoc.last_used_address = NULL;
+ /*
+ * Now spin through all the nets and purge any ref
+ * to laddr
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->ro._s_addr &&
+ (net->ro._s_addr->ifa == laddr->ifa)) {
+ /* Yep, purge src address selected */
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ rt = net->ro.ro_rt;
+ if (rt != NULL) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } /* for each tcb */
+ /* remove it from the ep list */
+ sctp_remove_laddr(laddr);
+ inp->laddr_count--;
+ /* update inp_vflag flags */
+ sctp_update_ep_vflag(inp);
+ }
+ return;
+}
+
+/*
+ * Add the address to the TCB local address restricted list.
+ * This is a "pending" address list (eg. addresses waiting for an
+ * ASCONF-ACK response) and cannot be used as a valid source address.
+ */
+void
+sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctpladdr *list;
+
+ /*
+ * Assumes the TCB is locked.. and possibly the INP. May need to
+ * confirm/fix that if we need it and that is not the case.
+ */
+ list = &stcb->asoc.sctp_restricted_addrs;
+
+ inp = stcb->sctp_ep;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ return;
+ }
+ }
+ /* does the address already exist? */
+ LIST_FOREACH(laddr, list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ return;
+ }
+ }
+
+ /* add to the list */
+ (void)sctp_insert_laddr(list, ifa, 0);
+ return;
+}
+
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+ struct sctp_laddr *laddr;
+
+ laddr = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (laddr == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = ifa;
+ laddr->action = act;
+ atomic_add_int(&ifa->refcount, 1);
+ /* insert it */
+ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
+
+ return (0);
+}
+
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+ /* remove from the list */
+ LIST_REMOVE(laddr, sctp_nxt_addr);
+ sctp_free_ifa(laddr->ifa);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, laddr);
+ SCTP_DECR_LADDR_COUNT();
+}
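
sctp_insert_laddr() and sctp_remove_laddr() pair a queue(3) LIST insert/remove with a reference count on the underlying ifa, so the address cannot disappear while a list still points at it. A self-contained userland sketch of that pattern (the types and names here are hypothetical):

    #include <sys/queue.h>
    #include <stdlib.h>

    struct my_ifa {
            int refcount;
    };

    struct my_laddr {
            LIST_ENTRY(my_laddr) next;
            struct my_ifa *ifa;
    };

    LIST_HEAD(my_laddr_list, my_laddr);

    static int
    insert_laddr(struct my_laddr_list *list, struct my_ifa *ifa)
    {
            struct my_laddr *l = calloc(1, sizeof(*l));

            if (l == NULL)
                    return (-1);
            l->ifa = ifa;
            ifa->refcount++;                /* the list now holds a reference */
            LIST_INSERT_HEAD(list, l, next);
            return (0);
    }

    static void
    remove_laddr(struct my_laddr *l)
    {
            LIST_REMOVE(l, next);
            l->ifa->refcount--;             /* drop the list's reference */
            free(l);
    }
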
+
+/*
+ * Remove a local address from the TCB local address restricted list
+ */
+void
+sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+
+ /*
+ * This is called by asconf work. It is assumed that a) the TCB is
+ * locked and b) the INP is locked. This is true in as much as I can
+ * trace through the entry asconf code where I did these locks.
+ * Again, the ASCONF code is a bit different in that it often does
+ * lock the INP during its work. This must be so, since we don't
+ * want other procs looking up things while what they are looking
+ * up is changing :-D
+ */
+
+ inp = stcb->sctp_ep;
+ /* if subset bound and don't allow ASCONF's, can't delete last */
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ if (stcb->sctp_ep->laddr_count < 2) {
+ /* can't delete last address */
+ return;
+ }
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ /* remove the address if it exists */
+ if (laddr->ifa == NULL)
+ continue;
+ if (laddr->ifa == ifa) {
+ sctp_remove_laddr(laddr);
+ return;
+ }
+ }
+
+ /* address not found! */
+ return;
+}
+
+static char sctp_pcb_initialized = 0;
+
+/*
+ * Temporarily removed for __APPLE__ until we use the Tiger equivalents
+ */
+/* sysctl */
+static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC;
+static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR;
+
+void
+sctp_pcb_init()
+{
+ /*
+ * SCTP initialization for the PCB structures; should be called by
+ * the sctp_init() function.
+ */
+ int i;
+ struct timeval tv;
+
+ if (sctp_pcb_initialized != 0) {
+ /* error: I was called twice */
+ return;
+ }
+ sctp_pcb_initialized = 1;
+
+ bzero(&sctpstat, sizeof(struct sctpstat));
+#if defined(SCTP_LOCAL_TRACE_BUF)
+ bzero(&sctp_log, sizeof(struct sctp_log));
+#endif
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+ sctpstat.sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec;
+ sctpstat.sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec;
+ /* init the empty list of (All) Endpoints */
+ LIST_INIT(&sctppcbinfo.listhead);
+
+ /* init the iterator head */
+ TAILQ_INIT(&sctppcbinfo.iteratorhead);
+
+ /* init the hash table of endpoints */
+ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &sctp_hashtblsize);
+ TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &sctp_pcbtblsize);
+ TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &sctp_chunkscale);
+ sctppcbinfo.sctp_asochash = SCTP_HASH_INIT((sctp_hashtblsize * 31),
+ &sctppcbinfo.hashasocmark);
+ sctppcbinfo.sctp_ephash = SCTP_HASH_INIT(sctp_hashtblsize,
+ &sctppcbinfo.hashmark);
+ sctppcbinfo.sctp_tcpephash = SCTP_HASH_INIT(sctp_hashtblsize,
+ &sctppcbinfo.hashtcpmark);
+ sctppcbinfo.hashtblsize = sctp_hashtblsize;
+
+ /* init the small hash table we use to track restarted asoc's */
+ sctppcbinfo.sctp_restarthash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE,
+ &sctppcbinfo.hashrestartmark);
+
+
+ sctppcbinfo.sctp_vrfhash = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH,
+ &sctppcbinfo.hashvrfmark);
+
+ sctppcbinfo.vrf_ifn_hash = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE,
+ &sctppcbinfo.vrf_ifn_hashmark);
+
+ /* init the zones */
+ /*
+ * FIX ME: Should check for NULL returns, but if it does fail we are
+ * doomed to panic anyway... add later maybe.
+ */
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_ep, "sctp_ep",
+ sizeof(struct sctp_inpcb), maxsockets);
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_asoc, "sctp_asoc",
+ sizeof(struct sctp_tcb), sctp_max_number_of_assoc);
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_laddr, "sctp_laddr",
+ sizeof(struct sctp_laddr),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_net, "sctp_raddr",
+ sizeof(struct sctp_nets),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_chunk, "sctp_chunk",
+ sizeof(struct sctp_tmit_chunk),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_readq, "sctp_readq",
+ sizeof(struct sctp_queued_to_read),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_strmoq, "sctp_stream_msg_out",
+ sizeof(struct sctp_stream_queue_pending),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_asconf_ack, "sctp_asconf_ack",
+ sizeof(struct sctp_asconf_ack),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+
+ /* Master Lock INIT for info structure */
+ SCTP_INP_INFO_LOCK_INIT();
+ SCTP_STATLOG_INIT_LOCK();
+ SCTP_ITERATOR_LOCK_INIT();
+
+ SCTP_IPI_COUNT_INIT();
+ SCTP_IPI_ADDR_INIT();
+ SCTP_IPI_ITERATOR_WQ_INIT();
+#ifdef SCTP_PACKET_LOGGING
+ SCTP_IP_PKTLOG_INIT();
+#endif
+ LIST_INIT(&sctppcbinfo.addr_wq);
+
+ /* not sure if we need all the counts */
+ sctppcbinfo.ipi_count_ep = 0;
+ /* assoc/tcb zone info */
+ sctppcbinfo.ipi_count_asoc = 0;
+ /* local addrlist zone info */
+ sctppcbinfo.ipi_count_laddr = 0;
+ /* remote addrlist zone info */
+ sctppcbinfo.ipi_count_raddr = 0;
+ /* chunk info */
+ sctppcbinfo.ipi_count_chunk = 0;
+
+ /* socket queue zone info */
+ sctppcbinfo.ipi_count_readq = 0;
+
+ /* stream out queue count */
+ sctppcbinfo.ipi_count_strmoq = 0;
+
+ sctppcbinfo.ipi_free_strmoq = 0;
+ sctppcbinfo.ipi_free_chunks = 0;
+
+ SCTP_OS_TIMER_INIT(&sctppcbinfo.addr_wq_timer.timer);
+
+ /* Init the TIMEWAIT list */
+ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE_A; i++) {
+ LIST_INIT(&sctppcbinfo.vtag_timewait[i]);
+ }
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ sctppcbinfo.iterator_running = 0;
+ sctp_startup_iterator();
+#endif
+
+ /*
+ * INIT the default VRF, which for BSD is the only one; other OS's
+ * may have more. But initially they must start with one and then
+ * add the VRF's as addresses are added.
+ */
+ sctp_init_vrf_list(SCTP_DEFAULT_VRF);
+
+}
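
SCTP_HASH_INIT() used throughout sctp_pcb_init() wraps the kernel's hashinit(9): it derives a power-of-two number of list heads from the requested element count, allocates and initializes them, and hands back a mask of (buckets - 1) for the PCBHASH macros. A rough userland approximation of that helper (the exact rounding policy is an assumption here):

    #include <sys/queue.h>
    #include <stdlib.h>

    struct bucket_entry {
            LIST_ENTRY(bucket_entry) link;
    };

    LIST_HEAD(bucket_head, bucket_entry);

    /* allocate a power-of-two bucket array and return its mask, hashinit-style */
    static struct bucket_head *
    hash_init(int elements, unsigned long *mask)
    {
            unsigned long size = 1, i;
            struct bucket_head *tbl;

            if (elements < 1)
                    elements = 1;
            while ((size << 1) <= (unsigned long)elements)
                    size <<= 1;             /* largest power of two <= elements */
            tbl = calloc(size, sizeof(*tbl));
            if (tbl == NULL)
                    return (NULL);
            for (i = 0; i < size; i++)
                    LIST_INIT(&tbl[i]);
            *mask = size - 1;
            return (tbl);
    }
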
+
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
+ int iphlen, int offset, int limit, struct sctphdr *sh,
+ struct sockaddr *altsa)
+{
+ /*
+ * grub through the INIT pulling addresses and loading them into the
+ * nets structure in the asoc. The from address in the mbuf should
+ * also be loaded (if it is not already). This routine can be called
+ * with either an INIT or an INIT-ACK, as long as m points to the IP
+ * packet and the offset points to the beginning of the parameters.
+ */
+ struct sctp_inpcb *inp, *l_inp;
+ struct sctp_nets *net, *net_tmp;
+ struct ip *iph;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *stcb_tmp;
+ uint16_t ptype, plen;
+ struct sockaddr *sa;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+
+ /* First get the destination address setup too. */
+ memset(&sin, 0, sizeof(sin));
+ memset(&sin6, 0, sizeof(sin6));
+
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = stcb->rport;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_port = stcb->rport;
+ if (altsa == NULL) {
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* it's IPv4 */
+ struct sockaddr_in *sin_2;
+
+ sin_2 = (struct sockaddr_in *)(local_sa);
+ memset(sin_2, 0, sizeof(sin));
+ sin_2->sin_family = AF_INET;
+ sin_2->sin_len = sizeof(sin);
+ sin_2->sin_port = sh->dest_port;
+ sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
+ sin.sin_addr = iph->ip_src;
+ sa = (struct sockaddr *)&sin;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* it's IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *sin6_2;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6_2 = (struct sockaddr_in6 *)(local_sa);
+ memset(sin6_2, 0, sizeof(sin6));
+ sin6_2->sin6_family = AF_INET6;
+ sin6_2->sin6_len = sizeof(struct sockaddr_in6);
+ sin6_2->sin6_port = sh->dest_port;
+ sin6.sin6_addr = ip6->ip6_src;
+ sa = (struct sockaddr *)&sin6;
+ } else {
+ sa = NULL;
+ }
+ } else {
+ /*
+ * For cookies we use the src address NOT from the packet
+ * but from the original INIT
+ */
+ sa = altsa;
+ }
+ /* Turn off ECN until we get through all params */
+ stcb->asoc.ecn_allowed = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* mark all addresses that we have currently on the list */
+ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ /* does the source address already exist? if so skip it */
+ l_inp = inp = stcb->sctp_ep;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
+ /* we must add the source address */
+ /* no scope set here since we have a tcb already. */
+ if ((sa->sa_family == AF_INET) &&
+ (stcb->asoc.ipv4_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
+ return (-1);
+ }
+ } else if ((sa->sa_family == AF_INET6) &&
+ (stcb->asoc.ipv6_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
+ return (-2);
+ }
+ }
+ } else {
+ if (net_tmp != NULL && stcb_tmp == stcb) {
+ net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC;
+ } else if (stcb_tmp != stcb) {
+ /* It belongs to another association? */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ return (-3);
+ }
+ }
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-4);
+ }
+ /*
+ * peer must explicitly turn this on. This may have been initialized
+ * to be "on" in order to allow local addr changes while INIT's are
+ * in flight.
+ */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* now we must go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ /*
+ * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype,
+ * (int)plen);
+ */
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ if (stcb->asoc.ipv4_addr_legal) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ /* ok get the v4 address and check/add */
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (-5);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin.sin_addr.s_addr = p4->addr;
+ if (IN_MULTICAST(sin.sin_addr.s_addr)) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if ((sin.sin_addr.s_addr == INADDR_BROADCAST) ||
+ (sin.sin_addr.s_addr == INADDR_ANY)) {
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
+ inp == NULL) {
+ /* we must add the source address */
+ /*
+ * no scope set since we have a tcb
+ * already
+ */
+
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-7);
+ }
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
+ return (-8);
+ }
+ } else if (stcb_tmp == stcb) {
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-10);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-12);
+ }
+ return (-13);
+ }
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ if (stcb->asoc.ipv6_addr_legal) {
+ /* ok get the v6 address and check/add */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (-14);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
+ /*
+ * Link-local makes no sense without a
+ * scope
+ */
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin6;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb_tmp == NULL && (inp == stcb->sctp_ep ||
+ inp == NULL)) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-16);
+ }
+ /*
+ * we must add the address, no scope
+ * set
+ */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
+ return (-17);
+ }
+ } else if (stcb_tmp == stcb) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-19);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-21);
+ }
+ return (-22);
+ }
+ }
+ } else if (ptype == SCTP_ECN_CAPABLE) {
+ stcb->asoc.ecn_allowed = 1;
+ } else if (ptype == SCTP_ULP_ADAPTATION) {
+ if (stcb->asoc.state != SCTP_STATE_OPEN) {
+ struct sctp_adaptation_layer_indication ai,
+ *aip;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ai, sizeof(ai));
+ aip = (struct sctp_adaptation_layer_indication *)phdr;
+ if (aip) {
+ stcb->asoc.peers_adaptation = ntohl(aip->indication);
+ stcb->asoc.adaptation_needed = 1;
+ }
+ }
+ } else if (ptype == SCTP_SET_PRIM_ADDR) {
+ struct sctp_asconf_addr_param lstore, *fee;
+ struct sctp_asconf_addrv4_param *fii;
+ int lptype;
+ struct sockaddr *lsa = NULL;
+
+ stcb->asoc.peer_supports_asconf = 1;
+ if (plen > sizeof(lstore)) {
+ return (-23);
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&lstore, min(plen, sizeof(lstore)));
+ if (phdr == NULL) {
+ return (-24);
+ }
+ fee = (struct sctp_asconf_addr_param *)phdr;
+ lptype = ntohs(fee->addrp.ph.param_type);
+ if (lptype == SCTP_IPV4_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addrv4_param)) {
+ SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addrv4_param),
+ plen);
+ } else {
+ fii = (struct sctp_asconf_addrv4_param *)fee;
+ sin.sin_addr.s_addr = fii->addrp.addr;
+ lsa = (struct sockaddr *)&sin;
+ }
+ } else if (lptype == SCTP_IPV6_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addr_param)) {
+ SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addr_param),
+ plen);
+ } else {
+ memcpy(sin6.sin6_addr.s6_addr,
+ fee->addrp.addr,
+ sizeof(fee->addrp.addr));
+ lsa = (struct sockaddr *)&sin6;
+ }
+ }
+ if (lsa) {
+ (void)sctp_set_primary_addr(stcb, sa, NULL);
+ }
+ } else if (ptype == SCTP_PRSCTP_SUPPORTED) {
+ /* Peer supports pr-sctp */
+ stcb->asoc.peer_supports_prsctp = 1;
+ } else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen));
+ if (phdr == NULL) {
+ return (-25);
+ }
+ stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.peer_supports_pktdrop = 0;
+ stcb->asoc.peer_supports_strreset = 0;
+ stcb->asoc.peer_supports_auth = 0;
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ stcb->asoc.peer_supports_asconf = 1;
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 1;
+ break;
+ case SCTP_PACKET_DROPPED:
+ stcb->asoc.peer_supports_pktdrop = 1;
+ break;
+ case SCTP_STREAM_RESET:
+ stcb->asoc.peer_supports_strreset = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ stcb->asoc.peer_supports_auth = 1;
+ break;
+ default:
+ /* one I have not learned yet */
+ break;
+
+ }
+ }
+ } else if (ptype == SCTP_ECN_NONCE_SUPPORTED) {
+ /* Peer supports ECN-nonce */
+ stcb->asoc.peer_supports_ecn_nonce = 1;
+ stcb->asoc.ecn_nonce_allowed = 1;
+ } else if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ if (got_random) {
+ /* already processed a RANDOM */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store,
+ min(sizeof(random_store), plen));
+ if (phdr == NULL)
+ return (-26);
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ /* enforce the random length */
+ if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n");
+ return (-27);
+ }
+ got_random = 1;
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ if (got_hmacs) {
+ /* already processed a HMAC list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store,
+ min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return (-28);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ return (-29);
+ }
+ if (stcb->asoc.peer_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.peer_hmacs);
+ stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.peer_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ if (got_chklist) {
+ /* already processed a Chunks list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-30);
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ if (stcb->asoc.peer_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.peer_auth_chunks);
+ else
+ stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.peer_auth_chunks);
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ got_chklist = 1;
+ } else if ((ptype == SCTP_HEARTBEAT_INFO) ||
+ (ptype == SCTP_STATE_COOKIE) ||
+ (ptype == SCTP_UNRECOG_PARAM) ||
+ (ptype == SCTP_COOKIE_PRESERVE) ||
+ (ptype == SCTP_SUPPORTED_ADDRTYPE) ||
+ (ptype == SCTP_ADD_IP_ADDRESS) ||
+ (ptype == SCTP_DEL_IP_ADDRESS) ||
+ (ptype == SCTP_ERROR_CAUSE_IND) ||
+ (ptype == SCTP_SUCCESS_REPORT)) {
+ /* don't care */ ;
+ } else {
+ if ((ptype & 0x8000) == 0x0000) {
+ /*
+ * must stop processing the rest of the
+ * params. Any report bits were handled
+ * with the call to
+ * sctp_arethere_unrecognized_parameters()
+ * when the INIT or INIT-ACK was first seen.
+ */
+ break;
+ }
+ }
+next_param:
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* Now check to see if we need to purge any addresses */
+ for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) ==
+ SCTP_ADDR_NOT_IN_ASSOC) {
+ /* This address has been removed from the asoc */
+ /* remove and free it */
+ stcb->asoc.numnets--;
+ TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ if (net == stcb->asoc.primary_destination) {
+ stcb->asoc.primary_destination = NULL;
+ sctp_select_primary_destination(stcb);
+ }
+ }
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ stcb->asoc.peer_supports_auth = 1;
+ } else {
+ stcb->asoc.peer_supports_auth = 0;
+ }
+ if (!stcb->asoc.peer_supports_auth && got_chklist) {
+ /* peer does not support auth but sent a chunks list? */
+ return (-31);
+ }
+ if (!sctp_asconf_auth_nochk && stcb->asoc.peer_supports_asconf &&
+ !stcb->asoc.peer_supports_auth) {
+ /* peer supports asconf but not auth? */
+ return (-32);
+ } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-33);
+ }
+ /* concatenate the full random key */
+#ifdef SCTP_AUTH_DRAFT_04
+ keylen = random_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL)
+ bcopy(p_random->random_data, new_key->key, random_len);
+ }
+#else
+ keylen = sizeof(*p_random) + random_len + sizeof(*chunks) + num_chunks +
+ sizeof(*hmacs) + hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ }
+#endif
+ else {
+ /* failed to get memory for the key */
+ return (-34);
+ }
+ if (stcb->asoc.authinfo.peer_random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.peer_random);
+ stcb->asoc.authinfo.peer_random = new_key;
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ stcb->asoc.authinfo.peer_random->keylen = random_len;
+#endif
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ return (0);
+}
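
The long parameter loop above is a standard SCTP TLV walk: each parameter begins with a 16-bit type and a 16-bit length (which includes the header), and the cursor advances by the length rounded up to a 4-byte boundary, which is what SCTP_SIZE32() does. A minimal sketch of that walk over a flat buffer (no mbufs), with the same stop-on-malformed behaviour:

    #include <arpa/inet.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct param_hdr {
            uint16_t type;                  /* network byte order */
            uint16_t length;                /* includes this header, network order */
    };

    #define PAD4(x) (((x) + 3) & ~3u)       /* stand-in for SCTP_SIZE32() */

    /* walk TLV parameters in buf[0..len); returns the number of parameters seen */
    static int
    walk_params(const uint8_t *buf, size_t len)
    {
            size_t off = 0;
            int count = 0;

            while (off + sizeof(struct param_hdr) <= len) {
                    struct param_hdr ph;
                    uint16_t plen;

                    memcpy(&ph, buf + off, sizeof(ph));
                    plen = ntohs(ph.length);
                    if (plen < sizeof(ph) || off + plen > len)
                            break;          /* malformed or truncated parameter */
                    count++;
                    off += PAD4(plen);      /* parameters are 32-bit aligned */
            }
            return (count);
    }
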
+
+int
+sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa,
+ struct sctp_nets *net)
+{
+ /* make sure the requested primary address exists in the assoc */
+ if (net == NULL && sa)
+ net = sctp_findnet(stcb, sa);
+
+ if (net == NULL) {
+ /* didn't find the requested primary address! */
+ return (-1);
+ } else {
+ /* set the primary address */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /* Must be confirmed, so queue to set */
+ net->dest_state |= SCTP_ADDR_REQ_PRIMARY;
+ return (0);
+ }
+ stcb->asoc.primary_destination = net;
+ net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net != stcb->asoc.primary_destination) {
+ /*
+ * first one on the list is NOT the primary.
+ * sctp_cmpaddr() is much more efficient if the
+ * primary is the first on the list, so make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+ }
+}
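
The tail of sctp_set_primary_addr() keeps the primary destination at the head of the asoc->nets TAILQ so that sctp_cmpaddr() scans hit it first; the move is simply a remove followed by an insert at the head. A tiny sketch with a hypothetical element type:

    #include <sys/queue.h>

    struct my_net {
            TAILQ_ENTRY(my_net) link;
    };

    TAILQ_HEAD(my_netlist, my_net);

    /* move 'primary' to the front of the list if it is not there already */
    static void
    move_primary_to_front(struct my_netlist *nets, struct my_net *primary)
    {
            if (TAILQ_FIRST(nets) != primary) {
                    TAILQ_REMOVE(nets, primary, link);
                    TAILQ_INSERT_HEAD(nets, primary, link);
            }
    }
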
+
+int
+sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, struct timeval *now, int save_in_twait)
+{
+ /*
+ * This function serves two purposes. It will see if a TAG can be
+ * re-used, returning 1 if it is ok to use and 0 if that tag must not
+ * be used. A secondary thing it does is purge out old tags that
+ * can be removed.
+ */
+ struct sctpasochead *head;
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct sctp_tcb *stcb;
+ int i;
+
+ SCTP_INP_INFO_WLOCK();
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ /* First is the vtag in use ? */
+
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(tag,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ goto check_restart;
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+
+ if (stcb->asoc.my_vtag == tag) {
+ /*
+ * We should remove this if-statement and always return 0
+ * if we want vtags to be unique across all endpoints. For
+ * now, uniqueness within an endpoint is ok.
+ */
+ if (inp == stcb->sctp_ep) {
+ /* bad tag, in use */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+check_restart:
+ /* Now lets check the restart hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(tag,
+ sctppcbinfo.hashrestartmark)];
+ if (head == NULL) {
+ goto check_time_wait;
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbrestarhash) {
+ if (stcb->asoc.assoc_id == tag) {
+ /* candidate */
+ if (inp == stcb->sctp_ep) {
+ /* bad tag, in use */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+check_time_wait:
+ /* Now what about timed wait ? */
+ if (!SCTP_LIST_EMPTY(chain)) {
+ /*
+ * Block(s) are present, lets see if we have this tag in the
+ * list
+ */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == 0) {
+ /* not used */
+ continue;
+ } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+ now->tv_sec) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ } else if (twait_block->vtag_block[i].v_tag ==
+ tag) {
+ /* Bad tag, sorry :< */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+ }
+ /*-
+ * Not found, so the tag is ok to use; add it to the time wait hash
+ * as well.  This will prevent two successive cookies from getting
+ * the same tag, or two inits sent quickly on multi-processors.
+ * We only keep the tag for the life of a cookie and when we
+ * add this tag to the assoc hash we need to purge it from
+ * the t-wait hash.
+ */
+ if (save_in_twait)
+ sctp_add_vtag_to_timewait(tag, TICKS_TO_SEC(inp->sctp_ep.def_cookie_life));
+ SCTP_INP_INFO_WUNLOCK();
+ return (1);
+}
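A minimal caller-side sketch of how this check is typically used when picking a fresh verification tag; sctp_select_a_tag() is an assumed helper name here and may differ in the actual sources:

	uint32_t vtag;
	struct timeval now;

	getmicrotime(&now);
	do {
		vtag = sctp_select_a_tag(inp);	/* assumed random-tag helper */
	} while (sctp_is_vtag_good(inp, vtag, &now, 1) == 0);
	/* vtag is now usable for our INIT/INIT-ACK and has been parked in
	 * the time-wait hash for the life of a cookie. */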
+
+
+static sctp_assoc_t reneged_asoc_ids[256];
+static uint8_t reneged_at = 0;
+
+
+static void
+sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ /*
+ * We must hunt this association for MBUF's past the cumack (i.e.
+ * out of order data that we can renege on).
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *nchk;
+ uint32_t cumulative_tsn_p1, tsn;
+ struct sctp_queued_to_read *ctl, *nctl;
+ int cnt, strmat, gap;
+
+ /* We look for anything larger than the cum-ack + 1 */
+
+ SCTP_STAT_INCR(sctps_protocol_drain_calls);
+ if (sctp_do_drain == 0) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) {
+ /* none we can reneg on. */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_protocol_drains_done);
+ cumulative_tsn_p1 = asoc->cumulative_tsn + 1;
+ cnt = 0;
+ /* First look in the re-assembly queue */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ /* Get the next one */
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ tsn = chk->rec.data.TSN_seq;
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ tsn + 1;
+ }
+ asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+ /* Ok that was fun, now we will drain all the inbound streams? */
+ for (strmat = 0; strmat < asoc->streamincnt; strmat++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(ctl->sinfo_tsn,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ tsn = ctl->sinfo_tsn;
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn -
+ asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN -
+ asoc->mapping_array_base_tsn) +
+ tsn + 1;
+ }
+ asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array,
+ gap);
+ TAILQ_REMOVE(&asoc->strmin[strmat].inqueue,
+ ctl, next);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ sctp_free_remote_addr(ctl->whoFrom);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, ctl);
+ SCTP_DECR_READQ_COUNT();
+ }
+ ctl = nctl;
+ }
+ }
+ /*
+ * Question, should we go through the delivery queue? The only
+ * reason things are on here is the app not reading OR a p-d-api up.
+ * An attacker COULD send enough in to initiate the PD-API and then
+ * send a bunch of stuff to other streams... these would wind up on
+ * the delivery queue.. and then we would not get to them. But in
+ * order to do this I then have to back-track and un-deliver
+ * sequence numbers in streams.. el-yucko. I think for now we will
+ * NOT look at the delivery queue and leave it to be something to
+ * consider later. An alternative would be to abort the P-D-API with
+ * a notification and then deliver the data.... Or another method
+ * might be to keep track of how many times the situation occurs and
+ * if we see a possible attack underway just abort the association.
+ */
+#ifdef SCTP_DEBUG
+ if (cnt) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt);
+ }
+#endif
+ if (cnt) {
+ /*
+ * Now do we need to find a new
+ * asoc->highest_tsn_inside_map?
+ */
+ if (asoc->highest_tsn_inside_map >= asoc->mapping_array_base_tsn) {
+ gap = asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ asoc->highest_tsn_inside_map + 1;
+ }
+ if (gap >= (asoc->mapping_array_size << 3)) {
+ /*
+ * Something bad happened or cum-ack and high were
+ * behind the base, but if so earlier checks should
+ * have found NO data... weird... we will start at
+ * end of mapping array.
+ */
+ SCTP_PRINTF("Gap was larger than array?? %d set to max:%d maparraymax:%x\n",
+ (int)gap,
+ (int)(asoc->mapping_array_size << 3),
+ (int)asoc->highest_tsn_inside_map);
+ gap = asoc->mapping_array_size << 3;
+ }
+ while (gap > 0) {
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ /* found the new highest */
+ asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn + gap;
+ break;
+ }
+ gap--;
+ }
+ if (gap == 0) {
+ /* Nothing left in map */
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1;
+ asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+ }
+ asoc->last_revoke_count = cnt;
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_send_sack(stcb);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED);
+ reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb);
+ reneged_at++;
+ }
+ /*
+ * Another issue: in un-setting the TSN's in the mapping array we
+ * DID NOT adjust the highest_tsn marker. This will cause one of two
+ * things to occur. It may cause us to do extra work in checking for
+ * our mapping array movement. More importantly it may cause us to
+ * SACK every datagram. This may not be a bad thing though, since we
+ * will recover once our cum-ack advances and all the stuff we
+ * dumped has been recovered.
+ */
+}
+
+void
+sctp_drain()
+{
+ /*
+ * We must walk the PCB lists for ALL associations here. The system
+ * is LOW on MBUF's and needs help. This is where reneging will
+ * occur. We really hope this does NOT happen!
+ */
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+
+ SCTP_INP_INFO_RLOCK();
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ /* For each endpoint */
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* For each association */
+ SCTP_TCB_LOCK(stcb);
+ sctp_drain_mbufs(inp, stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+}
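sctp_drain() is the kind of routine that gets hung off the protocol switch's pr_drain hook so it runs under mbuf pressure; a hedged sketch of that wiring (the other fields here are placeholders, not the real in_proto.c entry):

struct protosw sctp_seqpacket_protosw = {
	.pr_type =	SOCK_SEQPACKET,
	.pr_protocol =	IPPROTO_SCTP,
	.pr_flags =	PR_WANTRCVD,
	.pr_drain =	sctp_drain,	/* invoked when the system is low on mbufs */
	/* the real entry also sets pr_domain, pr_input, pr_usrreqs, ... */
};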
+
+/*
+ * start a new iterator
+ * iterates through all endpoints and associations based on the pcb_state
+ * flags and asoc_state. "af" (mandatory) is executed for all matching
+ * assocs and "ef" (optional) is executed when the iterator completes.
+ * "inpf" (optional) is executed for each new endpoint as it is being
+ * iterated through. "inpe" (optional) is called once an endpoint has
+ * been iterated through all of its stcbs.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t pcb_state,
+ uint32_t pcb_features,
+ uint32_t asoc_state,
+ void *argp,
+ uint32_t argi,
+ end_func ef,
+ struct sctp_inpcb *s_inp,
+ uint8_t chunk_output_off)
+{
+ struct sctp_iterator *it = NULL;
+
+ if (af == NULL) {
+ return (-1);
+ }
+ SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
+ SCTP_M_ITER);
+ if (it == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(it, 0, sizeof(*it));
+ it->function_assoc = af;
+ it->function_inp = inpf;
+ if (inpf)
+ it->done_current_ep = 0;
+ else
+ it->done_current_ep = 1;
+ it->function_atend = ef;
+ it->pointer = argp;
+ it->val = argi;
+ it->pcb_flags = pcb_state;
+ it->pcb_features = pcb_features;
+ it->asoc_state = asoc_state;
+ it->function_inp_end = inpe;
+ it->no_chunk_output = chunk_output_off;
+ if (s_inp) {
+ it->inp = s_inp;
+ it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_FIRST(&sctppcbinfo.listhead);
+
+ SCTP_INP_INFO_RUNLOCK();
+ it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP;
+
+ }
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ if (it->inp) {
+ SCTP_INP_INCR_REF(it->inp);
+ }
+ TAILQ_INSERT_TAIL(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ if (sctppcbinfo.iterator_running == 0) {
+ sctp_wakeup_iterator();
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+#else
+ if (it->inp)
+ SCTP_INP_DECR_REF(it->inp);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ /* Init the timer */
+ SCTP_OS_TIMER_INIT(&it->tmr.timer);
+ /* add to the list of all iterators */
+ sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, (struct sctp_inpcb *)it,
+ NULL, NULL);
+#endif
+ /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */
+ return (0);
+}
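A hedged usage sketch of the iterator started above; the callback signature and the SCTP_PCB_ANY_FLAGS / SCTP_PCB_ANY_FEATURES / SCTP_ASOC_ANY_STATE constants are assumed from the surrounding SCTP headers:

static int assoc_count;

static void
count_assocs(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
    uint32_t val)
{
	int *counter = ptr;

	(*counter)++;			/* runs once per matching association */
}

static void
walk_all_assocs(void)
{
	/* walk every endpoint and every association; no inp/end callbacks */
	(void)sctp_initiate_iterator(NULL, count_assocs, NULL,
	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE,
	    &assoc_count, 0, NULL, NULL, 0);
	/* the walk itself is asynchronous, so assoc_count fills in later */
}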
--- /dev/null
+++ sys/netinet/sctp_uio.h
@@ -0,0 +1,1104 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_uio.h,v 1.29 2007/09/18 15:16:39 rrs Exp $");
+
+#ifndef __sctp_uio_h__
+#define __sctp_uio_h__
+
+
+#if ! defined(_KERNEL)
+#include <stdint.h>
+#endif
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+typedef uint32_t sctp_assoc_t;
+
+/* On/Off setup for subscription to events */
+struct sctp_event_subscribe {
+ uint8_t sctp_data_io_event;
+ uint8_t sctp_association_event;
+ uint8_t sctp_address_event;
+ uint8_t sctp_send_failure_event;
+ uint8_t sctp_peer_error_event;
+ uint8_t sctp_shutdown_event;
+ uint8_t sctp_partial_delivery_event;
+ uint8_t sctp_adaptation_layer_event;
+ uint8_t sctp_authentication_event;
+ uint8_t sctp_stream_reset_events;
+};
+
+/* ancillary data types */
+#define SCTP_INIT 0x0001
+#define SCTP_SNDRCV 0x0002
+#define SCTP_EXTRCV 0x0003
+/*
+ * ancillary data structures
+ */
+struct sctp_initmsg {
+ uint32_t sinit_num_ostreams;
+ uint32_t sinit_max_instreams;
+ uint16_t sinit_max_attempts;
+ uint16_t sinit_max_init_timeo;
+};
+
+/* We add 96 bytes to the size of sctp_sndrcvinfo.
+ * This makes the current structure 128 bytes long,
+ * which is nicely 64-bit aligned but also leaves room
+ * for us to add more fields and keep ABI compatibility.
+ * For example, we already have the sctp_extrcvinfo,
+ * which is 48 bytes, when it is enabled.
+ */
+
+/*
+ * The assoc up needs a verfid;
+ * all sendrcvinfo's need a verfid for SENDING only.
+ */
+
+
+#define SCTP_ALIGN_RESV_PAD 96
+#define SCTP_ALIGN_RESV_PAD_SHORT 80
+
+struct sctp_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint16_t sinfo_pr_policy;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD];
+};
+
+struct sctp_extrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint16_t sinfo_pr_policy;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint16_t sreinfo_next_flags;
+ uint16_t sreinfo_next_stream;
+ uint32_t sreinfo_next_aid;
+ uint32_t sreinfo_next_length;
+ uint32_t sreinfo_next_ppid;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
+};
+
+#define SCTP_NO_NEXT_MSG 0x0000
+#define SCTP_NEXT_MSG_AVAIL 0x0001
+#define SCTP_NEXT_MSG_ISCOMPLETE 0x0002
+#define SCTP_NEXT_MSG_IS_UNORDERED 0x0004
+#define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008
+
+struct sctp_snd_all_completes {
+ uint16_t sall_stream;
+ uint16_t sall_flags;
+ uint32_t sall_ppid;
+ uint32_t sall_context;
+ uint32_t sall_num_sent;
+ uint32_t sall_num_failed;
+};
+
+/* Flags that go into the sinfo->sinfo_flags field */
+#define SCTP_EOF 0x0100/* Start shutdown procedures */
+#define SCTP_ABORT 0x0200/* Send an ABORT to peer */
+#define SCTP_UNORDERED 0x0400/* Message is un-ordered */
+#define SCTP_ADDR_OVER 0x0800/* Override the primary-address */
+#define SCTP_SENDALL 0x1000/* Send this on all associations */
+#define SCTP_EOR 0x2000/* end of message signal */
+#define SCTP_PR_POLICY_VALID 0x4000 /* pr sctp policy valid */
+
+#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \
+ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\
+ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR)) != 0)
+/* for the endpoint */
+
+/* The lower byte is an enumeration of PR-SCTP policies */
+#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
+#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+
+#define PR_SCTP_POLICY(x) ((x) & 0xff)
+#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0)
+#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
+#define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
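A small user-level fragment showing how the sinfo flags and the PR-SCTP policy byte above fit together (stream number and lifetime are arbitrary; the usual socket/SCTP headers are assumed to be included):

static int
build_pr_sinfo(struct sctp_sndrcvinfo *sinfo)
{
	memset(sinfo, 0, sizeof(*sinfo));
	sinfo->sinfo_stream = 2;			/* arbitrary stream */
	sinfo->sinfo_flags = SCTP_UNORDERED;		/* un-ordered delivery */
	sinfo->sinfo_pr_policy = SCTP_PR_SCTP_TTL;	/* time based PR-SCTP */
	sinfo->sinfo_timetolive = 5000;			/* lifetime for that policy */

	/* the same validity macros defined above can vet the request */
	if (INVALID_SINFO_FLAG(sinfo->sinfo_flags) ||
	    PR_SCTP_INVALID_POLICY(sinfo->sinfo_pr_policy))
		return (-1);
	return (0);
}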
+/* Stat's */
+struct sctp_pcbinfo {
+ uint32_t ep_count;
+ uint32_t asoc_count;
+ uint32_t laddr_count;
+ uint32_t raddr_count;
+ uint32_t chk_count;
+ uint32_t readq_count;
+ uint32_t free_chunks;
+ uint32_t stream_oque;
+};
+
+struct sctp_sockstat {
+ sctp_assoc_t ss_assoc_id;
+ uint32_t ss_total_sndbuf;
+ uint32_t ss_total_recv_buf;
+};
+
+/*
+ * notification event structures
+ */
+
+/*
+ * association change event
+ */
+struct sctp_assoc_change {
+ uint16_t sac_type;
+ uint16_t sac_flags;
+ uint32_t sac_length;
+ uint16_t sac_state;
+ uint16_t sac_error;
+ uint16_t sac_outbound_streams;
+ uint16_t sac_inbound_streams;
+ sctp_assoc_t sac_assoc_id;
+};
+
+/* sac_state values */
+#define SCTP_COMM_UP 0x0001
+#define SCTP_COMM_LOST 0x0002
+#define SCTP_RESTART 0x0003
+#define SCTP_SHUTDOWN_COMP 0x0004
+#define SCTP_CANT_STR_ASSOC 0x0005
+
+
+/*
+ * Address event
+ */
+struct sctp_paddr_change {
+ uint16_t spc_type;
+ uint16_t spc_flags;
+ uint32_t spc_length;
+ struct sockaddr_storage spc_aaddr;
+ uint32_t spc_state;
+ uint32_t spc_error;
+ sctp_assoc_t spc_assoc_id;
+ uint8_t spc_padding[4];
+};
+
+/* paddr state values */
+#define SCTP_ADDR_AVAILABLE 0x0001
+#define SCTP_ADDR_UNREACHABLE 0x0002
+#define SCTP_ADDR_REMOVED 0x0003
+#define SCTP_ADDR_ADDED 0x0004
+#define SCTP_ADDR_MADE_PRIM 0x0005
+#define SCTP_ADDR_CONFIRMED 0x0006
+
+/*
+ * CAUTION: these are user-exposed SCTP addr reachability states and must be
+ * compatible with the SCTP_ADDR states in sctp_constants.h
+ */
+#ifdef SCTP_ACTIVE
+#undef SCTP_ACTIVE
+#endif
+#define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */
+
+#ifdef SCTP_INACTIVE
+#undef SCTP_INACTIVE
+#endif
+#define SCTP_INACTIVE 0x0002 /* SCTP_ADDR_NOT_REACHABLE */
+
+#ifdef SCTP_UNCONFIRMED
+#undef SCTP_UNCONFIRMED
+#endif
+#define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */
+
+#ifdef SCTP_NOHEARTBEAT
+#undef SCTP_NOHEARTBEAT
+#endif
+#define SCTP_NOHEARTBEAT 0x0040 /* SCTP_ADDR_NOHB */
+
+
+/* remote error events */
+struct sctp_remote_error {
+ uint16_t sre_type;
+ uint16_t sre_flags;
+ uint32_t sre_length;
+ uint16_t sre_error;
+ sctp_assoc_t sre_assoc_id;
+ uint8_t sre_data[4];
+};
+
+/* data send failure event */
+struct sctp_send_failed {
+ uint16_t ssf_type;
+ uint16_t ssf_flags;
+ uint32_t ssf_length;
+ uint32_t ssf_error;
+ struct sctp_sndrcvinfo ssf_info;
+ sctp_assoc_t ssf_assoc_id;
+ uint8_t ssf_data[0];
+};
+
+/* flag that indicates state of data */
+#define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */
+#define SCTP_DATA_SENT 0x0002 /* on wire at failure */
+
+/* shutdown event */
+struct sctp_shutdown_event {
+ uint16_t sse_type;
+ uint16_t sse_flags;
+ uint32_t sse_length;
+ sctp_assoc_t sse_assoc_id;
+};
+
+/* Adaptation layer indication stuff */
+struct sctp_adaptation_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaptation_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaptation {
+ uint32_t ssb_adaptation_ind;
+};
+
+/* compatible old spelling */
+struct sctp_adaption_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaption_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaption {
+ uint32_t ssb_adaption_ind;
+};
+
+
+/*
+ * Partial Delivery API event
+ */
+struct sctp_pdapi_event {
+ uint16_t pdapi_type;
+ uint16_t pdapi_flags;
+ uint32_t pdapi_length;
+ uint32_t pdapi_indication;
+ uint16_t pdapi_stream;
+ uint16_t pdapi_seq;
+ sctp_assoc_t pdapi_assoc_id;
+};
+
+/* indication values */
+#define SCTP_PARTIAL_DELIVERY_ABORTED 0x0001
+
+
+/*
+ * authentication key event
+ */
+struct sctp_authkey_event {
+ uint16_t auth_type;
+ uint16_t auth_flags;
+ uint32_t auth_length;
+ uint16_t auth_keynumber;
+ uint16_t auth_altkeynumber;
+ uint32_t auth_indication;
+ sctp_assoc_t auth_assoc_id;
+};
+
+/* indication values */
+#define SCTP_AUTH_NEWKEY 0x0001
+
+
+/*
+ * stream reset event
+ */
+struct sctp_stream_reset_event {
+ uint16_t strreset_type;
+ uint16_t strreset_flags;
+ uint32_t strreset_length;
+ sctp_assoc_t strreset_assoc_id;
+ uint16_t strreset_list[0];
+};
+
+/* flags in strreset_flags field */
+#define SCTP_STRRESET_INBOUND_STR 0x0001
+#define SCTP_STRRESET_OUTBOUND_STR 0x0002
+#define SCTP_STRRESET_ALL_STREAMS 0x0004
+#define SCTP_STRRESET_STREAM_LIST 0x0008
+#define SCTP_STRRESET_FAILED 0x0010
+
+
+/* SCTP notification event */
+struct sctp_tlv {
+ uint16_t sn_type;
+ uint16_t sn_flags;
+ uint32_t sn_length;
+};
+
+union sctp_notification {
+ struct sctp_tlv sn_header;
+ struct sctp_assoc_change sn_assoc_change;
+ struct sctp_paddr_change sn_paddr_change;
+ struct sctp_remote_error sn_remote_error;
+ struct sctp_send_failed sn_send_failed;
+ struct sctp_shutdown_event sn_shutdown_event;
+ struct sctp_adaptation_event sn_adaptation_event;
+ /* compatibility: same as above */
+ struct sctp_adaption_event sn_adaption_event;
+ struct sctp_pdapi_event sn_pdapi_event;
+ struct sctp_authkey_event sn_auth_event;
+ struct sctp_stream_reset_event sn_strreset_event;
+};
+
+/* notification types */
+#define SCTP_ASSOC_CHANGE 0x0001
+#define SCTP_PEER_ADDR_CHANGE 0x0002
+#define SCTP_REMOTE_ERROR 0x0003
+#define SCTP_SEND_FAILED 0x0004
+#define SCTP_SHUTDOWN_EVENT 0x0005
+#define SCTP_ADAPTATION_INDICATION 0x0006
+/* same as above */
+#define SCTP_ADAPTION_INDICATION 0x0006
+#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007
+#define SCTP_AUTHENTICATION_EVENT 0x0008
+#define SCTP_STREAM_RESET_EVENT 0x0009
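An illustrative receive-side dispatch over the notification union and the type codes above (user level; the buffer is assumed to hold one complete notification delivered with MSG_NOTIFICATION set):

static void
handle_notification(const char *buf)
{
	const union sctp_notification *snp;

	snp = (const union sctp_notification *)buf;
	switch (snp->sn_header.sn_type) {
	case SCTP_ASSOC_CHANGE:
		/* sn_assoc_change.sac_state: SCTP_COMM_UP, SCTP_COMM_LOST, ... */
		break;
	case SCTP_PEER_ADDR_CHANGE:
		/* sn_paddr_change.spc_state: SCTP_ADDR_AVAILABLE, ... */
		break;
	case SCTP_SHUTDOWN_EVENT:
		break;
	default:
		break;
	}
}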
+
+
+/*
+ * socket option structs
+ */
+
+struct sctp_paddrparams {
+ struct sockaddr_storage spp_address;
+ sctp_assoc_t spp_assoc_id;
+ uint32_t spp_hbinterval;
+ uint32_t spp_pathmtu;
+ uint32_t spp_flags;
+ uint32_t spp_ipv6_flowlabel;
+ uint16_t spp_pathmaxrxt;
+ uint8_t spp_ipv4_tos;
+};
+
+#define SPP_HB_ENABLE 0x00000001
+#define SPP_HB_DISABLE 0x00000002
+#define SPP_HB_DEMAND 0x00000004
+#define SPP_PMTUD_ENABLE 0x00000008
+#define SPP_PMTUD_DISABLE 0x00000010
+#define SPP_HB_TIME_IS_ZERO 0x00000080
+#define SPP_IPV6_FLOWLABEL 0x00000100
+#define SPP_IPV4_TOS 0x00000200
+
+struct sctp_paddrinfo {
+ struct sockaddr_storage spinfo_address;
+ sctp_assoc_t spinfo_assoc_id;
+ int32_t spinfo_state;
+ uint32_t spinfo_cwnd;
+ uint32_t spinfo_srtt;
+ uint32_t spinfo_rto;
+ uint32_t spinfo_mtu;
+};
+
+struct sctp_rtoinfo {
+ sctp_assoc_t srto_assoc_id;
+ uint32_t srto_initial;
+ uint32_t srto_max;
+ uint32_t srto_min;
+};
+
+struct sctp_assocparams {
+ sctp_assoc_t sasoc_assoc_id;
+ uint32_t sasoc_peer_rwnd;
+ uint32_t sasoc_local_rwnd;
+ uint32_t sasoc_cookie_life;
+ uint16_t sasoc_asocmaxrxt;
+ uint16_t sasoc_number_peer_destinations;
+};
+
+struct sctp_setprim {
+ struct sockaddr_storage ssp_addr;
+ sctp_assoc_t ssp_assoc_id;
+ uint8_t ssp_padding[4];
+};
+
+struct sctp_setpeerprim {
+ struct sockaddr_storage sspp_addr;
+ sctp_assoc_t sspp_assoc_id;
+ uint8_t sspp_padding[4];
+};
+
+struct sctp_getaddresses {
+ sctp_assoc_t sget_assoc_id;
+ /* addr is filled in for N * sockaddr_storage */
+ struct sockaddr addr[1];
+};
+
+struct sctp_setstrm_timeout {
+ sctp_assoc_t ssto_assoc_id;
+ uint32_t ssto_timeout;
+ uint32_t ssto_streamid_start;
+ uint32_t ssto_streamid_end;
+};
+
+struct sctp_status {
+ sctp_assoc_t sstat_assoc_id;
+ int32_t sstat_state;
+ uint32_t sstat_rwnd;
+ uint16_t sstat_unackdata;
+ uint16_t sstat_penddata;
+ uint16_t sstat_instrms;
+ uint16_t sstat_outstrms;
+ uint32_t sstat_fragmentation_point;
+ struct sctp_paddrinfo sstat_primary;
+};
+
+/*
+ * AUTHENTICATION support
+ */
+/* SCTP_AUTH_CHUNK */
+struct sctp_authchunk {
+ uint8_t sauth_chunk;
+};
+
+/* SCTP_AUTH_KEY */
+struct sctp_authkey {
+ sctp_assoc_t sca_assoc_id;
+ uint16_t sca_keynumber;
+ uint8_t sca_key[0];
+};
+
+/* SCTP_HMAC_IDENT */
+struct sctp_hmacalgo {
+ uint16_t shmac_idents[0];
+};
+
+/* AUTH hmac_id */
+#define SCTP_AUTH_HMAC_ID_RSVD 0x0000
+#define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */
+#define SCTP_AUTH_HMAC_ID_MD5 0x0002 /* deprecated */
+#define SCTP_AUTH_HMAC_ID_SHA256 0x0003
+#define SCTP_AUTH_HMAC_ID_SHA224 0x0004
+#define SCTP_AUTH_HMAC_ID_SHA384 0x0005
+#define SCTP_AUTH_HMAC_ID_SHA512 0x0006
+
+
+/* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */
+struct sctp_authkeyid {
+ sctp_assoc_t scact_assoc_id;
+ uint16_t scact_keynumber;
+};
+
+/* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */
+struct sctp_authchunks {
+ sctp_assoc_t gauth_assoc_id;
+ uint8_t gauth_chunks[0];
+};
+
+struct sctp_assoc_value {
+ sctp_assoc_t assoc_id;
+ uint32_t assoc_value;
+};
+
+struct sctp_assoc_ids {
+ sctp_assoc_t gaids_assoc_id[0];
+};
+
+struct sctp_sack_info {
+ sctp_assoc_t sack_assoc_id;
+ uint32_t sack_delay;
+ uint32_t sack_freq;
+};
+
+struct sctp_cwnd_args {
+ struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
+ uint32_t cwnd_new_value;/* cwnd in k */
+ uint32_t inflight; /* flightsize in k */
+ uint32_t pseudo_cumack;
+ uint32_t cwnd_augment; /* increment to it */
+ uint8_t meets_pseudo_cumack;
+ uint8_t need_new_pseudo_cumack;
+ uint8_t cnt_in_send;
+ uint8_t cnt_in_str;
+};
+
+struct sctp_blk_args {
+ uint32_t onsb; /* in 1k bytes */
+ uint32_t sndlen; /* len of send being attempted */
+ uint32_t peer_rwnd; /* rwnd of peer */
+ uint16_t send_sent_qcnt;/* chnk cnt */
+ uint16_t stream_qcnt; /* chnk cnt */
+ uint16_t chunks_on_oque;/* chunks out */
+ uint16_t flight_size; /* flight size in k */
+};
+
+/*
+ * Max we can reset in one setting, note this is dictated not by the define
+ * Max we can reset in one setting; note this is dictated not by the define
+ * but by the size of an mbuf cluster, so don't change this define and think
+ * you can specify more. You must do multiple resets if you want to reset
+ * more than SCTP_MAX_EXPLICIT_STR_RESET streams.
+#define SCTP_MAX_EXPLICT_STR_RESET 1000
+
+#define SCTP_RESET_LOCAL_RECV 0x0001
+#define SCTP_RESET_LOCAL_SEND 0x0002
+#define SCTP_RESET_BOTH 0x0003
+#define SCTP_RESET_TSN 0x0004
+
+struct sctp_stream_reset {
+ sctp_assoc_t strrst_assoc_id;
+ uint16_t strrst_flags;
+ uint16_t strrst_num_streams; /* 0 == ALL */
+ uint16_t strrst_list[0];/* list if strrst_num_streams is not 0 */
+};
+
+
+struct sctp_get_nonce_values {
+ sctp_assoc_t gn_assoc_id;
+ uint32_t gn_peers_tag;
+ uint32_t gn_local_tag;
+};
+
+/* Debugging logs */
+struct sctp_str_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t n_tsn;
+ uint32_t e_tsn;
+ uint16_t n_sseq;
+ uint16_t e_sseq;
+ uint16_t strm;
+};
+
+struct sctp_sb_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t so_sbcc;
+ uint32_t stcb_sbcc;
+ uint32_t incr;
+};
+
+struct sctp_fr_log {
+ uint32_t largest_tsn;
+ uint32_t largest_new_tsn;
+ uint32_t tsn;
+};
+
+struct sctp_fr_map {
+ uint32_t base;
+ uint32_t cum;
+ uint32_t high;
+};
+
+struct sctp_rwnd_log {
+ uint32_t rwnd;
+ uint32_t send_size;
+ uint32_t overhead;
+ uint32_t new_rwnd;
+};
+
+struct sctp_mbcnt_log {
+ uint32_t total_queue_size;
+ uint32_t size_change;
+ uint32_t total_queue_mb_size;
+ uint32_t mbcnt_change;
+};
+
+struct sctp_sack_log {
+ uint32_t cumack;
+ uint32_t oldcumack;
+ uint32_t tsn;
+ uint16_t numGaps;
+ uint16_t numDups;
+};
+
+struct sctp_lock_log {
+ void *sock; /* FIXME: LP64 issue */
+ void *inp; /* FIXME: LP64 issue */
+ uint8_t tcb_lock;
+ uint8_t inp_lock;
+ uint8_t info_lock;
+ uint8_t sock_lock;
+ uint8_t sockrcvbuf_lock;
+ uint8_t socksndbuf_lock;
+ uint8_t create_lock;
+ uint8_t resv;
+};
+
+struct sctp_rto_log {
+ void *net; /* FIXME: LP64 issue */
+ uint32_t rtt;
+};
+
+struct sctp_nagle_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t total_flight;
+ uint32_t total_in_queue;
+ uint16_t count_in_queue;
+ uint16_t count_in_flight;
+};
+
+struct sctp_sbwake_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint16_t send_q;
+ uint16_t sent_q;
+ uint16_t flight;
+ uint16_t wake_cnt;
+ uint8_t stream_qcnt; /* chnk cnt */
+ uint8_t chunks_on_oque; /* chunks out */
+ uint8_t sbflags;
+ uint8_t sctpflags;
+};
+
+struct sctp_misc_info {
+ uint32_t log1;
+ uint32_t log2;
+ uint32_t log3;
+ uint32_t log4;
+};
+
+struct sctp_log_closing {
+ void *inp; /* FIXME: LP64 issue */
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t sctp_flags;
+ uint16_t state;
+ int16_t loc;
+};
+
+struct sctp_mbuf_log {
+ struct mbuf *mp; /* FIXME: LP64 issue */
+ caddr_t ext;
+ caddr_t data;
+ uint16_t size;
+ uint8_t refcnt;
+ uint8_t mbuf_flags;
+};
+
+struct sctp_cwnd_log {
+ uint64_t time_event;
+ uint8_t from;
+ uint8_t event_type;
+ uint8_t resv[2];
+ union {
+ struct sctp_log_closing close;
+ struct sctp_blk_args blk;
+ struct sctp_cwnd_args cwnd;
+ struct sctp_str_log strlog;
+ struct sctp_fr_log fr;
+ struct sctp_fr_map map;
+ struct sctp_rwnd_log rwnd;
+ struct sctp_mbcnt_log mbcnt;
+ struct sctp_sack_log sack;
+ struct sctp_lock_log lock;
+ struct sctp_rto_log rto;
+ struct sctp_sb_log sb;
+ struct sctp_nagle_log nagle;
+ struct sctp_sbwake_log wake;
+ struct sctp_mbuf_log mb;
+ struct sctp_misc_info misc;
+ } x;
+};
+
+struct sctp_cwnd_log_req {
+ int32_t num_in_log; /* Number in log */
+ int32_t num_ret; /* Number returned */
+ int32_t start_at; /* start at this one */
+ int32_t end_at; /* end at this one */
+ struct sctp_cwnd_log log[0];
+};
+
+struct sctp_timeval {
+ uint32_t tv_sec;
+ uint32_t tv_usec;
+};
+
+struct sctpstat {
+ /* MIB according to RFC 3873 */
+ uint32_t sctps_currestab; /* sctpStats 1 (Gauge32) */
+ uint32_t sctps_activeestab; /* sctpStats 2 (Counter32) */
+ uint32_t sctps_restartestab;
+ uint32_t sctps_collisionestab;
+ uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */
+ uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */
+ uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */
+ uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */
+ uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */
+ uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */
+ uint32_t sctps_outorderchunks; /* sctpStats 9 (Counter64) */
+ uint32_t sctps_outunorderchunks; /* sctpStats 10 (Counter64) */
+ uint32_t sctps_incontrolchunks; /* sctpStats 11 (Counter64) */
+ uint32_t sctps_inorderchunks; /* sctpStats 12 (Counter64) */
+ uint32_t sctps_inunorderchunks; /* sctpStats 13 (Counter64) */
+ uint32_t sctps_fragusrmsgs; /* sctpStats 14 (Counter64) */
+ uint32_t sctps_reasmusrmsgs; /* sctpStats 15 (Counter64) */
+ uint32_t sctps_outpackets; /* sctpStats 16 (Counter64) */
+ uint32_t sctps_inpackets; /* sctpStats 17 (Counter64) */
+
+ /* input statistics: */
+ uint32_t sctps_recvpackets; /* total input packets */
+ uint32_t sctps_recvdatagrams; /* total input datagrams */
+ uint32_t sctps_recvpktwithdata; /* total packets that had data */
+ uint32_t sctps_recvsacks; /* total input SACK chunks */
+ uint32_t sctps_recvdata;/* total input DATA chunks */
+ uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */
+ uint32_t sctps_recvheartbeat; /* total input HB chunks */
+ uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */
+ uint32_t sctps_recvecne;/* total input ECNE chunks */
+ uint32_t sctps_recvauth;/* total input AUTH chunks */
+ uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */
+ uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids
+ * received */
+ uint32_t sctps_recvivalkeyid; /* total number of invalid secret ids
+ * received */
+ uint32_t sctps_recvauthfailed; /* total number of auth failed */
+ uint32_t sctps_recvexpress; /* total fast path receives all one
+ * chunk */
+ uint32_t sctps_recvexpressm; /* total fast path multi-part data */
+ /* output statistics: */
+ uint32_t sctps_sendpackets; /* total output packets */
+ uint32_t sctps_sendsacks; /* total output SACKs */
+ uint32_t sctps_senddata;/* total output DATA chunks */
+ uint32_t sctps_sendretransdata; /* total output retransmitted DATA
+ * chunks */
+ uint32_t sctps_sendfastretrans; /* total output fast retransmitted
+ * DATA chunks */
+ uint32_t sctps_sendmultfastretrans; /* total FR's that happened
+ * more than once to same
+ * chunk (u-del multi-fr
+ * algo). */
+ uint32_t sctps_sendheartbeat; /* total output HB chunks */
+ uint32_t sctps_sendecne;/* total output ECNE chunks */
+ uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */
+ uint32_t sctps_senderrors; /* ip_output error counter */
+ /* PCKDROPREP statistics: */
+ uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */
+ uint32_t sctps_pdrpfehos; /* P-drop from end host */
+ uint32_t sctps_pdrpmbda;/* P-drops with data */
+ uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
+ uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep
+ * only */
+ uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */
+ uint32_t sctps_pdrpnedat; /* P-drop, not enough data to confirm */
+ uint32_t sctps_pdrppdbrk; /* P-drop, where process_chunk_drop
+ * said break */
+ uint32_t sctps_pdrptsnnf; /* P-drop, could not find TSN */
+ uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */
+ uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */
+ uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */
+ uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
+ uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
+ /* timeouts */
+ uint32_t sctps_timoiterator; /* Number of iterator timers that
+ * fired */
+ uint32_t sctps_timodata;/* Number of T3 data time outs */
+ uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers
+ * that fired */
+ uint32_t sctps_timoinit;/* Number of INIT timers that fired */
+ uint32_t sctps_timosack;/* Number of sack timers that fired */
+ uint32_t sctps_timoshutdown; /* Number of shutdown timers that
+ * fired */
+ uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that
+ * fired */
+ uint32_t sctps_timocookie; /* Number of times a cookie timeout
+ * fired */
+ uint32_t sctps_timosecret; /* Number of times an endpoint changed
+ * its cookie secret */
+ uint32_t sctps_timopathmtu; /* Number of PMTU timers that fired */
+ uint32_t sctps_timoshutdownack; /* Number of shutdown ack timers that
+ * fired */
+ uint32_t sctps_timoshutdownguard; /* Number of shutdown guard
+ * timers that fired */
+ uint32_t sctps_timostrmrst; /* Number of stream reset timers that
+ * fired */
+ uint32_t sctps_timoearlyfr; /* Number of early FR timers that
+ * fired */
+ uint32_t sctps_timoasconf; /* Number of times an asconf timer
+ * fired */
+ uint32_t sctps_timodelprim; /* Number of times a prim_deleted
+ * timer fired */
+ uint32_t sctps_timoautoclose; /* Number of times auto close timer
+ * fired */
+ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */
+ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */
+ /* Early fast retransmission counters */
+ uint32_t sctps_earlyfrstart;
+ uint32_t sctps_earlyfrstop;
+ uint32_t sctps_earlyfrmrkretrans;
+ uint32_t sctps_earlyfrstpout;
+ uint32_t sctps_earlyfrstpidsck1;
+ uint32_t sctps_earlyfrstpidsck2;
+ uint32_t sctps_earlyfrstpidsck3;
+ uint32_t sctps_earlyfrstpidsck4;
+ uint32_t sctps_earlyfrstrid;
+ uint32_t sctps_earlyfrstrout;
+ uint32_t sctps_earlyfrstrtmr;
+ /* others */
+ uint32_t sctps_hdrops; /* packet shorter than header */
+ uint32_t sctps_badsum; /* checksum error */
+ uint32_t sctps_noport; /* no endpoint for port */
+ uint32_t sctps_badvtag; /* bad v-tag */
+ uint32_t sctps_badsid; /* bad SID */
+ uint32_t sctps_nomem; /* no memory */
+ uint32_t sctps_fastretransinrtt; /* number of multiple FR in a
+ * RTT window */
+ uint32_t sctps_markedretrans;
+ uint32_t sctps_naglesent; /* nagle allowed sending */
+ uint32_t sctps_naglequeued; /* nagle doesn't allow sending */
+ uint32_t sctps_maxburstqueued; /* max burst doesn't allow sending */
+ uint32_t sctps_ifnomemqueued; /* look ahead tells us no memory in
+ * interface ring buffer OR we had a
+ * send error and are queuing one
+ * send. */
+ uint32_t sctps_windowprobed; /* total number of window probes sent */
+ uint32_t sctps_lowlevelerr; /* total times an output error causes
+ * us to clamp down on next user send. */
+ uint32_t sctps_lowlevelerrusr; /* total times sctp_senderrors were
+ * caused by a user-invoked send,
+ * not by a sack response */
+ uint32_t sctps_datadropchklmt; /* Number of in data drops due to
+ * chunk limit reached */
+ uint32_t sctps_datadroprwnd; /* Number of in data drops due to rwnd
+ * limit reached */
+ uint32_t sctps_ecnereducedcwnd; /* Number of times a ECN reduced the
+ * cwnd */
+ uint32_t sctps_vtagexpress; /* Used express lookup via vtag */
+ uint32_t sctps_vtagbogus; /* Collision in express lookup. */
+ uint32_t sctps_primary_randry; /* Number of times the sender ran dry
+ * of user data on primary */
+ uint32_t sctps_cmt_randry; /* Same as above, for CMT */
+ uint32_t sctps_slowpath_sack; /* Sacks the slow way */
+ uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */
+ uint32_t sctps_sends_with_flags; /* number of sends with
+ * sinfo_flags !=0 */
+ uint32_t sctps_sends_with_unord; /* number of unordered sends */
+ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */
+ uint32_t sctps_sends_with_abort; /* number of sends with ABORT
+ * flag set */
+ uint32_t sctps_protocol_drain_calls; /* number of times protocol
+ * drain called */
+ uint32_t sctps_protocol_drains_done; /* number of times we did a
+ * protocol drain */
+ uint32_t sctps_read_peeks; /* Number of times recv was called
+ * with peek */
+ uint32_t sctps_cached_chk; /* Number of cached chunks used */
+ uint32_t sctps_cached_strmoq; /* Number of cached stream oq's used */
+ uint32_t sctps_left_abandon; /* Number of unread messages abandoned
+ * by close */
+ uint32_t sctps_send_burst_avoid; /* Send burst avoidance,
+ * already max burst inflight
+ * to net */
+ uint32_t sctps_send_cwnd_avoid; /* Send cwnd full avoidance, already
+ * max burst inflight to net */
+ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via
+ * fwd-tsn's */
+
+ struct sctp_timeval sctps_discontinuitytime; /* sctpStats 18
+ * (TimeStamp) */
+};
+
+#define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1)
+#define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1)
+#define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&sctpstat._x, _d)
+#define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&sctpstat._x, _d)
+
+/* The following macros are for handling MIB values, */
+#define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x)
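For example, the MIB wrappers above all funnel into the same atomic helper on the global sctpstat:

/*
 * SCTP_STAT_INCR_COUNTER32(sctps_aborted) expands, step by step, to
 *   SCTP_STAT_INCR(sctps_aborted)
 *   SCTP_STAT_INCR_BY(sctps_aborted, 1)
 *   atomic_add_int(&sctpstat.sctps_aborted, 1)
 */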
+
+union sctp_sockstore {
+#if defined(INET) || !defined(_KERNEL)
+ struct sockaddr_in sin;
+#endif
+#if defined(INET6) || !defined(_KERNEL)
+ struct sockaddr_in6 sin6;
+#endif
+ struct sockaddr sa;
+};
+
+struct xsctp_inpcb {
+ uint32_t last;
+ uint32_t flags;
+ uint32_t features;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t total_nospaces;
+ uint32_t fragmentation_point;
+ uint16_t local_port;
+ uint16_t qlen;
+ uint16_t maxqlen;
+};
+
+struct xsctp_tcb {
+ union sctp_sockstore primary_addr; /* sctpAssocEntry 5/6 */
+ uint32_t last;
+ uint32_t heartbeat_interval; /* sctpAssocEntry 7 */
+ uint32_t state; /* sctpAssocEntry 8 */
+ uint32_t in_streams; /* sctpAssocEntry 9 */
+ uint32_t out_streams; /* sctpAssocEntry 10 */
+ uint32_t max_nr_retrans;/* sctpAssocEntry 11 */
+ uint32_t primary_process; /* sctpAssocEntry 12 */
+ uint32_t T1_expireries; /* sctpAssocEntry 13 */
+ uint32_t T2_expireries; /* sctpAssocEntry 14 */
+ uint32_t retransmitted_tsns; /* sctpAssocEntry 15 */
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t local_tag;
+ uint32_t remote_tag;
+ uint32_t initial_tsn;
+ uint32_t highest_tsn;
+ uint32_t cumulative_tsn;
+ uint32_t cumulative_tsn_ack;
+ uint32_t mtu;
+ uint32_t refcnt;
+ uint16_t local_port; /* sctpAssocEntry 3 */
+ uint16_t remote_port; /* sctpAssocEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocEntry 16 */
+ struct sctp_timeval discontinuity_time; /* sctpAssocEntry 17 */
+};
+
+struct xsctp_laddr {
+ union sctp_sockstore address; /* sctpAssocLocalAddrEntry 1/2 */
+ uint32_t last;
+ struct sctp_timeval start_time; /* sctpAssocLocalAddrEntry 3 */
+};
+
+struct xsctp_raddr {
+ union sctp_sockstore address; /* sctpAssocLocalRemEntry 1/2 */
+ uint32_t last;
+ uint32_t rto; /* sctpAssocLocalRemEntry 5 */
+ uint32_t max_path_rtx; /* sctpAssocLocalRemEntry 6 */
+ uint32_t rtx; /* sctpAssocLocalRemEntry 7 */
+ uint32_t error_counter; /* */
+ uint32_t cwnd; /* */
+ uint32_t flight_size; /* */
+ uint32_t mtu; /* */
+ uint8_t active; /* sctpAssocLocalRemEntry 3 */
+ uint8_t confirmed; /* */
+ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */
+};
+
+#define SCTP_MAX_LOGGING_SIZE 30000
+#define SCTP_TRACE_PARAMS 6 /* This number MUST be even */
+
+struct sctp_log_entry {
+ uint64_t timestamp;
+ uint32_t subsys;
+ uint32_t padding;
+ uint32_t params[SCTP_TRACE_PARAMS];
+};
+
+struct sctp_log {
+ struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE];
+ uint32_t index;
+ uint32_t padding;
+};
+
+/*
+ * Kernel defined for sctp_send
+ */
+#if defined(_KERNEL)
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ int use_rcvinfo,
+ struct sctp_sndrcvinfo *srcv
+ ,struct thread *p
+);
+
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo);
+
+#endif
+
+/*
+ * API system calls
+ */
+#if !(defined(_KERNEL))
+
+__BEGIN_DECLS
+int sctp_peeloff __P((int, sctp_assoc_t));
+int sctp_bindx __P((int, struct sockaddr *, int, int));
+int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *));
+int sctp_getaddrlen __P((sa_family_t));
+int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freepaddrs __P((struct sockaddr *));
+int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freeladdrs __P((struct sockaddr *));
+int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
+
+ssize_t sctp_sendmsg
+__P((int, const void *, size_t,
+ const struct sockaddr *,
+ socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ ssize_t sctp_send __P((int sd, const void *msg, size_t len,
+ const struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendx __P((int sd, const void *msg, size_t len,
+ struct sockaddr *addrs, int addrcnt,
+ struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendmsgx __P((int sd, const void *, size_t,
+ struct sockaddr *, int,
+ uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa));
+
+ ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *,
+ socklen_t *, struct sctp_sndrcvinfo *, int *));
+
+__END_DECLS
+
+#endif /* !_KERNEL */
+#endif /* !__sctp_uio_h__ */
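A short user-space sketch using the declarations above; the socket setup, peer address and payload are illustrative only and error handling is omitted:

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp_uio.h>	/* normally pulled in via <netinet/sctp.h> */

static ssize_t
send_hello(int sd, const struct sockaddr_in *peer)
{
	/* stream 0, no ppid/flags/context, unlimited lifetime */
	return (sctp_sendmsg(sd, "hello", 5,
	    (const struct sockaddr *)peer, sizeof(*peer),
	    0 /* ppid */, 0 /* flags */, 0 /* stream */,
	    0 /* timetolive */, 0 /* context */));
}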
Index: ip_fastfwd.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fastfwd.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_fastfwd.c -L sys/netinet/ip_fastfwd.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_fastfwd.c
+++ sys/netinet/ip_fastfwd.c
@@ -25,8 +25,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fastfwd.c,v 1.28.2.2 2005/08/29 17:52:53 andre Exp $
*/
/*
@@ -75,6 +73,9 @@
* is being followed here.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fastfwd.c,v 1.41 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
@@ -100,6 +101,7 @@
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_options.h>
#include <machine/in_cksum.h>
@@ -150,7 +152,7 @@
* otherwise 0 is returned and the packet should be delivered
* to ip_input for full processing.
*/
-int
+struct mbuf *
ip_fastforward(struct mbuf *m)
{
struct ip *ip;
@@ -170,7 +172,7 @@
* Are we active and forwarding packets?
*/
if (!ipfastforward_active || !ipforwarding)
- return 0;
+ return m;
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
@@ -195,7 +197,7 @@
if (m->m_len < sizeof (struct ip) &&
(m = m_pullup(m, sizeof (struct ip))) == NULL) {
ipstat.ips_toosmall++;
- return 1; /* mbuf already free'd */
+ return NULL; /* mbuf already free'd */
}
ip = mtod(m, struct ip *);
@@ -217,9 +219,9 @@
goto drop;
}
if (hlen > m->m_len) {
- if ((m = m_pullup(m, hlen)) == 0) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
ipstat.ips_badhlen++;
- return 1;
+ return NULL; /* mbuf already free'd */
}
ip = mtod(m, struct ip *);
}
@@ -280,7 +282,7 @@
* Is packet dropped by traffic conditioner?
*/
if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
- return 1;
+ goto drop;
#endif
/*
@@ -292,11 +294,11 @@
*/
if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
if (ip_doopts == 1)
- return 0;
+ return m;
else if (ip_doopts == 2) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
0, 0);
- return 1;
+ return NULL; /* mbuf already free'd */
}
/* else ignore IP options and continue */
}
@@ -317,15 +319,17 @@
ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
ip->ip_src.s_addr == INADDR_ANY ||
ip->ip_dst.s_addr == INADDR_ANY )
- return 0;
+ return m;
/*
* Is it for a local address on this host?
*/
if (in_localip(ip->ip_dst))
- return 0;
+ return m;
ipstat.ips_total++;
@@ -344,12 +348,12 @@
/*
* Run through list of ipfilter hooks for input packets
*/
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passin;
if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) ||
m == NULL)
- return 1;
+ goto drop;
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
@@ -392,7 +396,7 @@
#endif
if (ip->ip_ttl <= IPTTLDEC) {
icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
- return 1;
+ return NULL; /* mbuf already free'd */
}
/*
@@ -413,13 +417,15 @@
* Find route to destination.
*/
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
- return 1; /* icmp unreach already sent */
+ return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
/*
- * Immediately drop blackholed traffic.
+ * Immediately drop blackholed traffic, and directed broadcasts
+ * for either the all-ones or all-zero subnet addresses on
+ * locally attached networks.
*/
- if (ro.ro_rt->rt_flags & RTF_BLACKHOLE)
+ if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
goto drop;
/*
@@ -429,11 +435,11 @@
/*
* Run through list of hooks for output packets.
*/
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passout;
if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) {
- goto consumed;
+ goto drop;
}
M_ASSERTVALID(m);
@@ -468,21 +474,21 @@
m->m_flags |= M_FASTFWD_OURS;
if (ro.ro_rt)
RTFREE(ro.ro_rt);
- return 0;
+ return m;
}
/*
* Redo route lookup with new destination address
*/
#ifdef IPFIREWALL_FORWARD
if (fwd_tag) {
- if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst))
- dest.s_addr = ((struct sockaddr_in *)(fwd_tag+1))->sin_addr.s_addr;
+ dest.s_addr = ((struct sockaddr_in *)
+ (fwd_tag + 1))->sin_addr.s_addr;
m_tag_delete(m, fwd_tag);
}
#endif /* IPFIREWALL_FORWARD */
RTFREE(ro.ro_rt);
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
- return 1; /* icmp unreach already sent */
+ return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
}
@@ -495,7 +501,8 @@
* Check if route is dampened (when ARP is unable to resolve)
*/
if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
- ro.ro_rt->rt_rmx.rmx_expire >= time_second) {
+ (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
+ time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
goto consumed;
}
@@ -596,11 +603,11 @@
}
consumed:
RTFREE(ro.ro_rt);
- return 1;
+ return NULL;
drop:
if (m)
m_freem(m);
if (ro.ro_rt)
RTFREE(ro.ro_rt);
- return 1;
+ return NULL;
}
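The visible API change in this file is that ip_fastforward() now returns the mbuf (or NULL) instead of an int; a hedged sketch of what a caller on the inbound path does with the new convention (the caller shown is illustrative, not part of this diff):

static void
inbound_hook(struct mbuf *m)
{
	m = ip_fastforward(m);
	if (m == NULL)
		return;		/* consumed: forwarded, dropped, or ICMP error sent */
	/* non-NULL: the packet is for us or cannot be fast-forwarded,
	 * so hand it to the normal slow-path input processing. */
	ip_input(m);
}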
--- /dev/null
+++ sys/netinet/tcp_timewait.c
@@ -0,0 +1,649 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_timewait.c,v 1.287 2007/10/07 20:44:24 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_mac.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/random.h>
+
+#include <vm/uma.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet6/nd6.h>
+#endif
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+#include <netinet6/ip6protosw.h>
+
+#include <machine/in_cksum.h>
+
+#include <security/mac/mac_framework.h>
+
+static uma_zone_t tcptw_zone;
+static int maxtcptw;
+
+/*
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state. The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * tcbinfo lock, which must be held over queue iteration and modification.
+ */
+static TAILQ_HEAD(, tcptw) twq_2msl;
+
+static void tcp_tw_2msl_reset(struct tcptw *, int);
+static void tcp_tw_2msl_stop(struct tcptw *);
+
+static int
+tcptw_auto_size(void)
+{
+ int halfrange;
+
+ /*
+ * Max out at half the ephemeral port range so that TIME_WAIT
+ * sockets don't tie up too many ephemeral ports.
+ */
+ if (ipport_lastauto > ipport_firstauto)
+ halfrange = (ipport_lastauto - ipport_firstauto) / 2;
+ else
+ halfrange = (ipport_firstauto - ipport_lastauto) / 2;
+ /* Protect against goofy port ranges smaller than 32. */
+ return (imin(imax(halfrange, 32), maxsockets / 5));
+}
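For instance, with the stock ephemeral range 49152-65535 this yields halfrange = (65535 - 49152) / 2 = 8191, so the zone limit becomes imin(imax(8191, 32), maxsockets / 5); the maxsockets / 5 clamp only takes effect on systems where maxsockets is below roughly 41000.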
+
+static int
+sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ if (maxtcptw == 0)
+ new = tcptw_auto_size();
+ else
+ new = maxtcptw;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr)
+ if (new >= 32) {
+ maxtcptw = new;
+ uma_zone_set_max(tcptw_zone, maxtcptw);
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
+ &maxtcptw, 0, sysctl_maxtcptw, "IU",
+ "Maximum number of compressed TCP TIME_WAIT entries");
+
+static int nolocaltimewait = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW,
+ &nolocaltimewait, 0,
+ "Do not create compressed TCP TIME_WAIT entries for local connections");
+
+void
+tcp_tw_zone_change(void)
+{
+
+ if (maxtcptw == 0)
+ uma_zone_set_max(tcptw_zone, tcptw_auto_size());
+}
+
+void
+tcp_tw_init(void)
+{
+
+ tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
+ if (maxtcptw == 0)
+ uma_zone_set_max(tcptw_zone, tcptw_auto_size());
+ else
+ uma_zone_set_max(tcptw_zone, maxtcptw);
+ TAILQ_INIT(&twq_2msl);
+}
+
+/*
+ * Move a TCP connection into TIME_WAIT state.
+ * tcbinfo is locked.
+ * inp is locked, and is unlocked before returning.
+ */
+void
+tcp_twstart(struct tcpcb *tp)
+{
+ struct tcptw *tw;
+ struct inpcb *inp = tp->t_inpcb;
+ int acknow;
+ struct socket *so;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_tw_2msl_reset(). */
+ INP_LOCK_ASSERT(inp);
+
+ if (nolocaltimewait && in_localip(inp->inp_faddr)) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_UNLOCK(inp);
+ return;
+ }
+
+ tw = uma_zalloc(tcptw_zone, M_NOWAIT);
+ if (tw == NULL) {
+ tw = tcp_tw_2msl_scan(1);
+ if (tw == NULL) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_UNLOCK(inp);
+ return;
+ }
+ }
+ tw->tw_inpcb = inp;
+
+ /*
+ * Recover last window size sent.
+ */
+ tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+
+ /*
+ * Set t_recent if timestamps are used on the connection.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
+ tw->t_recent = tp->ts_recent;
+ tw->ts_offset = tp->ts_offset;
+ } else {
+ tw->t_recent = 0;
+ tw->ts_offset = 0;
+ }
+
+ tw->snd_nxt = tp->snd_nxt;
+ tw->rcv_nxt = tp->rcv_nxt;
+ tw->iss = tp->iss;
+ tw->irs = tp->irs;
+ tw->t_starttime = tp->t_starttime;
+ tw->tw_time = 0;
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+ acknow = tp->t_flags & TF_ACKNOW;
+
+ /*
+ * First, discard tcpcb state, which includes stopping its timers and
+ * freeing it. tcp_discardcb() used to also release the inpcb, but
+ * that work is now done in the caller.
+ *
+ * Note: soisdisconnected() call used to be made in tcp_discardcb(),
+ * and might not be needed here any longer.
+ */
+ tcp_discardcb(tp);
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ tw->tw_cred = crhold(so->so_cred);
+ SOCK_LOCK(so);
+ tw->tw_so_options = so->so_options;
+ SOCK_UNLOCK(so);
+ if (acknow)
+ tcp_twrespond(tw, TH_ACK);
+ inp->inp_ppcb = tw;
+ inp->inp_vflag |= INP_TIMEWAIT;
+ tcp_tw_2msl_reset(tw, 0);
+
+ /*
+ * If the inpcb owns the sole reference to the socket, then we can
+ * detach and free the socket as it is not needed in time wait.
+ */
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twstart: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else
+ INP_UNLOCK(inp);
+}
+
+#if 0
+/*
+ * The approximate rate of ISN increase of Microsoft TCP stacks;
+ * the actual rate is slightly higher due to the addition of
+ * random positive increments.
+ *
+ * Most other new OSes use semi-randomized ISN values, so we
+ * do not need to worry about them.
+ */
+#define MS_ISN_BYTES_PER_SECOND 250000
+
+/*
+ * Determine if the ISN we will generate has advanced beyond the last
+ * sequence number used by the previous connection. If so, indicate
+ * that it is safe to recycle this tw socket by returning 1.
+ */
+int
+tcp_twrecycleable(struct tcptw *tw)
+{
+ tcp_seq new_iss = tw->iss;
+ tcp_seq new_irs = tw->irs;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
+ new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
+
+ if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
+ return (1);
+ else
+ return (0);
+}
+#endif
+
+/*
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state. Returns 0 otherwise.
+ */
+int
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+ struct mbuf *m, int tlen)
+{
+ struct tcptw *tw;
+ int thflags;
+ tcp_seq seq;
+#ifdef INET6
+ int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#else
+ const int isipv6 = 0;
+#endif
+
+ /* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ /*
+ * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
+ * still present. This is undesirable, but temporarily necessary
+ * until we work out how to handle inpcbs whose timewait state has
+ * been removed.
+ */
+ tw = intotw(inp);
+ if (tw == NULL)
+ goto drop;
+
+ thflags = th->th_flags;
+
+ /*
+ * NOTE: for FIN_WAIT_2 (to be added later),
+ * must validate sequence number before accepting RST
+ */
+
+ /*
+ * If the segment contains RST:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST)
+ goto drop;
+
+#if 0
+/* PAWS not needed at the moment */
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+ goto ack;
+ }
+ /*
+ * ts_recent is never updated because we never accept new segments.
+ */
+#endif
+
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+ tcp_twclose(tw, 0);
+ return (1);
+ }
+
+ /*
+ * Drop the segment if it does not contain an ACK.
+ */
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Reset the 2MSL timer if this is a duplicate FIN.
+ */
+ if (thflags & TH_FIN) {
+ seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
+ if (seq + 1 == tw->rcv_nxt)
+ tcp_tw_2msl_reset(tw, 1);
+ }
+
+ /*
+ * Acknowledge the segment if it has data or is not a duplicate ACK.
+ */
+ if (thflags != TH_ACK || tlen != 0 ||
+ th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+ tcp_twrespond(tw, TH_ACK);
+ goto drop;
+
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ goto drop;
+ if (isipv6) {
+#ifdef INET6
+ struct ip6_hdr *ip6;
+
+ /* IPv6 anycast check is done at tcp6_input() */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+ goto drop;
+#endif
+ } else {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ goto drop;
+ }
+ if (thflags & TH_ACK) {
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
+ } else {
+ seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
+ }
+ INP_UNLOCK(inp);
+ return (0);
+
+drop:
+ INP_UNLOCK(inp);
+ m_freem(m);
+ return (0);
+}
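
In other words, a caller that finds a pcb in TIME_WAIT hands the whole segment to tcp_twcheck() and only keeps going on a return value of 1, in which case it repeats its pcb lookup hoping to hit a listening socket; on a return of 0 the segment has already been consumed here (dropped, acknowledged, or answered with an RST) and the inpcb has been unlocked.
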
+
+void
+tcp_twclose(struct tcptw *tw, int reuse)
+{
+ struct socket *so;
+ struct inpcb *inp;
+
+ /*
+ * At this point, we are in one of two situations:
+ *
+ * (1) We have no socket, just an inpcb<->tcptw pair. We can free
+ * all state.
+ *
+ * (2) We have a socket -- if we own a reference, release it and
+ * notify the socket layer.
+ */
+ inp = tw->tw_inpcb;
+ KASSERT((inp->inp_vflag & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
+ KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
+ INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_tw_2msl_stop(). */
+ INP_LOCK_ASSERT(inp);
+
+ tw->tw_inpcb = NULL;
+ tcp_tw_2msl_stop(tw);
+ inp->inp_ppcb = NULL;
+ in_pcbdrop(inp);
+
+ so = inp->inp_socket;
+ if (so != NULL) {
+ /*
+ * If there's a socket, handle two cases: either we own a
+ * strong reference, which we now release, or we don't, in
+ * which case another reference exists (XXXRW: think about
+ * this more) and we don't need to take action.
+ */
+ if (inp->inp_vflag & INP_SOCKREF) {
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else {
+ /*
+ * If we don't own the only reference, the socket and
+ * inpcb need to be left around to be handled by
+ * tcp_usr_detach() later.
+ */
+ INP_UNLOCK(inp);
+ }
+ } else {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ in6_pcbfree(inp);
+ else
+#endif
+ in_pcbfree(inp);
+ }
+ tcpstat.tcps_closed++;
+ crfree(tw->tw_cred);
+ tw->tw_cred = NULL;
+ if (reuse)
+ return;
+ uma_zfree(tcptw_zone, tw);
+}
+
+int
+tcp_twrespond(struct tcptw *tw, int flags)
+{
+ struct inpcb *inp = tw->tw_inpcb;
+ struct tcphdr *th;
+ struct mbuf *m;
+ struct ip *ip = NULL;
+ u_int hdrlen, optlen;
+ int error;
+ struct tcpopt to;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ int isipv6 = inp->inp_inc.inc_isipv6;
+#endif
+
+ INP_LOCK_ASSERT(inp);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOBUFS);
+ m->m_data += max_linkhdr;
+
+#ifdef MAC
+ mac_create_mbuf_from_inpcb(inp, m);
+#endif
+
+#ifdef INET6
+ if (isipv6) {
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)(ip6 + 1);
+ tcpip_fillheaders(inp, ip6, th);
+ } else
+#endif
+ {
+ hdrlen = sizeof(struct tcpiphdr);
+ ip = mtod(m, struct ip *);
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(inp, ip, th);
+ }
+ to.to_flags = 0;
+
+ /*
+ * Send a timestamp and echo-reply if both our side and our peer
+ * have sent timestamps in our SYN's and this is not a RST.
+ */
+ if (tw->t_recent && flags == TH_ACK) {
+ to.to_flags |= TOF_TS;
+ to.to_tsval = ticks + tw->ts_offset;
+ to.to_tsecr = tw->t_recent;
+ }
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
+
+ m->m_len = hdrlen + optlen;
+ m->m_pkthdr.len = m->m_len;
+
+ KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
+
+ th->th_seq = htonl(tw->snd_nxt);
+ th->th_ack = htonl(tw->rcv_nxt);
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_flags = flags;
+ th->th_win = htons(tw->last_win);
+
+#ifdef INET6
+ if (isipv6) {
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+ sizeof(struct tcphdr) + optlen);
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ error = ip6_output(m, inp->in6p_outputopts, NULL,
+ (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
+ } else
+#endif
+ {
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ ip->ip_len = m->m_pkthdr.len;
+ if (path_mtu_discovery)
+ ip->ip_off |= IP_DF;
+ error = ip_output(m, inp->inp_options, NULL,
+ ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
+ NULL, inp);
+ }
+ if (flags & TH_ACK)
+ tcpstat.tcps_sndacks++;
+ else
+ tcpstat.tcps_sndctrl++;
+ tcpstat.tcps_sndtotal++;
+ return (error);
+}
+
+static void
+tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
+{
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tw->tw_inpcb);
+ if (rearm)
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+ tw->tw_time = ticks + 2 * tcp_msl;
+ TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
+}
+
+static void
+tcp_tw_2msl_stop(struct tcptw *tw)
+{
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+}
+
+struct tcptw *
+tcp_tw_2msl_scan(int reuse)
+{
+ struct tcptw *tw;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ for (;;) {
+ tw = TAILQ_FIRST(&twq_2msl);
+ if (tw == NULL || (!reuse && tw->tw_time > ticks))
+ break;
+ INP_LOCK(tw->tw_inpcb);
+ tcp_twclose(tw, reuse);
+ if (reuse)
+ return (tw);
+ }
+ return (NULL);
+}
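
Note that tcp_tw_2msl_reset() always stamps the entry with ticks + 2 * tcp_msl and (re)inserts it at the tail of twq_2msl, so the queue stays effectively ordered by expiry time; tcp_tw_2msl_scan() therefore only ever has to look at the head, stopping at the first entry that has not yet expired, unless reuse is requested, in which case the oldest entry is reclaimed regardless of its timer.
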
Index: ip_id.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_id.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_id.c -L sys/netinet/ip_id.c -u -r1.2 -r1.3
--- sys/netinet/ip_id.c
+++ sys/netinet/ip_id.c
@@ -1,17 +1,27 @@
+/* $OpenBSD: ip_id.c,v 1.2 1999/08/26 13:37:01 provos Exp $ */
/*-
- * Copyright (c) 2008 Michael J. Silbersack.
+ * Copyright 1998 Niels Provos <provos at citi.umich.edu>
* All rights reserved.
*
+ * Theo de Raadt <deraadt at openbsd.org> came up with the idea of using
+ * such a mathematical system to generate more random (yet non-repeating)
+ * ids to solve the resolver/named problem. But Niels designed the
+ * actual system based on the constraints.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
+ * notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
@@ -23,189 +33,169 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_id.c,v 1.7 2005/01/07 01:45:44 imp Exp $
*/
-#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
-
-/*
- * IP ID generation is a fascinating topic.
- *
- * In order to avoid ID collisions during packet reassembly, common sense
- * dictates that the period between reuse of IDs be as large as possible.
- * This leads to the classic implementation of a system-wide counter, thereby
- * ensuring that IDs repeat only once every 2^16 packets.
- *
- * Subsequent security researchers have pointed out that using a global
- * counter makes ID values predictable. This predictability allows traffic
- * analysis, idle scanning, and even packet injection in specific cases.
- * These results suggest that IP IDs should be as random as possible.
- *
- * The "searchable queues" algorithm used in this IP ID implementation was
- * proposed by Amit Klein. It is a compromise between the above two
- * viewpoints that has provable behavior that can be tuned to the user's
- * requirements.
- *
- * The basic concept is that we supplement a standard random number generator
- * with a queue of the last L IDs that we have handed out to ensure that all
- * IDs have a period of at least L.
- *
- * To efficiently implement this idea, we keep two data structures: a
- * circular array of IDs of size L and a bitstring of 65536 bits.
- *
- * To start, we ask the RNG for a new ID. A quick index into the bitstring
- * is used to determine if this is a recently used value. The process is
- * repeated until a value is returned that is not in the bitstring.
- *
- * Having found a usable ID, we remove the ID stored at the current position
- * in the queue from the bitstring and replace it with our new ID. Our new
- * ID is then added to the bitstring and the queue pointer is incremented.
- *
- * The lower limit of 512 was chosen because there doesn't seem to be much
- * point to having a smaller value. The upper limit of 32768 was chosen for
- * two reasons. First, every step above 32768 decreases the entropy. Taken
- * to an extreme, 65533 would offer 1 bit of entropy. Second, the number of
- * attempts it takes the algorithm to find an unused ID drastically
- * increases, killing performance. The default value of 8192 was chosen
- * because it provides a good tradeoff between randomness and non-repetition.
- *
- * With L=8192, the queue will use 16K of memory. The bitstring always
- * uses 8K of memory. No memory is allocated until the use of random ids is
- * enabled.
+/*-
+ * seed = random 15bit
+ * n = prime, g0 = generator to n,
+ * j = random so that gcd(j,n-1) == 1
+ * g = g0^j mod n will be a generator again.
+ *
+ * X[0] = random seed.
+ * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator
+ * with a = 7^(even random) mod m,
+ * b = random with gcd(b,m) == 1
+ * m = 31104 and a maximal period of m-1.
+ *
+ * The transaction id is determined by:
+ * id[n] = seed xor (g^X[n] mod n)
+ *
+ * Effectively the id is restricted to the lower 15 bits, thus yielding two
+ * different cycles by toggling the msb on and off. This avoids reuse issues
+ * caused by reseeding.
*/
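
For illustration only, here is a self-contained userland sketch of the construction described above; the parameters (n = 31, g = 3, the LCG constants and the seeds) are deliberately tiny toy values chosen for this sketch and are not the RU_* constants used by the actual code below:

#include <stdint.h>
#include <stdio.h>

/* Square-and-multiply: gen^exp mod mod (same idea as pmod() below). */
static uint16_t
toy_pmod(uint16_t gen, uint16_t exp, uint16_t mod)
{
        uint32_t s = 1, t = gen;

        while (exp) {
                if (exp & 1)
                        s = (s * t) % mod;
                exp >>= 1;
                t = (t * t) % mod;
        }
        return ((uint16_t)s);
}

int
main(void)
{
        uint16_t n = 31;                /* toy prime */
        uint16_t g = 3;                 /* generator of the group mod 31 */
        uint16_t a = 5, b = 3, m = 16;  /* toy LCG: x[k] = (a*x[k-1]+b) mod m */
        uint16_t x = 7;                 /* "random" LCG seed */
        uint16_t seed = 0x15;           /* "random" xor mask */
        int k;

        for (k = 0; k < 8; k++) {
                x = (a * x + b) % m;
                /* id[k] = seed xor (g^x[k] mod n), as in the comment above. */
                printf("id[%d] = %u\n", k,
                    (unsigned)(seed ^ toy_pmod(g, x, n)));
        }
        return (0);
}

The real ip_randomid() below additionally xors a second secret (ru_seed2) into the exponent, skips a random number of LCG steps per call, and reseeds itself every RU_OUT seconds or after RU_MAX ids.
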
-#include <sys/types.h>
-#include <sys/malloc.h>
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_id.c,v 1.9 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_pf.h"
#include <sys/param.h>
#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/libkern.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
#include <sys/random.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <netinet/in.h>
-#include <netinet/ip_var.h>
-#include <sys/bitstring.h>
-
-static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
-
-static u_int16_t *id_array = NULL;
-static bitstr_t *id_bits = NULL;
-static int array_ptr = 0;
-static int array_size = 8192;
-static int random_id_collisions = 0;
-static int random_id_total = 0;
-static struct mtx ip_id_mtx;
-
-static void ip_initid(void);
-static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
-
-MTX_SYSINIT(ip_id_mtx, &ip_id_mtx, "ip_id_mtx", MTX_DEF);
-
-SYSCTL_DECL(_net_inet_ip);
-SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT|CTLFLAG_RW,
- &array_size, 0, sysctl_ip_id_change, "IU", "IP ID Array size");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, CTLFLAG_RD,
- &random_id_collisions, 0, "Count of IP ID collisions");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD,
- &random_id_total, 0, "Count of IP IDs created");
-static int
-sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
+#define RU_OUT 180 /* Time after which we will reseed */
+#define RU_MAX 30000 /* Unique cycle, avoid blackjack prediction */
+#define RU_GEN 2 /* Starting generator */
+#define RU_N 32749 /* RU_N-1 = 2*2*3*2729 */
+#define RU_AGEN 7 /* determine ru_a as RU_AGEN^(2*rand) */
+#define RU_M 31104 /* RU_M = 2^7*3^5 - don't change */
+
+#define PFAC_N 3
+const static u_int16_t pfacts[PFAC_N] = {
+ 2,
+ 3,
+ 2729
+};
+
+static u_int16_t ru_x;
+static u_int16_t ru_seed, ru_seed2;
+static u_int16_t ru_a, ru_b;
+static u_int16_t ru_g;
+static u_int16_t ru_counter = 0;
+static u_int16_t ru_msb = 0;
+static long ru_reseed;
+static u_int32_t tmp; /* Storage for unused random */
+
+static u_int16_t pmod(u_int16_t, u_int16_t, u_int16_t);
+static void ip_initid(void);
+u_int16_t ip_randomid(void);
+
+/*
+ * Do a fast modular exponentiation; the returned value will be in the
+ * range 0 - (mod-1).
+ */
+
+static u_int16_t
+pmod(u_int16_t gen, u_int16_t exp, u_int16_t mod)
{
- int error, new;
+ u_int16_t s, t, u;
- new = array_size;
- error = sysctl_handle_int(oidp, &new, 0, req);
- if (error == 0 && req->newptr) {
- if (new >= 512 && new <= 32768) {
- mtx_lock(&ip_id_mtx);
- array_size = new;
- ip_initid();
- mtx_unlock(&ip_id_mtx);
- } else
- error = EINVAL;
- }
- return (error);
+ s = 1;
+ t = gen;
+ u = exp;
+
+ while (u) {
+ if (u & 1)
+ s = (s*t) % mod;
+ u >>= 1;
+ t = (t*t) % mod;
+ }
+ return (s);
}
/*
- * ip_initid() runs with a mutex held and may execute in a network context.
- * As a result, it uses M_NOWAIT. Ideally, we would always do this
- * allocation from the sysctl contact and have it be an invariant that if
- * this random ID allocation mode is selected, the buffers are present. This
- * would also avoid potential network context failures of IP ID generation.
+ * Initializes the seed and chooses a suitable generator. Also toggles the
+ * msb flag. The msb flag is used to generate two distinct cycles of random
+ * numbers, thus avoiding reuse of ids.
+ *
+ * This function is called from ip_randomid() when needed; an application
+ * does not have to worry about it.
*/
static void
ip_initid(void)
{
- mtx_assert(&ip_id_mtx, MA_OWNED);
+ u_int16_t j, i;
+ int noprime = 1;
+ struct timeval time;
+
+ getmicrotime(&time);
+ read_random((void *) &tmp, sizeof(tmp));
+ ru_x = (tmp & 0xFFFF) % RU_M;
+
+ /* 15 bits of random seed. */
+ ru_seed = (tmp >> 16) & 0x7FFF;
+ read_random((void *) &tmp, sizeof(tmp));
+ ru_seed2 = tmp & 0x7FFF;
+
+ read_random((void *) &tmp, sizeof(tmp));
+
+ /* Determine the LCG we use. */
+ ru_b = (tmp & 0xfffe) | 1;
+ ru_a = pmod(RU_AGEN, (tmp >> 16) & 0xfffe, RU_M);
+ while (ru_b % 3 == 0)
+ ru_b += 2;
+
+ read_random((void *) &tmp, sizeof(tmp));
+ j = tmp % RU_N;
+ tmp = tmp >> 16;
- if (id_array != NULL) {
- free(id_array, M_IPID);
- free(id_bits, M_IPID);
- }
- random_id_collisions = 0;
- random_id_total = 0;
- array_ptr = 0;
- id_array = (u_int16_t *) malloc(array_size * sizeof(u_int16_t),
- M_IPID, M_NOWAIT | M_ZERO);
- id_bits = (bitstr_t *) malloc(bitstr_size(65536), M_IPID,
- M_NOWAIT | M_ZERO);
- if (id_array == NULL || id_bits == NULL) {
- /* Neither or both. */
- if (id_array != NULL) {
- free(id_array, M_IPID);
- id_array = NULL;
- }
- if (id_bits != NULL) {
- free(id_bits, M_IPID);
- id_bits = NULL;
- }
+ /*
+ * Do a fast gcd(j,RU_N-1), so we can find a j with gcd(j, RU_N-1) ==
+ * 1, giving a new generator for RU_GEN^j mod RU_N.
+ */
+ while (noprime) {
+ for (i=0; i<PFAC_N; i++)
+ if (j%pfacts[i] == 0)
+ break;
+
+ if (i>=PFAC_N)
+ noprime = 0;
+ else
+ j = (j+1) % RU_N;
}
+
+ ru_g = pmod(RU_GEN,j,RU_N);
+ ru_counter = 0;
+
+ ru_reseed = time.tv_sec + RU_OUT;
+ ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
}
u_int16_t
ip_randomid(void)
{
- u_int16_t new_id;
+ int i, n;
+ struct timeval time;
- mtx_lock(&ip_id_mtx);
- if (id_array == NULL)
- ip_initid();
+ /* XXX not reentrant */
+ getmicrotime(&time);
+ if (ru_counter >= RU_MAX || time.tv_sec > ru_reseed)
+ ip_initid();
- /*
- * Fail gracefully; return a fixed id if memory allocation failed;
- * ideally we wouldn't do allocation in this context in order to
- * avoid the possibility of this failure mode.
- */
- if (id_array == NULL) {
- mtx_unlock(&ip_id_mtx);
- return (1);
- }
+ if (!tmp)
+ read_random((void *) &tmp, sizeof(tmp));
- /*
- * To avoid a conflict with the zeros that the array is initially
- * filled with, we never hand out an id of zero.
- */
- new_id = 0;
- do {
- if (new_id != 0)
- random_id_collisions++;
- arc4rand(&new_id, sizeof(new_id), 0);
- } while (bit_test(id_bits, new_id) || new_id == 0);
- bit_clear(id_bits, id_array[array_ptr]);
- bit_set(id_bits, new_id);
- id_array[array_ptr] = new_id;
- array_ptr++;
- if (array_ptr == array_size)
- array_ptr = 0;
- random_id_total++;
- mtx_unlock(&ip_id_mtx);
- return (new_id);
-}
+ /* Skip a random number of ids. */
+ n = tmp & 0x3; tmp = tmp >> 2;
+ if (ru_counter + n >= RU_MAX)
+ ip_initid();
+ for (i = 0; i <= n; i++)
+ /* Linear Congruential Generator. */
+ ru_x = (ru_a*ru_x + ru_b) % RU_M;
+
+ ru_counter += i;
+
+ return (ru_seed ^ pmod(ru_g,ru_seed2 ^ ru_x,RU_N)) | ru_msb;
+}
Index: icmp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/icmp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/icmp_var.h -L sys/netinet/icmp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/icmp_var.h
+++ sys/netinet/icmp_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/icmp_var.h,v 1.25 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/icmp_var.h,v 1.26 2007/07/19 22:34:24 rwatson Exp $
*/
#ifndef _NETINET_ICMP_VAR_H_
@@ -82,7 +82,8 @@
#define BANDLIM_ICMP_TSTAMP 2
#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
-#define BANDLIM_MAX 4
+#define BANDLIM_ICMP6_UNREACH 5
+#define BANDLIM_MAX 5
#endif
#endif
--- /dev/null
+++ sys/netinet/sctp_header.h
@@ -0,0 +1,584 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_header.h,v 1.14 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_header.h,v 1.6 2007/08/24 00:53:51 rrs Exp $");
+
+#ifndef __sctp_header_h__
+#define __sctp_header_h__
+
+#include <sys/time.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_constants.h>
+
+
+/*
+ * Parameter structures
+ */
+struct sctp_ipv4addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV4_PARAM_TYPE, len=8 */
+ uint32_t addr; /* IPV4 address */
+} SCTP_PACKED;
+
+#define SCTP_V6_ADDR_BYTES 16
+
+
+struct sctp_ipv6addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */
+ uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */
+} SCTP_PACKED;
+
+/* Cookie Preservative */
+struct sctp_cookie_perserve_param {
+ struct sctp_paramhdr ph;/* type=SCTP_COOKIE_PRESERVE, len=8 */
+ uint32_t time; /* time in ms to extend cookie */
+};
+
+#define SCTP_ARRAY_MIN_LEN 1
+/* Host Name Address */
+struct sctp_host_name_param {
+ struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */
+ char name[SCTP_ARRAY_MIN_LEN]; /* host name */
+} SCTP_PACKED;
+
+/*
+ * This is the maximum padded size of a s-a-p (supported address parameter):
+ * paramhdr (4 bytes) + 3 address types (6 bytes) + 2 bytes of pad = 12
+ */
+#define SCTP_MAX_ADDR_PARAMS_SIZE 12
+/* supported address type */
+struct sctp_supported_addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */
+ uint16_t addr_type[SCTP_ARRAY_MIN_LEN]; /* array of supported address
+ * types */
+} SCTP_PACKED;
+
+/* ECN parameter */
+struct sctp_ecn_supported_param {
+ struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
+} SCTP_PACKED;
+
+
+/* heartbeat info parameter */
+struct sctp_heartbeat_info_param {
+ struct sctp_paramhdr ph;
+ uint32_t time_value_1;
+ uint32_t time_value_2;
+ uint32_t random_value1;
+ uint32_t random_value2;
+ uint16_t user_req;
+ uint8_t addr_family;
+ uint8_t addr_len;
+ char address[SCTP_ADDRMAX];
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-prsctp */
+/* PR-SCTP supported parameter */
+struct sctp_prsctp_supported_param {
+ struct sctp_paramhdr ph;
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-addip-sctp */
+struct sctp_asconf_paramhdr { /* an ASCONF "parameter" */
+ struct sctp_paramhdr ph;/* a SCTP parameter header */
+ uint32_t correlation_id;/* correlation id for this param */
+} SCTP_PACKED;
+
+struct sctp_asconf_addr_param { /* an ASCONF address parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv6addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+struct sctp_asconf_addrv4_param { /* an ASCONF address (v4) parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv4addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+#define SCTP_MAX_SUPPORTED_EXT 256
+
+struct sctp_supported_chunk_types_param {
+ struct sctp_paramhdr ph;/* type = 0x8008 len = x */
+ uint8_t chunk_types[0];
+} SCTP_PACKED;
+
+
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+struct sctp_ecn_nonce_supported_param {
+ struct sctp_paramhdr ph;/* type = 0x8001 len = 4 */
+} SCTP_PACKED;
+
+
+/*
+ * Structures for DATA chunks
+ */
+struct sctp_data {
+ uint32_t tsn;
+ uint16_t stream_id;
+ uint16_t stream_sequence;
+ uint32_t protocol_id;
+ /* user data follows */
+} SCTP_PACKED;
+
+struct sctp_data_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_data dp;
+} SCTP_PACKED;
+
+/*
+ * Structures for the control chunks
+ */
+
+/* Initiate (INIT)/Initiate Ack (INIT ACK) */
+struct sctp_init {
+ uint32_t initiate_tag; /* initiate tag */
+ uint32_t a_rwnd; /* a_rwnd */
+ uint16_t num_outbound_streams; /* OS */
+ uint16_t num_inbound_streams; /* MIS */
+ uint32_t initial_tsn; /* I-TSN */
+ /* optional param's follow */
+} SCTP_PACKED;
+
+#define SCTP_IDENTIFICATION_SIZE 16
+#define SCTP_ADDRESS_SIZE 4
+#define SCTP_RESERVE_SPACE 6
+/* state cookie header */
+struct sctp_state_cookie { /* this is our definition... */
+ uint8_t identification[SCTP_IDENTIFICATION_SIZE]; /* id of who we are */
+ struct timeval time_entered; /* the time I built cookie */
+ uint32_t cookie_life; /* life I will award this cookie */
+ uint32_t tie_tag_my_vtag; /* my tag in old association */
+
+ uint32_t tie_tag_peer_vtag; /* peers tag in old association */
+ uint32_t peers_vtag; /* peers tag in INIT (for quick ref) */
+
+ uint32_t my_vtag; /* my tag in INIT-ACK (for quick ref) */
+ uint32_t address[SCTP_ADDRESS_SIZE]; /* 4 ints/128 bits */
+ uint32_t addr_type; /* address type */
+ uint32_t laddress[SCTP_ADDRESS_SIZE]; /* my local from address */
+ uint32_t laddr_type; /* my local from address type */
+ uint32_t scope_id; /* v6 scope id for link-locals */
+
+ uint16_t peerport; /* port address of the peer in the INIT */
+ uint16_t myport; /* my port address used in the INIT */
+ uint8_t ipv4_addr_legal;/* Are V4 addr legal? */
+ uint8_t ipv6_addr_legal;/* Are V6 addr legal? */
+ uint8_t local_scope; /* IPv6 local scope flag */
+ uint8_t site_scope; /* IPv6 site scope flag */
+
+ uint8_t ipv4_scope; /* IPv4 private addr scope */
+ uint8_t loopback_scope; /* loopback scope information */
+ uint8_t reserved[SCTP_RESERVE_SPACE]; /* Align to 64 bits */
+ /*
+ * at the end is tacked on the INIT chunk and the INIT-ACK chunk
+ * (minus the cookie).
+ */
+} SCTP_PACKED;
+
+struct sctp_inv_mandatory_param {
+ uint16_t cause;
+ uint16_t length;
+ uint32_t num_param;
+ uint16_t param;
+ /*
+ * We include this so it can be zeroed, since only a missing cookie
+ * will cause this error.
+ */
+ uint16_t resv;
+} SCTP_PACKED;
+
+struct sctp_unresolv_addr {
+ uint16_t cause;
+ uint16_t length;
+ uint16_t addr_type;
+ uint16_t reserved; /* Only one invalid addr type */
+} SCTP_PACKED;
+
+/* state cookie parameter */
+struct sctp_state_cookie_param {
+ struct sctp_paramhdr ph;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+struct sctp_init_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_init init;
+} SCTP_PACKED;
+
+struct sctp_init_msg {
+ struct sctphdr sh;
+ struct sctp_init_chunk msg;
+} SCTP_PACKED;
+
+/* ... used for both INIT and INIT ACK */
+#define sctp_init_ack sctp_init
+#define sctp_init_ack_chunk sctp_init_chunk
+#define sctp_init_ack_msg sctp_init_msg
+
+
+/* Selective Ack (SACK) */
+struct sctp_gap_ack_block {
+ uint16_t start; /* Gap Ack block start */
+ uint16_t end; /* Gap Ack block end */
+} SCTP_PACKED;
+
+struct sctp_sack {
+ uint32_t cum_tsn_ack; /* cumulative TSN Ack */
+ uint32_t a_rwnd; /* updated a_rwnd of sender */
+ uint16_t num_gap_ack_blks; /* number of Gap Ack blocks */
+ uint16_t num_dup_tsns; /* number of duplicate TSNs */
+ /* struct sctp_gap_ack_block's follow */
+ /* uint32_t duplicate_tsn's follow */
+} SCTP_PACKED;
+
+struct sctp_sack_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_sack sack;
+} SCTP_PACKED;
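+
+/*
+ * Only the fixed part of the SACK is declared above; as the comments note,
+ * the gap blocks and duplicate TSNs follow it in the packet. A purely
+ * illustrative userland sketch of how a receiver might walk that tail
+ * (assuming the structures above plus <stdio.h> and <arpa/inet.h>; the
+ * function name is made up and is not part of this header):
+ *
+ *	static void
+ *	dump_sack(const struct sctp_sack_chunk *sc)
+ *	{
+ *		const struct sctp_gap_ack_block *gap =
+ *		    (const struct sctp_gap_ack_block *)(sc + 1);
+ *		uint16_t ngaps = ntohs(sc->sack.num_gap_ack_blks);
+ *		uint16_t ndups = ntohs(sc->sack.num_dup_tsns);
+ *		const uint32_t *dup = (const uint32_t *)(gap + ngaps);
+ *		uint16_t i;
+ *
+ *		printf("cum tsn ack %u, a_rwnd %u\n",
+ *		    ntohl(sc->sack.cum_tsn_ack), ntohl(sc->sack.a_rwnd));
+ *		for (i = 0; i < ngaps; i++)
+ *			printf("  gap block %u: %u-%u\n", i,
+ *			    ntohs(gap[i].start), ntohs(gap[i].end));
+ *		for (i = 0; i < ndups; i++)
+ *			printf("  duplicate tsn %u\n", ntohl(dup[i]));
+ *	}
+ */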
+
+
+/* Heartbeat Request (HEARTBEAT) */
+struct sctp_heartbeat {
+ struct sctp_heartbeat_info_param hb_info;
+} SCTP_PACKED;
+
+struct sctp_heartbeat_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_heartbeat heartbeat;
+} SCTP_PACKED;
+
+/* ... used for Heartbeat Ack (HEARTBEAT ACK) */
+#define sctp_heartbeat_ack sctp_heartbeat
+#define sctp_heartbeat_ack_chunk sctp_heartbeat_chunk
+
+
+/* Abort Association (ABORT) */
+struct sctp_abort_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error cause may follow */
+} SCTP_PACKED;
+
+struct sctp_abort_msg {
+ struct sctphdr sh;
+ struct sctp_abort_chunk msg;
+} SCTP_PACKED;
+
+
+/* Shutdown Association (SHUTDOWN) */
+struct sctp_shutdown_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t cumulative_tsn_ack;
+} SCTP_PACKED;
+
+
+/* Shutdown Acknowledgment (SHUTDOWN ACK) */
+struct sctp_shutdown_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+
+/* Operation Error (ERROR) */
+struct sctp_error_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error causes follow */
+} SCTP_PACKED;
+
+
+/* Cookie Echo (COOKIE ECHO) */
+struct sctp_cookie_echo_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+/* Cookie Acknowledgment (COOKIE ACK) */
+struct sctp_cookie_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Explicit Congestion Notification Echo (ECNE) */
+struct sctp_ecne_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Congestion Window Reduced (CWR) */
+struct sctp_cwr_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Shutdown Complete (SHUTDOWN COMPLETE) */
+struct sctp_shutdown_complete_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Oper error holding a stale cookie */
+struct sctp_stale_cookie_msg {
+ struct sctp_paramhdr ph;/* really an error cause */
+ uint32_t time_usec;
+} SCTP_PACKED;
+
+struct sctp_adaptation_layer_indication {
+ struct sctp_paramhdr ph;
+ uint32_t indication;
+} SCTP_PACKED;
+
+struct sctp_cookie_while_shutting_down {
+ struct sctphdr sh;
+ struct sctp_chunkhdr ch;
+ struct sctp_paramhdr ph;/* really an error cause */
+} SCTP_PACKED;
+
+struct sctp_shutdown_complete_msg {
+ struct sctphdr sh;
+ struct sctp_shutdown_complete_chunk shut_cmp;
+} SCTP_PACKED;
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ */
+/* Address/Stream Configuration Change (ASCONF) */
+struct sctp_asconf_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* lookup address parameter (mandatory) */
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* Address/Stream Configuration Acknowledge (ASCONF ACK) */
+struct sctp_asconf_ack_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* draft-ietf-tsvwg-prsctp */
+/* Forward Cumulative TSN (FORWARD TSN) */
+struct sctp_forward_tsn_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t new_cumulative_tsn;
+ /* stream/sequence pairs (sctp_strseq) follow */
+} SCTP_PACKED;
+
+struct sctp_strseq {
+ uint16_t stream;
+ uint16_t sequence;
+} SCTP_PACKED;
+
+struct sctp_forward_tsn_msg {
+ struct sctphdr sh;
+ struct sctp_forward_tsn_chunk msg;
+} SCTP_PACKED;
+
+/* should be a multiple of 4 - 1 aka 3/7/11 etc. */
+
+#define SCTP_NUM_DB_TO_VERIFY 31
+
+struct sctp_chunk_desc {
+ uint8_t chunk_type;
+ uint8_t data_bytes[SCTP_NUM_DB_TO_VERIFY];
+ uint32_t tsn_ifany;
+} SCTP_PACKED;
+
+
+struct sctp_pktdrop_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t bottle_bw;
+ uint32_t current_onq;
+ uint16_t trunc_len;
+ uint16_t reserved;
+ uint8_t data[0];
+} SCTP_PACKED;
+
+/**********STREAM RESET STUFF ******************/
+
+struct sctp_stream_reset_out_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq; /* monotonically increasing seq no */
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t send_reset_at_tsn; /* last TSN I assigned outbound */
+ uint16_t list_of_streams[0]; /* if not all list of streams */
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+ uint16_t list_of_streams[0]; /* if not all list of streams */
+} SCTP_PACKED;
+
+
+struct sctp_stream_reset_tsn_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response_tsn {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+ uint32_t senders_next_tsn;
+ uint32_t receivers_next_tsn;
+} SCTP_PACKED;
+
+
+
+#define SCTP_STREAM_RESET_NOTHING 0x00000000 /* Nothing for me to do */
+#define SCTP_STREAM_RESET_PERFORMED 0x00000001 /* Did it */
+#define SCTP_STREAM_RESET_DENIED 0x00000002 /* refused to do it */
+#define SCTP_STREAM_RESET_ERROR_STR 0x00000003 /* bad Stream no */
+#define SCTP_STREAM_RESET_TRY_LATER 0x00000004 /* collision, try again */
+#define SCTP_STREAM_RESET_BAD_SEQNO 0x00000005 /* bad str-reset seq no */
+
+/*
+ * Convenience structures; note that if you are making a request for
+ * specific streams, then the request will need to be an overlay structure.
+ */
+
+struct sctp_stream_reset_out_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_out_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_in_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_tsn_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_tsn_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_resp {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response sr_resp;
+} SCTP_PACKED;
+
+/* response only valid with a TSN request */
+struct sctp_stream_reset_resp_tsn {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response_tsn sr_resp;
+} SCTP_PACKED;
+
+/****************************************************/
+
+/*
+ * Authenticated chunks support draft-ietf-tsvwg-sctp-auth
+ */
+
+/* Should we make the max be 32? */
+#define SCTP_RANDOM_MAX_SIZE 256
+struct sctp_auth_random {
+ struct sctp_paramhdr ph;/* type = 0x8002 */
+ uint8_t random_data[0];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk_list {
+ struct sctp_paramhdr ph;/* type = 0x8003 */
+ uint8_t chunk_types[0];
+} SCTP_PACKED;
+
+struct sctp_auth_hmac_algo {
+ struct sctp_paramhdr ph;/* type = 0x8004 */
+ uint16_t hmac_ids[0];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk {
+ struct sctp_chunkhdr ch;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ uint8_t hmac[0];
+} SCTP_PACKED;
+
+struct sctp_auth_invalid_hmac {
+ struct sctp_paramhdr ph;
+ uint16_t hmac_id;
+ uint16_t padding;
+} SCTP_PACKED;
+
+/*
+ * we pre-reserve enough room for an ECNE or CWR AND a SACK with no missing
+ * pieces. If ECNE is missing we could have a couple of blocks. This way we
+ * optimize so we MOST likely can bundle a SACK/ECN with the smallest size
+ * data chunk I will split into. We could increase throughput slightly by
+ * taking out these two but the 24-sack/8-CWR i.e. 32 bytes I pre-reserve I
+ * feel is worth it for now.
+ */
+#ifndef SCTP_MAX_OVERHEAD
+#ifdef INET6
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip6_hdr))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip6_hdr))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip6_hdr) + \
+ sizeof(struct sctphdr))
+
+#else
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
+#endif /* INET6 */
+#endif /* !SCTP_MAX_OVERHEAD */
+
+#define SCTP_MED_V4_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+#define SCTP_MIN_V4_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
+#endif /* !__sctp_header_h__ */
Index: tcp_debug.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_debug.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_debug.h -L sys/netinet/tcp_debug.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_debug.h
+++ sys/netinet/tcp_debug.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_debug.h,v 1.15 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/tcp_debug.h,v 1.16 2007/03/24 22:15:02 maxim Exp $
*/
#ifndef _NETINET_TCP_DEBUG_H_
@@ -45,10 +45,11 @@
*/
struct tcpiphdr td_ti;
struct {
+#define IP6_HDR_LEN 40 /* sizeof(struct ip6_hdr) */
#if !defined(_KERNEL) && defined(INET6)
struct ip6_hdr ip6;
#else
- u_char ip6buf[40]; /* sizeof(struct ip6_hdr) */
+ u_char ip6buf[IP6_HDR_LEN];
#endif
struct tcphdr th;
} td_ti6;
Index: ip_gre.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_gre.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_gre.c -L sys/netinet/ip_gre.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_gre.c
+++ sys/netinet/ip_gre.c
@@ -1,5 +1,4 @@
/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
-/* $FreeBSD: src/sys/netinet/ip_gre.c,v 1.19.2.2 2006/01/27 21:50:10 bz Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -45,6 +44,9 @@
* This currently handles IPPROTO_GRE, IPPROTO_MOBILE
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_gre.c,v 1.25 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet.h"
#include "opt_atalk.h"
#include "opt_inet6.h"
@@ -95,7 +97,7 @@
static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
-static int gre_input2(struct mbuf *, int, u_char);
+static struct mbuf *gre_input2(struct mbuf *, int, u_char);
/*
* De-encapsulate a packet and feed it back through ip input (this
@@ -106,29 +108,27 @@
void
gre_input(struct mbuf *m, int off)
{
- int ret, proto;
+ int proto;
proto = (mtod(m, struct ip *))->ip_p;
- ret = gre_input2(m, off, proto);
+ m = gre_input2(m, off, proto);
+
/*
- * ret == 0 : packet not processed, meaning that
- * no matching tunnel that is up is found.
- * we inject it to raw ip socket to see if anyone picks it up.
+ * If no matching tunnel that is up is found, we inject
+ * the mbuf into the raw ip socket to see if anyone picks it up.
*/
- if (ret == 0)
+ if (m != NULL)
rip_input(m, off);
}
/*
- * decapsulate.
- * Does the real work and is called from gre_input() (above)
- * returns 0 if packet is not yet processed
- * and 1 if it needs no further processing
- * proto is the protocol number of the "calling" foo_input()
- * routine.
+ * Decapsulate. Does the real work and is called from gre_input()
+ * (above). Returns an mbuf back if packet is not yet processed,
+ * and NULL if it needs no further processing. proto is the protocol
+ * number of the "calling" foo_input() routine.
*/
-static int
+static struct mbuf *
gre_input2(struct mbuf *m ,int hlen, u_char proto)
{
struct greip *gip;
@@ -139,13 +139,13 @@
if ((sc = gre_lookup(m, proto)) == NULL) {
/* No matching tunnel or tunnel is down. */
- return (0);
+ return (m);
}
if (m->m_len < sizeof(*gip)) {
m = m_pullup(m, sizeof(*gip));
if (m == NULL)
- return (ENOBUFS);
+ return (NULL);
}
gip = mtod(m, struct greip *);
@@ -164,7 +164,7 @@
hlen += 4;
/* We don't support routing fields (variable length) */
if (flags & GRE_RP)
- return (0);
+ return (m);
if (flags & GRE_KP)
hlen += 4;
if (flags & GRE_SP)
@@ -191,23 +191,24 @@
af = AF_APPLETALK;
break;
#endif
- default: /* others not yet supported */
- return (0);
+ default:
+ /* Others not yet supported. */
+ return (m);
}
break;
default:
- /* others not yet supported */
- return (0);
+ /* Others not yet supported. */
+ return (m);
}
if (hlen > m->m_pkthdr.len) {
m_freem(m);
- return (EINVAL);
+ return (NULL);
}
/* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
m_adj(m, hlen);
- if (GRE2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
@@ -215,7 +216,8 @@
netisr_dispatch(isr, m);
- return (1); /* packet is done, no further processing needed */
+ /* Packet is done, no further processing needed. */
+ return (NULL);
}
/*
@@ -289,7 +291,7 @@
ip->ip_sum = 0;
ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
- if (GRE2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
u_int32_t af = AF_INET;
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
@@ -310,9 +312,7 @@
* in_gre.c during destroy.
*/
static struct gre_softc *
-gre_lookup(m, proto)
- struct mbuf *m;
- u_int8_t proto;
+gre_lookup(struct mbuf *m, u_int8_t proto)
{
struct ip *ip = mtod(m, struct ip *);
struct gre_softc *sc;
Index: ip6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip6.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip6.h -L sys/netinet/ip6.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip6.h
+++ sys/netinet/ip6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/ip6.h,v 1.13.2.2 2005/10/09 05:50:43 ume Exp $ */
+/* $FreeBSD: src/sys/netinet/ip6.h,v 1.15 2005/07/20 10:30:52 ume Exp $ */
/* $KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $ */
/*-
Index: in_rmx.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_rmx.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_rmx.c -L sys/netinet/in_rmx.c -u -r1.1.1.1 -r1.2
--- sys/netinet/in_rmx.c
+++ sys/netinet/in_rmx.c
@@ -25,8 +25,6 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/in_rmx.c,v 1.53 2005/01/07 01:45:44 imp Exp $
*/
/*
@@ -42,6 +40,9 @@
* indefinitely. See in_rtqtimo() below for the exact mechanism.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_rmx.c,v 1.57 2007/10/07 20:44:22 silby Exp $");
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -66,7 +67,7 @@
*/
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
- struct radix_node *treenodes)
+ struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
@@ -190,7 +191,7 @@
*/
if (rtq_reallyold != 0) {
rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+ rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
} else {
rtexpunge(rt);
}
@@ -220,7 +221,7 @@
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
+ if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
@@ -235,10 +236,10 @@
}
} else {
if (ap->updating &&
- (rt->rt_rmx.rmx_expire - time_second >
+ (rt->rt_rmx.rmx_expire - time_uptime >
rtq_reallyold)) {
rt->rt_rmx.rmx_expire =
- time_second + rtq_reallyold;
+ time_uptime + rtq_reallyold;
}
ap->nextstop = lmin(ap->nextstop,
rt->rt_rmx.rmx_expire);
@@ -262,7 +263,7 @@
arg.found = arg.killed = 0;
arg.rnh = rnh;
- arg.nextstop = time_second + rtq_timeout;
+ arg.nextstop = time_uptime + rtq_timeout;
arg.draining = arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in_rtqkill, &arg);
@@ -277,14 +278,14 @@
* hard.
*/
if ((arg.found - arg.killed > rtq_toomany) &&
- (time_second - last_adjusted_timeout >= rtq_timeout) &&
+ (time_uptime - last_adjusted_timeout >= rtq_timeout) &&
rtq_reallyold > rtq_minreallyold) {
rtq_reallyold = 2 * rtq_reallyold / 3;
if (rtq_reallyold < rtq_minreallyold) {
rtq_reallyold = rtq_minreallyold;
}
- last_adjusted_timeout = time_second;
+ last_adjusted_timeout = time_uptime;
#ifdef DIAGNOSTIC
log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
rtq_reallyold);
@@ -297,7 +298,7 @@
}
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
}
Index: accf_data.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/accf_data.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/accf_data.c -L sys/netinet/accf_data.c -u -r1.1.1.1 -r1.2
--- sys/netinet/accf_data.c
+++ sys/netinet/accf_data.c
@@ -22,10 +22,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/accf_data.c,v 1.10 2004/05/30 20:23:30 phk Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/accf_data.c,v 1.11 2007/10/07 20:44:22 silby Exp $");
+
#define ACCEPT_FILTER_MOD
#include <sys/param.h>
Index: ip_divert.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_divert.c -L sys/netinet/ip_divert.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_divert.c
+++ sys/netinet/ip_divert.c
@@ -25,10 +25,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.113.2.1 2005/11/16 10:31:22 ru Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_divert.c,v 1.130 2007/10/07 20:44:22 silby Exp $");
+
#if !defined(KLD_MODULE)
#include "opt_inet.h"
#include "opt_ipfw.h"
@@ -45,10 +46,10 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/kernel.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -61,6 +62,7 @@
#include <vm/uma.h>
#include <net/if.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -72,6 +74,8 @@
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
+#include <security/mac/mac_framework.h>
+
/*
* Divert sockets
*/
@@ -116,22 +120,51 @@
/*
* Initialize divert connection block queue.
*/
+static void
+div_zone_change(void *tag)
+{
+
+ uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+div_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "divinp");
+ return (0);
+}
+
+static void
+div_inpcb_fini(void *mem, int size)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_DESTROY(inp);
+}
+
void
div_init(void)
{
+
INP_INFO_LOCK_INIT(&divcbinfo, "div");
LIST_INIT(&divcb);
- divcbinfo.listhead = &divcb;
+ divcbinfo.ipi_listhead = &divcb;
/*
* XXX We don't use the hash list for divert IP, but it's easier
* to allocate a one entry hash list than it is to check all
* over the place for hashbase == NULL.
*/
- divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
- divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
+ divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask);
+ divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+ &divcbinfo.ipi_porthashmask);
divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
+ UMA_ZONE_NOFREE);
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
}
/*
@@ -197,8 +230,6 @@
/* Find IP address for receive interface */
TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
- if (ifa->ifa_addr == NULL)
- continue;
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
divsrc.sin_addr =
@@ -270,12 +301,13 @@
* the interface with that address.
*/
static int
-div_output(struct socket *so, struct mbuf *m,
- struct sockaddr_in *sin, struct mbuf *control)
+div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
+ struct mbuf *control)
{
struct m_tag *mtag;
struct divert_tag *dt;
int error = 0;
+ struct mbuf *options;
/*
 * An mbuf may not have come from userland, but we pretend
@@ -332,6 +364,8 @@
if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
error = EINVAL;
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&divcbinfo);
m_freem(m);
} else {
/* Convert fields to host order for ip_output() */
@@ -344,15 +378,46 @@
#ifdef MAC
mac_create_mbuf_from_inpcb(inp, m);
#endif
- error = ip_output(m,
- inp->inp_options, NULL,
- ((so->so_options & SO_DONTROUTE) ?
- IP_ROUTETOIF : 0) |
- IP_ALLOWBROADCAST | IP_RAWOUTPUT,
- inp->inp_moptions, NULL);
+ /*
+ * Get ready to inject the packet into ip_output().
+ * Just in case socket options were specified on the
+ * divert socket, we duplicate them. This is done
+ * to avoid having to hold the PCB locks over the call
+ * to ip_output(), as doing this results in a number of
+ * lock ordering complexities.
+ *
+ * Note that we set the multicast options argument for
+ * ip_output() to NULL since it should be invariant that
+ * they are not present.
+ */
+ KASSERT(inp->inp_moptions == NULL,
+ ("multicast options set on a divert socket"));
+ options = NULL;
+ /*
+ * XXXCSJP: It is unclear to me whether or not it makes
+ * sense for divert sockets to have options. However,
+ * for now we will duplicate them with the INP locks
+ * held so we can use them in ip_output() without
+ * requiring a reference to the pcb.
+ */
+ if (inp->inp_options != NULL) {
+ options = m_dup(inp->inp_options, M_DONTWAIT);
+ if (options == NULL)
+ error = ENOBUFS;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&divcbinfo);
+ if (error == ENOBUFS) {
+ m_freem(m);
+ return (error);
+ }
+ error = ip_output(m, options, NULL,
+ ((so->so_options & SO_DONTROUTE) ?
+ IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
+ IP_RAWOUTPUT, NULL, NULL);
+ if (options != NULL)
+ m_freem(options);
}
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&divcbinfo);
} else {
dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
if (m->m_pkthdr.rcvif == NULL) {
@@ -377,8 +442,8 @@
mac_create_mbuf_from_socket(so, m);
SOCK_UNLOCK(so);
#endif
- /* Send packet to input processing */
- ip_input(m);
+ /* Send packet to input processing via netisr */
+ netisr_queue(NETISR_IP, m);
}
return error;
@@ -394,28 +459,23 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
- if (td && (error = suser(td)) != 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return error;
+ KASSERT(inp == NULL, ("div_attach: inp != NULL"));
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_DIVERT);
+ if (error)
+ return (error);
}
error = soreserve(so, div_sendspace, div_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&divcbinfo);
+ if (error)
return error;
- }
- error = in_pcballoc(so, &divcbinfo, "divinp");
+ INP_INFO_WLOCK(&divcbinfo);
+ error = in_pcballoc(so, &divcbinfo);
if (error) {
INP_INFO_WUNLOCK(&divcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
inp->inp_ip_p = proto;
inp->inp_vflag |= INP_IPV4;
@@ -424,21 +484,18 @@
return 0;
}
-static int
+static void
div_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_detach: inp == NULL"));
+ INP_INFO_WLOCK(&divcbinfo);
INP_LOCK(inp);
in_pcbdetach(inp);
+ in_pcbfree(inp);
INP_INFO_WUNLOCK(&divcbinfo);
- return 0;
}
static int
@@ -447,12 +504,8 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_bind: inp == NULL"));
/* in_pcbbind assumes that nam is a sockaddr_in
* and in_pcbbind requires a valid address. Since divert
* sockets don't we need to make sure the address is
@@ -461,13 +514,12 @@
* and should probably have its own family.
*/
if (nam->sa_family != AF_INET)
- error = EAFNOSUPPORT;
- else {
- ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
- INP_LOCK(inp);
- error = in_pcbbind(inp, nam, td->td_ucred);
- INP_UNLOCK(inp);
- }
+ return EAFNOSUPPORT;
+ ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+ INP_INFO_WLOCK(&divcbinfo);
+ INP_LOCK(inp);
+ error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
return error;
}
@@ -477,14 +529,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&divcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -492,7 +539,7 @@
static int
div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
/* Packet must have a header (but that's about it) */
if (m->m_len < sizeof (struct ip) &&
@@ -566,7 +613,7 @@
return ENOMEM;
INP_INFO_RLOCK(&divcbinfo);
- for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(divcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -580,6 +627,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -588,8 +636,10 @@
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
@@ -610,26 +660,6 @@
return error;
}
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-div_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &divcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-div_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &divcbinfo));
-}
-
#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT");
SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0,
@@ -641,10 +671,10 @@
.pru_bind = div_bind,
.pru_control = in_control,
.pru_detach = div_detach,
- .pru_peeraddr = div_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = div_send,
.pru_shutdown = div_shutdown,
- .pru_sockaddr = div_sockaddr,
+ .pru_sockaddr = in_getsockaddr,
.pru_sosetlabel = in_pcbsosetlabel
};
Index: accf_http.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/accf_http.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/accf_http.c -L sys/netinet/accf_http.c -u -r1.1.1.1 -r1.2
--- sys/netinet/accf_http.c
+++ sys/netinet/accf_http.c
@@ -23,10 +23,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/accf_http.c,v 1.16 2005/01/07 01:45:44 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/accf_http.c,v 1.17 2007/10/07 20:44:22 silby Exp $");
+
#define ACCEPT_FILTER_MOD
#include <sys/param.h>
--- /dev/null
+++ sys/netinet/sctp_crc32.c
@@ -0,0 +1,712 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.c,v 1.12 2005/03/06 16:04:17 itojun Exp $ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_crc32.c,v 1.8 2007/05/08 17:01:10 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_crc32.h>
+
+#ifndef SCTP_USE_ADLER32
+
+
+/**
+ *
+ * Routine Description:
+ *
+ * Computes the CRC32c checksum for the specified buffer using the slicing by 8
+ * algorithm over 64 bit quantities.
+ *
+ * Arguments:
+ *
+ * crc - the initial or running remainder value used in the CRC
+ * computation
+ * p_buf - the packet buffer where crc computations are being performed
+ * length - the length of p_buf in bytes
+ * init_bytes - the number of initial bytes that need to be processed before
+ * aligning p_buf to multiples of 4 bytes
+ *
+ * Return value:
+ *
+ * The computed CRC32c value
+ */
+
+
+/*
+ * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
+ *
+ *
+ * This software program is licensed subject to the BSD License, available at
+ * http://www.opensource.org/licenses/bsd-license.html.
+ *
+ * Abstract:
+ *
+ * Tables for software CRC generation
+ */
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o32[256] =
+{
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o32
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o40[256] =
+{
+ 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
+ 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
+ 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
+ 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
+ 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
+ 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
+ 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
+ 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
+ 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
+ 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
+ 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
+ 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
+ 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
+ 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
+ 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
+ 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
+ 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
+ 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
+ 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
+ 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
+ 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
+ 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
+ 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
+ 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
+ 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
+ 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
+ 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
+ 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
+ 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
+ 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
+ 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
+ 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o40
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o48[256] =
+{
+ 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
+ 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
+ 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
+ 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
+ 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
+ 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
+ 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
+ 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
+ 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
+ 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
+ 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
+ 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
+ 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
+ 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
+ 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
+ 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
+ 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
+ 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
+ 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
+ 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
+ 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
+ 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
+ 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
+ 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
+ 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
+ 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
+ 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
+ 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
+ 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
+ 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
+ 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
+ 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o48
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o56[256] =
+{
+ 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
+ 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
+ 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
+ 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
+ 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
+ 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
+ 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
+ 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
+ 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
+ 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
+ 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
+ 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
+ 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
+ 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
+ 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
+ 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
+ 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
+ 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
+ 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
+ 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
+ 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
+ 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
+ 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
+ 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
+ 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
+ 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
+ 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
+ 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
+ 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
+ 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
+ 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
+ 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o56
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o64[256] =
+{
+ 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
+ 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
+ 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
+ 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
+ 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
+ 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
+ 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
+ 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
+ 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
+ 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
+ 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
+ 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
+ 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
+ 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
+ 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
+ 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
+ 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
+ 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
+ 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
+ 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
+ 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
+ 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
+ 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
+ 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
+ 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
+ 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
+ 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
+ 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
+ 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
+ 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
+ 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
+ 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o64
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o72[256] =
+{
+ 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
+ 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
+ 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
+ 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
+ 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
+ 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
+ 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
+ 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
+ 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
+ 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
+ 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
+ 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
+ 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
+ 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
+ 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
+ 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
+ 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
+ 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
+ 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
+ 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
+ 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
+ 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
+ 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
+ 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
+ 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
+ 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
+ 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
+ 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
+ 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
+ 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
+ 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
+ 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o72
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o80[256] =
+{
+ 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
+ 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
+ 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
+ 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
+ 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
+ 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
+ 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
+ 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
+ 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
+ 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
+ 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
+ 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
+ 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
+ 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
+ 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
+ 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
+ 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
+ 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
+ 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
+ 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
+ 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
+ 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
+ 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
+ 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
+ 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
+ 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
+ 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
+ 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
+ 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
+ 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
+ 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
+ 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o80
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o88[256] =
+{
+ 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
+ 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
+ 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
+ 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
+ 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
+ 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
+ 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
+ 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
+ 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
+ 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
+ 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
+ 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
+ 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
+ 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
+ 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
+ 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
+ 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
+ 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
+ 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
+ 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
+ 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
+ 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
+ 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
+ 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
+ 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
+ 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
+ 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
+ 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
+ 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
+ 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
+ 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
+ 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o88
+ */
+
+static uint32_t
+sctp_crc32c_sb8_64_bit(uint32_t crc,
+ unsigned char *p_buf,
+ uint32_t length,
+ uint32_t init_bytes)
+{
+ uint32_t li;
+ uint32_t term1, term2;
+ uint32_t running_length;
+ uint32_t end_bytes;
+
+ running_length = ((length - init_bytes) / 8) * 8;
+ end_bytes = length - init_bytes - running_length;
+
+ for (li = 0; li < init_bytes; li++)
+ crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^
+ (crc >> 8);
+ for (li = 0; li < running_length / 8; li++) {
+#if BYTE_ORDER == BIG_ENDIAN
+ crc ^= *p_buf++;
+ crc ^= (*p_buf++) << 8;
+ crc ^= (*p_buf++) << 16;
+ crc ^= (*p_buf++) << 24;
+#else
+ crc ^= *(uint32_t *) p_buf;
+ p_buf += 4;
+#endif
+ term1 = sctp_crc_tableil8_o88[crc & 0x000000FF] ^
+ sctp_crc_tableil8_o80[(crc >> 8) & 0x000000FF];
+ term2 = crc >> 16;
+ crc = term1 ^
+ sctp_crc_tableil8_o72[term2 & 0x000000FF] ^
+ sctp_crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
+
+#if BYTE_ORDER == BIG_ENDIAN
+ crc ^= sctp_crc_tableil8_o56[*p_buf++];
+ crc ^= sctp_crc_tableil8_o48[*p_buf++];
+ crc ^= sctp_crc_tableil8_o40[*p_buf++];
+ crc ^= sctp_crc_tableil8_o32[*p_buf++];
+#else
+ term1 = sctp_crc_tableil8_o56[(*(uint32_t *) p_buf) & 0x000000FF] ^
+ sctp_crc_tableil8_o48[((*(uint32_t *) p_buf) >> 8) & 0x000000FF];
+
+ term2 = (*(uint32_t *) p_buf) >> 16;
+ crc = crc ^
+ term1 ^
+ sctp_crc_tableil8_o40[term2 & 0x000000FF] ^
+ sctp_crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
+ p_buf += 4;
+#endif
+ }
+ for (li = 0; li < end_bytes; li++)
+ crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^
+ (crc >> 8);
+ return crc;
+}
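
The routine above is a table-driven (slicing-by-8) form of the reflected CRC32c. As a cross-check, not part of this commit, a bit-at-a-time version using the reflected Castagnoli polynomial 0x82F63B78 yields the same results for the same seed; running its inner loop once over the single byte 0x01 with an initial crc of 0 gives 0xF26B8303, i.e. entry 1 of the o32 table:

/* Reference bit-at-a-time CRC32c; reflected polynomial 0x82F63B78. */
static uint32_t
crc32c_bitwise(uint32_t crc, const unsigned char *buf, uint32_t len)
{
	uint32_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		for (bit = 0; bit < 8; bit++) {
			if (crc & 1)
				crc = (crc >> 1) ^ 0x82F63B78;
			else
				crc >>= 1;
		}
	}
	return (crc);
}
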
+
+
+/**
+ *
+ * Routine Description:
+ *
+ * Updates the running CRC32c value over the supplied buffer, using the
+ * slicing-by-8 routine above and handling any initial unaligned bytes.
+ *
+ * Arguments:
+ *
+ * crc32c - the running CRC32c value
+ * buffer - the data to be checksummed
+ * length - the length of buffer in bytes
+ *
+ * Return value:
+ *
+ * The updated CRC32c value
+ */
+uint32_t
+update_crc32(uint32_t crc32c,
+ unsigned char *buffer,
+ unsigned int length)
+{
+ uint32_t offset;
+
+ if (length == 0) {
+ return (crc32c);
+ }
+ offset = ((uintptr_t) buffer) & 0x3;
+ return (sctp_crc32c_sb8_64_bit(crc32c, buffer, length, offset));
+}
+
+uint32_t sctp_crc_c[256] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
+};
+
+
+#define SCTP_CRC32C(c,d) (c=(c>>8)^sctp_crc_c[(c^(d))&0xFF])
+
+uint32_t
+old_update_crc32(uint32_t crc32c,
+ unsigned char *buffer,
+ unsigned int length)
+{
+ unsigned int i;
+
+ for (i = 0; i < length; i++) {
+ SCTP_CRC32C(crc32c, buffer[i]);
+ }
+ return (crc32c);
+}
+
+
+uint32_t
+sctp_csum_finalize(uint32_t crc32c)
+{
+ uint32_t result;
+
+#if BYTE_ORDER == BIG_ENDIAN
+ uint8_t byte0, byte1, byte2, byte3;
+
+#endif
+ /* Complement the result */
+ result = ~crc32c;
+#if BYTE_ORDER == BIG_ENDIAN
+ /*
+ * For BIG-ENDIAN.. aka Motorola byte order the result is in
+ * little-endian form. So we must manually swap the bytes. Then we
+ * can call htonl() which does nothing...
+ */
+ byte0 = result & 0x000000ff;
+ byte1 = (result >> 8) & 0x000000ff;
+ byte2 = (result >> 16) & 0x000000ff;
+ byte3 = (result >> 24) & 0x000000ff;
+ crc32c = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
+#else
+ /*
+ * For INTEL platforms the result comes out in network order. No
+ * htonl is required or the swap above. So we optimize out both the
+ * htonl and the manual swap above.
+ */
+ crc32c = result;
+#endif
+ return (crc32c);
+}
+
+#endif
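
For reference, the intended call pattern follows the usual RFC 3309 conventions: zero the checksum field, seed the CRC with all ones, and let sctp_csum_finalize() apply the final complement and byte-order fixup. A rough sketch, assuming a packet in one flat buffer (the real stack walks mbuf chains, and the helper name here is illustrative only):

/* Illustrative only: checksum an SCTP packet held in one flat buffer. */
static uint32_t
example_sctp_cksum(unsigned char *pkt, unsigned int len)
{
	uint32_t crc = 0xffffffff;	/* initial value; checksum field already zeroed */

	crc = update_crc32(crc, pkt, len);
	return (sctp_csum_finalize(crc));
}
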
Index: tcp_timer.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_timer.h -L sys/netinet/tcp_timer.h -u -r1.2 -r1.3
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.27.2.1 2006/03/01 21:13:29 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.39 2007/09/24 05:26:24 silby Exp $
*/
#ifndef _NETINET_TCP_TIMER_H_
@@ -89,6 +89,8 @@
#define TCPTV_INFLIGHT_RTTTHRESH (10*hz/1000) /* below which inflight
disengages, in msec */
+#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
+
/*
* Minimum retransmit timer is 3 ticks, for algorithmic stability.
* TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
@@ -109,7 +111,7 @@
* The prior minimum of 1*hz (1 second) badly breaks throughput on any
 * networks faster than a modem that has minor (e.g. 1%) packet loss.
*/
-#define TCPTV_MIN ( 3 ) /* minimum allowable value */
+#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */
#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
@@ -133,11 +135,25 @@
(tv) = (value) + tcp_rexmit_slop; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
- else if ((u_long)(tv) > (u_long)(tvmax)) \
+ if ((u_long)(tv) > (u_long)(tvmax)) \
(tv) = (tvmax); \
} while(0)
#ifdef _KERNEL
+
+struct tcp_timer {
+ struct callout tt_rexmt; /* retransmit timer */
+ struct callout tt_persist; /* retransmit persistence */
+ struct callout tt_keep; /* keepalive */
+ struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
+ struct callout tt_delack; /* delayed ACK timer */
+};
+#define TT_DELACK 0x01
+#define TT_REXMT 0x02
+#define TT_PERSIST 0x04
+#define TT_KEEP 0x08
+#define TT_2MSL 0x10
+
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
@@ -150,14 +166,13 @@
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
-struct tcptw;
+extern int tcp_finwait2_timeout;
+extern int tcp_fast_finwait2_recycle;
void tcp_timer_init(void);
void tcp_timer_2msl(void *xtp);
struct tcptw *
- tcp_timer_2msl_tw(int _reuse); /* XXX temporary */
-void tcp_timer_2msl_reset(struct tcptw *_tw, int _timeo);
-void tcp_timer_2msl_stop(struct tcptw *_tw);
+ tcp_tw_2msl_scan(int _reuse); /* XXX temporary */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
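
The struct tcp_timer and TT_* flags above consolidate the per-timer callouts; the tcp_output.c diff below switches every caller from direct callout manipulation to a wrapper pair. The prototypes are not part of this header, so treat the following as assumptions inferred from the call sites:

/* Assumed prototypes, inferred from call sites later in this patch. */
void	tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta);
int	tcp_timer_active(struct tcpcb *tp, int timer_type);

/*
 * Mapping used throughout the rest of the patch; a delta of 0 stops the
 * timer, a non-zero delta (re)arms it for that many ticks:
 *   callout_stop(tp->tt_persist)            -> tcp_timer_activate(tp, TT_PERSIST, 0)
 *   callout_active(tp->tt_rexmt)            -> tcp_timer_active(tp, TT_REXMT)
 *   callout_reset(tp->tt_rexmt, t, fn, tp)  -> tcp_timer_activate(tp, TT_REXMT, t)
 */
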
Index: tcp_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet/tcp_output.c -L sys/netinet/tcp_output.c -u -r1.3 -r1.4
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -27,29 +27,28 @@
* SUCH DAMAGE.
*
* @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.112 2005/05/21 00:38:29 ps Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_output.c,v 1.141.2.3 2007/12/05 10:37:17 bz Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
-#include <sys/syslog.h>
#include <net/route.h>
@@ -58,6 +57,7 @@
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#include <netinet/ip6.h>
@@ -75,16 +75,13 @@
#endif
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
-#define IPSEC
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
#ifdef notyet
extern struct mbuf *m_copypack();
#endif
@@ -102,8 +99,12 @@
&ss_fltsz_local, 1, "Slow start flight size for local networks");
int tcp_do_newreno = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
- 0, "Enable NewReno Algorithms");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW,
+ &tcp_do_newreno, 0, "Enable NewReno Algorithms");
+
+int tcp_do_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+ &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
int tcp_do_autosndbuf = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
@@ -117,6 +118,7 @@
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
&tcp_autosndbuf_max, 0, "Max size of automatic send buffer");
+
/*
* Tcp output routine: figure out what should be sent and send it.
*/
@@ -126,19 +128,20 @@
struct socket *so = tp->t_inpcb->inp_socket;
long len, recwin, sendwin;
int off, flags, error;
-#ifdef TCP_SIGNATURE
- int sigoff = 0;
-#endif
struct mbuf *m;
struct ip *ip = NULL;
struct ipovly *ipov = NULL;
struct tcphdr *th;
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
+#ifdef IPSEC
+ unsigned ipsec_optlen = 0;
+#endif
int idle, sendalot;
- int i, sack_rxmit;
- int sack_bytes_rxmt;
+ int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
+ int tso = 0;
+ struct tcpopt to;
#if 0
int maxburst = TCP_MAXBURST;
#endif
@@ -191,7 +194,8 @@
* snd_nxt. There may be SACK information that allows us to avoid
* resending already delivered data. Adjust snd_nxt accordingly.
*/
- if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max))
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max))
tcp_sack_adjust(tp);
sendalot = 0;
off = tp->snd_nxt - tp->snd_una;
@@ -213,7 +217,7 @@
sack_bytes_rxmt = 0;
len = 0;
p = NULL;
- if (tp->sack_enable && IN_FASTRECOVERY(tp) &&
+ if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp) &&
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
long cwin;
@@ -292,7 +296,7 @@
flags &= ~TH_FIN;
sendwin = 1;
} else {
- callout_stop(tp->tt_persist);
+ tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
}
@@ -350,7 +354,8 @@
* know that foreign host supports TAO, suppress sending segment.
*/
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
- flags &= ~TH_SYN;
+ if (tp->t_state != TCPS_SYN_RECEIVED)
+ flags &= ~TH_SYN;
off--, len++;
}
@@ -377,10 +382,10 @@
*/
len = 0;
if (sendwin == 0) {
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
- if (!callout_active(tp->tt_persist))
+ if (!tcp_timer_active(tp, TT_PERSIST))
tcp_setpersist(tp);
}
}
@@ -410,7 +415,16 @@
* growing of the send buffer before it reaches its allowed
* maximum.
*
- * Optional: Shrink send buffer during idle periods together
+ * It scales directly with slow start or congestion window
+ * and does at most one step per received ACK. This fast
+ * scaling has the drawback of growing the send buffer beyond
+ * what is strictly necessary to make full use of a given
+ * delay*bandwidth product. However, testing has shown this not
+ * to be much of a problem. At worst we are trading wasting
+ * of available bandwidth (the non-use of it) for wasting some
+ * socket buffer memory.
+ *
+ * TODO: Shrink send buffer during idle periods together
* with congestion window. Requires another timer. Has to
* wait for upcoming tcp timer rewrite.
*/
@@ -419,13 +433,6 @@
so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
so->so_snd.sb_cc < tcp_autosndbuf_max &&
sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
-#if 0
- log(LOG_DEBUG, "%s: inc sockbuf, old %i, new %i, "
- "sb_cc %i, snd_wnd %i, sendwnd %i\n",
- __func__, so->so_snd.sb_hiwat,
- so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
- so->so_snd.sb_cc, (int)tp->snd_wnd, (int)sendwin);
-#endif
if (!sbreserve_locked(&so->so_snd,
min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
tcp_autosndbuf_max), so, curthread))
@@ -437,10 +444,41 @@
* Truncate to the maximum segment length or enable TCP Segmentation
* Offloading (if supported by hardware) and ensure that FIN is removed
* if the length no longer contains the last data byte.
+ *
+ * TSO may only be used if we are in a pure bulk sending state. The
+ * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+ * IP options prevent using TSO. With TSO the TCP header is the same
+ * (except for the sequence number) for all generated packets. This
+ * makes it impossible to transmit any options which vary per generated
+ * segment or packet.
+ *
+ * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
+ * removal of FIN (if not already caught here) are handled later after
+ * the exact length of the TCP options is known.
+ */
+#ifdef IPSEC
+ /*
+ * Pre-calculate here as we save another lookup into the darknesses
+ * of IPsec that way and can actually decide if TSO is ok.
*/
+ ipsec_optlen = ipsec_hdrsiz_tcp(tp);
+#endif
if (len > tp->t_maxseg) {
- len = tp->t_maxseg;
- sendalot = 1;
+ if ((tp->t_flags & TF_TSO) && tcp_do_tso &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL
+#ifdef IPSEC
+ && ipsec_optlen == 0
+#endif
+ ) {
+ tso = 1;
+ } else {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ tso = 0;
+ }
}
if (sack_rxmit) {
if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -456,7 +494,7 @@
* Sender silly window avoidance. We transmit under the following
* conditions when len is non-zero:
*
- * - We have a full segment
+ * - We have a full segment (or more with TSO)
* - This is the last buffer in a write()/send() and we are
* either idle or running NODELAY
* - we've timed out (e.g. persist timer)
@@ -465,7 +503,7 @@
* - we need to retransmit
*/
if (len) {
- if (len == tp->t_maxseg)
+ if (len >= tp->t_maxseg)
goto send;
/*
* NOTE! on localhost connections an 'ack' from the remote
@@ -497,8 +535,11 @@
* max size segments, or at least 50% of the maximum possible
* window, then want to send a window update to peer.
* Skip this if the connection is in T/TCP half-open state.
+ * Don't send pure window updates when the peer has closed
+ * the connection and won't ever send more data.
*/
- if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN)) {
+ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+ !TCPS_HAVERCVDFIN(tp->t_state)) {
/*
* "adv" is the amount we can increase the window,
* taking into account that we are limited by
@@ -536,11 +577,11 @@
* after the retransmission timer has been turned off. Make sure
* that the retransmission timer is set.
*/
- if (tp->sack_enable && SEQ_GT(tp->snd_max, tp->snd_una) &&
- !callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist)) {
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) &&
+ !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
goto just_return;
}
/*
@@ -551,11 +592,11 @@
* persisting to move a small or zero window
* (re)transmitting and thereby not persisting
*
- * callout_active(tp->tt_persist)
+ * tcp_timer_active(tp, TT_PERSIST)
* is true when we are in persist state.
* (tp->t_flags & TF_FORCEDATA)
* is set when we are called to send a persist packet.
- * callout_active(tp->tt_rexmt)
+ * tcp_timer_active(tp, TT_REXMT)
* is set when we are retransmitting
* The output side is idle when both timers are zero.
*
@@ -565,8 +606,8 @@
* if window is nonzero, transmit what we can,
* otherwise force out a byte.
*/
- if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist)) {
+ if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
tp->t_rxtshift = 0;
tcp_setpersist(tp);
}
@@ -595,157 +636,63 @@
else
#endif
hdrlen = sizeof (struct tcpiphdr);
- if (flags & TH_SYN) {
- tp->snd_nxt = tp->iss;
- if ((tp->t_flags & TF_NOOPT) == 0) {
- u_short mss;
-
- opt[0] = TCPOPT_MAXSEG;
- opt[1] = TCPOLEN_MAXSEG;
- mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
- (void)memcpy(opt + 2, &mss, sizeof(mss));
- optlen = TCPOLEN_MAXSEG;
-
- if ((tp->t_flags & TF_REQ_SCALE) &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_SCALE))) {
- *((u_int32_t *)(opt + optlen)) = htonl(
- TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 |
- TCPOLEN_WINDOW << 8 |
- tp->request_r_scale);
- optlen += 4;
- }
- }
- }
/*
- * Send a timestamp and echo-reply if this is a SYN and our side
- * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
- * and our peer have sent timestamps in our SYN's.
- */
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (flags & TH_RST) == 0 &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_TSTMP))) {
- u_int32_t *lp = (u_int32_t *)(opt + optlen);
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(tp->ts_recent);
- optlen += TCPOLEN_TSTAMP_APPA;
- }
-
- /* Set receive buffer autosizing timestamp. */
- if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = ticks;
-
-#ifdef TCP_SIGNATURE
-#ifdef INET6
- if (!isipv6)
-#endif
- if (tp->t_flags & TF_SIGNATURE) {
- int i;
- u_char *bp;
-
- /* Initialize TCP-MD5 option (RFC2385) */
- bp = (u_char *)opt + optlen;
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- sigoff = optlen + 2;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- optlen += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
-
- if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
- /*
- * Tack on the SACK permitted option *last*.
- * And do padding of options after tacking this on.
- * This is because of MSS, TS, WinScale and Signatures are
- * all present, we have just 2 bytes left for the SACK
- * permitted option, which is just enough.
- */
- /*
- * If this is the first SYN of connection (not a SYN
- * ACK), include SACK permitted option. If this is a
- * SYN ACK, include SACK permitted option if peer has
- * already done so. This is only for active connect,
- * since the syncache takes care of the passive connect.
- */
- if ((flags & TH_SYN) &&
- (!(flags & TH_ACK) || (tp->t_flags & TF_SACK_PERMIT))) {
- u_char *bp;
- bp = (u_char *)opt + optlen;
-
- *bp++ = TCPOPT_SACK_PERMITTED;
- *bp++ = TCPOLEN_SACK_PERMITTED;
- optlen += TCPOLEN_SACK_PERMITTED;
+ * Compute options for segment.
+ * We only have to care about SYN and established connection
+ * segments. Options for SYN-ACK segments are handled in TCP
+ * syncache.
+ */
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ to.to_flags = 0;
+ /* Maximum segment size. */
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+ to.to_flags |= TOF_MSS;
+ }
+ /* Window scaling. */
+ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
+ to.to_wscale = tp->request_r_scale;
+ to.to_flags |= TOF_SCALE;
+ }
+ /* Timestamps. */
+ if ((tp->t_flags & TF_RCVD_TSTMP) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
+ to.to_tsval = ticks + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = ticks;
}
-
- /*
- * Send SACKs if necessary. This should be the last
- * option processed. Only as many SACKs are sent as
- * are permitted by the maximum options size.
- *
- * In general, SACK blocks consume 8*n+2 bytes.
- * So a full size SACK blocks option is 34 bytes
- * (to generate 4 SACK blocks). At a minimum,
- * we need 10 bytes (to generate 1 SACK block).
- * If TCP Timestamps (12 bytes) and TCP Signatures
- * (18 bytes) are both present, we'll just have
- * 10 bytes for SACK options 40 - (12 + 18).
- */
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
- MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
- int nsack, sackoptlen, padlen;
- u_char *bp = (u_char *)opt + optlen;
- u_int32_t *lp;
-
- nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
- nsack = min(nsack, tp->rcv_numsacks);
- sackoptlen = (2 + nsack * TCPOLEN_SACK);
-
- /*
- * First we need to pad options so that the
- * SACK blocks can start at a 4-byte boundary
- * (sack option and length are at a 2 byte offset).
- */
- padlen = (MAX_TCPOPTLEN - optlen - sackoptlen) % 4;
- optlen += padlen;
- while (padlen-- > 0)
- *bp++ = TCPOPT_NOP;
-
- tcpstat.tcps_sack_send_blocks++;
- *bp++ = TCPOPT_SACK;
- *bp++ = sackoptlen;
- lp = (u_int32_t *)bp;
- for (i = 0; i < nsack; i++) {
- struct sackblk sack = tp->sackblks[i];
- *lp++ = htonl(sack.start);
- *lp++ = htonl(sack.end);
+ /* Selective ACK's. */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_SACKPERM;
+ else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0) {
+ to.to_flags |= TOF_SACK;
+ to.to_nsacks = tp->rcv_numsacks;
+ to.to_sacks = (u_char *)tp->sackblks;
}
- optlen += sackoptlen;
}
- }
-
- /* Pad TCP options to a 4 byte boundary */
- if (optlen < MAX_TCPOPTLEN && (optlen % sizeof(u_int32_t))) {
- int pad = sizeof(u_int32_t) - (optlen % sizeof(u_int32_t));
- u_char *bp = (u_char *)opt + optlen;
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+#ifdef INET6
+ if (!isipv6 && (tp->t_flags & TF_SIGNATURE))
+#else
+ if (tp->t_flags & TF_SIGNATURE)
+#endif /* INET6 */
+ to.to_flags |= TOF_SIGNATURE;
+#endif /* TCP_SIGNATURE */
- optlen += pad;
- while (pad) {
- *bp++ = TCPOPT_EOL;
- pad--;
- }
+ /* Processing the options. */
+ hdrlen += optlen = tcp_addoptions(&to, opt);
}
- hdrlen += optlen;
-
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -757,7 +704,7 @@
else
ipoptlen = 0;
#ifdef IPSEC
- ipoptlen += ipsec_hdrsiz_tcp(tp);
+ ipoptlen += ipsec_optlen;
#endif
/*
@@ -765,14 +712,27 @@
* bump the packet length beyond the t_maxopd length.
* Clear the FIN bit because we cut off the tail of
* the segment.
+ *
+ * When doing TSO, limit a burst to TCP_MAXWIN minus the
+ * IP, TCP and Options length to keep ip->ip_len from
+ * overflowing. Prevent the last segment from being
+ * fractional, thus making them all equal sized, and set
+ * the flag to continue sending. TSO is disabled when
+ * IP options or IPSEC are present.
*/
if (len + optlen + ipoptlen > tp->t_maxopd) {
- /*
- * If there is still more to send, don't close the connection.
- */
flags &= ~TH_FIN;
- len = tp->t_maxopd - optlen - ipoptlen;
- sendalot = 1;
+ if (tso) {
+ if (len > TCP_MAXWIN - hdrlen - optlen) {
+ len = TCP_MAXWIN - hdrlen - optlen;
+ len = len - (len % (tp->t_maxopd - optlen));
+ sendalot = 1;
+ } else if (tp->t_flags & TF_NEEDFIN)
+ sendalot = 1;
+ } else {
+ len = tp->t_maxopd - optlen - ipoptlen;
+ sendalot = 1;
+ }
}
/*#ifdef DIAGNOSTIC*/
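The TSO clamp above can be checked with a quick worked example. The helper below is a stand-alone sketch, and every number in it (header length, option length, t_maxopd) is illustrative rather than taken from the patch:

	/* Stand-alone sketch of the TSO burst clamp; all numbers are illustrative. */
	#include <stdio.h>

	int main(void)
	{
		long len = 120000;		/* sendable data queued in the socket buffer */
		long hdrlen = 52;		/* IP + TCP header length (illustrative) */
		long optlen = 12;		/* TCP options length (illustrative) */
		long t_maxopd = 1460;		/* max segment payload incl. options */
		long maxwin = 65535;		/* TCP_MAXWIN */

		if (len > maxwin - hdrlen - optlen) {
			len = maxwin - hdrlen - optlen;		/* 65471: keeps ip_len from overflowing */
			len -= len % (t_maxopd - optlen);	/* 65160: 45 equal 1448-byte segments */
		}
		printf("TSO burst clamped to %ld bytes\n", len);
		return 0;
	}

The second subtraction is what keeps every segment in the burst the same size, so only the final burst of the send can end in a short segment.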
@@ -790,9 +750,12 @@
* the template for sends on this connection.
*/
if (len) {
+ struct mbuf *mb;
+ u_int moff;
+
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
tcpstat.tcps_sndprobe++;
- else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tcpstat.tcps_sndrexmitpack++;
tcpstat.tcps_sndrexmitbyte += len;
} else {
@@ -812,7 +775,7 @@
m->m_len += hdrlen;
m->m_data -= hdrlen;
#else
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
error = ENOBUFS;
@@ -831,13 +794,20 @@
#endif
m->m_data += max_linkhdr;
m->m_len = hdrlen;
+
+ /*
+ * Start the m_copy functions from the closest mbuf
+ * to the offset in the socket buffer chain.
+ */
+ mb = sbsndptr(&so->so_snd, off, len, &moff);
+
if (len <= MHLEN - hdrlen - max_linkhdr) {
- m_copydata(so->so_snd.sb_mb, off, (int) len,
+ m_copydata(mb, moff, (int)len,
mtod(m, caddr_t) + hdrlen);
m->m_len += len;
} else {
- m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
- if (m->m_next == 0) {
+ m->m_next = m_copy(mb, moff, (int)len);
+ if (m->m_next == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
error = ENOBUFS;
@@ -865,7 +835,7 @@
else
tcpstat.tcps_sndwinup++;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto out;
@@ -920,8 +890,8 @@
* (retransmit and persist are mutually exclusive...)
*/
if (sack_rxmit == 0) {
- if (len || (flags & (TH_SYN|TH_FIN))
- || callout_active(tp->tt_persist))
+ if (len || (flags & (TH_SYN|TH_FIN)) ||
+ tcp_timer_active(tp, TT_PERSIST))
th->th_seq = htonl(tp->snd_nxt);
else
th->th_seq = htonl(tp->snd_max);
@@ -947,8 +917,17 @@
recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
recwin = (long)TCP_MAXWIN << tp->rcv_scale;
- th->th_win = htons((u_short) (recwin >> tp->rcv_scale));
+ /*
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled. The <SYN,ACK>
+ * case is handled in syncache.
+ */
+ if (flags & TH_SYN)
+ th->th_win = htons((u_short)
+ (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
+ else
+ th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
/*
* Adjust the RXWIN0SENT flag - indicate that we have advertised
@@ -978,9 +957,11 @@
#ifdef INET6
if (!isipv6)
#endif
- if (tp->t_flags & TF_SIGNATURE)
+ if (tp->t_flags & TF_SIGNATURE) {
+ int sigoff = to.to_signature - opt;
tcp_signature_compute(m, sizeof(struct ip), len, optlen,
(u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
+ }
#endif
/*
@@ -1010,11 +991,21 @@
}
/*
+ * Enable TSO and specify the size of the segments.
+ * The TCP pseudo header checksum is always provided.
+ * XXX: Fixme: This is currently not the case for IPv6.
+ */
+ if (tso) {
+ m->m_pkthdr.csum_flags = CSUM_TSO;
+ m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ }
+
+ /*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
*/
if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !callout_active(tp->tt_persist)) {
+ !tcp_timer_active(tp, TT_PERSIST)) {
tcp_seq startseq = tp->snd_nxt;
/*
@@ -1053,15 +1044,14 @@
* of retransmit time.
*/
timer:
- if (!callout_active(tp->tt_rexmt) &&
+ if (!tcp_timer_active(tp, TT_REXMT) &&
((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
(tp->snd_nxt != tp->snd_una))) {
- if (callout_active(tp->tt_persist)) {
- callout_stop(tp->tt_persist);
+ if (tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
}
} else {
/*
@@ -1151,51 +1141,72 @@
/*
* We know that the packet was lost, so back out the
* sequence number advance, if any.
+ *
+ * If the error is EPERM the packet got blocked by the
+ * local firewall. Normally we should terminate the
+ * connection but the blocking may have been spurious
+ * due to a firewall reconfiguration cycle. So we treat
+ * it like a packet loss and let the retransmit timer and
+ * timeouts do their work over time.
+ * XXX: It is a POLA question whether calling tcp_drop right
+ * away would be the really correct behavior instead.
*/
- if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !callout_active(tp->tt_persist)) {
- /*
- * No need to check for TH_FIN here because
- * the TF_SENTFIN flag handles that case.
- */
- if ((flags & TH_SYN) == 0) {
- if (sack_rxmit) {
- p->rxmit -= len;
- tp->sackhint.sack_bytes_rexmit -= len;
- KASSERT(tp->sackhint.sack_bytes_rexmit
- >= 0,
- ("sackhint bytes rtx >= 0"));
- } else
- tp->snd_nxt -= len;
- }
+ if (((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) &&
+ ((flags & TH_SYN) == 0) &&
+ (error != EPERM)) {
+ if (sack_rxmit) {
+ p->rxmit -= len;
+ tp->sackhint.sack_bytes_rexmit -= len;
+ KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+ ("sackhint bytes rtx >= 0"));
+ } else
+ tp->snd_nxt -= len;
}
-
out:
SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */
- if (error == ENOBUFS) {
- if (!callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ switch (error) {
+ case EPERM:
+ tp->t_softerror = error;
+ return (error);
+ case ENOBUFS:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
tp->snd_cwnd = tp->t_maxseg;
return (0);
- }
- if (error == EMSGSIZE) {
+ case EMSGSIZE:
/*
- * ip_output() will have already fixed the route
- * for us. tcp_mtudisc() will, as its last action,
- * initiate retransmission, so it is important to
- * not do so here.
+ * For some reason the interface we used initially
+ * to send segments changed to another or lowered
+ * its MTU.
+ *
+ * tcp_mtudisc() will find out the new MTU and as
+ * its last action, initiate retransmission, so it
+ * is important to not do so here.
+ *
+ * If TSO was active we either got an interface
+ * without TSO capabilities or TSO was turned off.
+ * Disable it for this connection too and
+ * immediately retry with MSS sized segments generated
+ * by this function.
*/
+ if (tso)
+ tp->t_flags &= ~TF_TSO;
tcp_mtudisc(tp->t_inpcb, 0);
- return 0;
- }
- if ((error == EHOSTUNREACH || error == ENETDOWN)
- && TCPS_HAVERCVDSYN(tp->t_state)) {
- tp->t_softerror = error;
return (0);
+ case EHOSTDOWN:
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case ENETUNREACH:
+ if (TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_softerror = error;
+ return (0);
+ }
+ /* FALLTHROUGH */
+ default:
+ return (error);
}
- return (error);
}
tcpstat.tcps_sndtotal++;
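The new error switch above sorts ip_output() failures into errors that are absorbed so the timers can recover (ENOBUFS, EMSGSIZE), errors recorded as soft errors (EPERM, the host/network unreachable family once a SYN has been received), and everything else, which is returned to the caller. A rough user-space sketch of that split follows; the enum and helper names are hypothetical, and the real code is more nuanced (EPERM is recorded but still returned, and the unreachable cases only stay soft after a SYN has been received):

	/* Sketch of the soft/hard error split in the new switch; names are hypothetical. */
	#include <errno.h>
	#include <stdio.h>

	enum out_action { SOFT_RETRY, SOFT_RECORD, HARD_FAIL };

	static enum out_action classify(int error)
	{
		switch (error) {
		case EPERM:		/* firewall block: treat like packet loss */
		case EHOSTDOWN:
		case EHOSTUNREACH:
		case ENETDOWN:
		case ENETUNREACH:
			return SOFT_RECORD;
		case ENOBUFS:		/* back off and let the timers retry */
		case EMSGSIZE:		/* MTU changed; path-MTU discovery retransmits */
			return SOFT_RETRY;
		default:
			return HARD_FAIL;
		}
	}

	int main(void)
	{
		printf("EPERM -> %d, ENOBUFS -> %d, EINVAL -> %d\n",
		    classify(EPERM), classify(ENOBUFS), classify(EINVAL));
		return 0;
	}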
@@ -1209,8 +1220,8 @@
tp->rcv_adv = tp->rcv_nxt + recwin;
tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
- if (callout_active(tp->tt_delack))
- callout_stop(tp->tt_delack);
+ if (tcp_timer_active(tp, TT_DELACK))
+ tcp_timer_activate(tp, TT_DELACK, 0);
#if 0
/*
* This completely breaks TCP if newreno is turned on. What happens
@@ -1227,20 +1238,159 @@
}
void
-tcp_setpersist(tp)
- register struct tcpcb *tp;
+tcp_setpersist(struct tcpcb *tp)
{
int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
int tt;
- if (callout_active(tp->tt_rexmt))
+ if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
/*
* Start/restart persistence timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
TCPTV_PERSMIN, TCPTV_PERSMAX);
- callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp);
+ tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
}
+
+/*
+ * Insert TCP options according to the supplied parameters to the place
+ * optp in a consistent way. Can handle unaligned destinations.
+ *
+ * The order of the option processing is crucial for optimal packing and
+ * alignment for the scarce option space.
+ *
+ * The optimal order for a SYN/SYN-ACK segment is:
+ * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
+ * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
+ *
+ * The SACK options should be last. SACK blocks consume 8*n+2 bytes.
+ * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
+ * At minimum we need 10 bytes (to generate 1 SACK block). If both
+ * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
+ * we only have 10 bytes for SACK options (40 - (12 + 18)).
+ */
+int
+tcp_addoptions(struct tcpopt *to, u_char *optp)
+{
+ u_int mask, optlen = 0;
+
+ for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
+ if ((to->to_flags & mask) != mask)
+ continue;
+ switch (to->to_flags & mask) {
+ case TOF_MSS:
+ while (optlen % 4) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_MAXSEG;
+ *optp++ = TCPOPT_MAXSEG;
+ *optp++ = TCPOLEN_MAXSEG;
+ to->to_mss = htons(to->to_mss);
+ bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
+ optp += sizeof(to->to_mss);
+ break;
+ case TOF_SCALE:
+ while (!optlen || optlen % 2 != 1) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_WINDOW;
+ *optp++ = TCPOPT_WINDOW;
+ *optp++ = TCPOLEN_WINDOW;
+ *optp++ = to->to_wscale;
+ break;
+ case TOF_SACKPERM:
+ while (optlen % 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_SACK_PERMITTED;
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = TCPOLEN_SACK_PERMITTED;
+ break;
+ case TOF_TS:
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_TIMESTAMP;
+ *optp++ = TCPOPT_TIMESTAMP;
+ *optp++ = TCPOLEN_TIMESTAMP;
+ to->to_tsval = htonl(to->to_tsval);
+ to->to_tsecr = htonl(to->to_tsecr);
+ bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
+ optp += sizeof(to->to_tsval);
+ bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
+ optp += sizeof(to->to_tsecr);
+ break;
+ case TOF_SIGNATURE:
+ {
+ int siglen = TCPOLEN_SIGNATURE - 2;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE)
+ continue;
+ optlen += TCPOLEN_SIGNATURE;
+ *optp++ = TCPOPT_SIGNATURE;
+ *optp++ = TCPOLEN_SIGNATURE;
+ to->to_signature = optp;
+ while (siglen--)
+ *optp++ = 0;
+ break;
+ }
+ case TOF_SACK:
+ {
+ int sackblks = 0;
+ struct sackblk *sack = (struct sackblk *)to->to_sacks;
+ tcp_seq sack_seq;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < 2 + TCPOLEN_SACK)
+ continue;
+ optlen += TCPOLEN_SACKHDR;
+ *optp++ = TCPOPT_SACK;
+ sackblks = min(to->to_nsacks,
+ (TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
+ *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
+ while (sackblks--) {
+ sack_seq = htonl(sack->start);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ sack_seq = htonl(sack->end);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ optlen += TCPOLEN_SACK;
+ sack++;
+ }
+ tcpstat.tcps_sack_send_blocks++;
+ break;
+ }
+ default:
+ panic("%s: unknown TCP option type", __func__);
+ break;
+ }
+ }
+
+ /* Terminate and pad TCP options to a 4 byte boundary. */
+ if (optlen % 4) {
+ optlen += TCPOLEN_EOL;
+ *optp++ = TCPOPT_EOL;
+ }
+ while (optlen % 4) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+
+ KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
+ return (optlen);
+}
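The loop above relies on one idiom: before each option, emit TCPOPT_NOPs until the running length reaches the offset that option's 32-bit fields want, then finish the block with an EOL and NOP-fill to a 4-byte multiple. A stand-alone sketch of the same discipline is below; the pad_to() helper and the hard-coded values are illustrative, not the kernel's encoder, and it reproduces the 20-byte MSS + NOP + window scale + SACK-permitted + timestamp layout from the header comment (without a signature):

	/* Stand-alone sketch of the NOP-pad / EOL-terminate discipline of tcp_addoptions(). */
	#include <stdio.h>
	#include <string.h>

	#define TCPOPT_EOL		0
	#define TCPOPT_NOP		1
	#define TCPOPT_MAXSEG		2
	#define TCPOPT_WINDOW		3
	#define TCPOPT_SACK_PERMITTED	4
	#define TCPOPT_TIMESTAMP	8
	#define TCP_MAXOLEN		40

	static unsigned pad_to(unsigned char *buf, unsigned len, unsigned mod, unsigned rem)
	{
		while (len % mod != rem)	/* emit NOPs until the next option is aligned */
			buf[len++] = TCPOPT_NOP;
		return len;
	}

	int main(void)
	{
		unsigned char opt[TCP_MAXOLEN];
		unsigned len = 0;

		/* MSS: starts on a 4-byte boundary, 4 bytes total. */
		len = pad_to(opt, len, 4, 0);
		opt[len++] = TCPOPT_MAXSEG; opt[len++] = 4;
		opt[len++] = 0x05; opt[len++] = 0xb4;			/* 1460 */

		/* Window scale: 3 bytes, started at an odd offset. */
		len = pad_to(opt, len, 2, 1);
		opt[len++] = TCPOPT_WINDOW; opt[len++] = 3; opt[len++] = 7;

		/* SACK permitted: 2 bytes, started at an even offset. */
		len = pad_to(opt, len, 2, 0);
		opt[len++] = TCPOPT_SACK_PERMITTED; opt[len++] = 2;

		/* Timestamps: 10 bytes, started at offset 2 (mod 4). */
		len = pad_to(opt, len, 4, 2);
		opt[len++] = TCPOPT_TIMESTAMP; opt[len++] = 10;
		memset(&opt[len], 0, 8); len += 8;

		/* Terminate and pad to a 4-byte multiple, as the function's tail does. */
		if (len % 4)
			opt[len++] = TCPOPT_EOL;
		while (len % 4)
			opt[len++] = TCPOPT_NOP;

		printf("SYN option block: %u bytes\n", len);	/* prints 20 */
		return 0;
	}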
Index: in_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_var.h -L sys/netinet/in_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/in_var.h
+++ sys/netinet/in_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/in_var.h,v 1.53.2.2 2005/08/24 17:30:44 rwatson Exp $
+ * $FreeBSD: src/sys/netinet/in_var.h,v 1.61 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IN_VAR_H_
@@ -94,6 +94,19 @@
#define INADDR_HASH(x) \
(&in_ifaddrhashtbl[INADDR_HASHVAL(x) & in_ifaddrhmask])
+/*
+ * Macro for finding the internet address structure (in_ifaddr)
+ * corresponding to one of our IP addresses (in_addr).
+ */
+#define INADDR_TO_IFADDR(addr, ia) \
+ /* struct in_addr addr; */ \
+ /* struct in_ifaddr *ia; */ \
+do { \
+\
+ LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
+ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
+ break; \
+} while (0)
/*
* Macro for finding the interface (ifnet structure) corresponding to one
@@ -105,9 +118,7 @@
{ \
struct in_ifaddr *ia; \
\
- LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
- if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
- break; \
+ INADDR_TO_IFADDR(addr, ia); \
(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
}
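INADDR_TO_IFADDR lifts the hash-bucket walk out of IFP_TO_IA so the lookup can be reused on its own. The pattern, index into a hash table, LIST_FOREACH the bucket, break on the first exact match (leaving the iterator NULL when nothing matches), can be sketched in user space with <sys/queue.h>; the structures below are stand-ins, not the kernel's in_ifaddr:

	/* User-space sketch of the INADDR_TO_IFADDR lookup pattern (stand-in types). */
	#include <sys/queue.h>
	#include <stdint.h>
	#include <stdio.h>

	#define HASHSIZE	8
	#define HASHVAL(a)	((a) % HASHSIZE)

	struct entry {
		uint32_t addr;
		LIST_ENTRY(entry) hash_link;
	};
	LIST_HEAD(bucket, entry);

	static struct bucket table[HASHSIZE];

	int main(void)
	{
		struct entry e1 = { 0x0a000001 }, e2 = { 0x0a000009 }, *ia;
		uint32_t key = 0x0a000009;

		LIST_INSERT_HEAD(&table[HASHVAL(e1.addr)], &e1, hash_link);
		LIST_INSERT_HEAD(&table[HASHVAL(e2.addr)], &e2, hash_link);

		/* The macro's body: walk one bucket, stop at the first exact match. */
		LIST_FOREACH(ia, &table[HASHVAL(key)], hash_link)
			if (ia->addr == key)
				break;

		printf("found: %s\n", ia != NULL ? "yes" : "no");
		return 0;
	}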
@@ -136,6 +147,12 @@
int rti_type; /* type of router which is querier on this interface */
int rti_time; /* # of slow timeouts since last old query */
SLIST_ENTRY(router_info) rti_list;
+#ifdef notyet
+ int rti_timev1; /* IGMPv1 querier present */
+ int rti_timev2; /* IGMPv2 querier present */
+ int rti_timer; /* report to general query */
+ int rti_qrv; /* querier robustness */
+#endif
};
/*
@@ -154,8 +171,45 @@
u_int inm_timer; /* IGMP membership report timer */
u_int inm_state; /* state of the membership */
struct router_info *inm_rti; /* router info*/
+ u_int inm_refcount; /* reference count */
+#ifdef notyet /* IGMPv3 source-specific multicast fields */
+ TAILQ_HEAD(, in_msfentry) inm_msf; /* all active source filters */
+ TAILQ_HEAD(, in_msfentry) inm_msf_record; /* recorded sources */
+ TAILQ_HEAD(, in_msfentry) inm_msf_exclude; /* exclude sources */
+ TAILQ_HEAD(, in_msfentry) inm_msf_include; /* include sources */
+ /* XXX: should this lot go to the router_info structure? */
+ /* XXX: can/should these be callouts? */
+ /* IGMP protocol timers */
+ int32_t inm_ti_curstate; /* current state timer */
+ int32_t inm_ti_statechg; /* state change timer */
+ /* IGMP report timers */
+ uint16_t inm_rpt_statechg; /* state change report timer */
+ uint16_t inm_rpt_toxx; /* fmode change report timer */
+ /* IGMP protocol state */
+ uint16_t inm_fmode; /* filter mode */
+ uint32_t inm_recsrc_count; /* # of recorded sources */
+ uint16_t inm_exclude_sock_count; /* # of exclude-mode sockets */
+ uint16_t inm_gass_count; /* # of g-a-s queries */
+#endif
};
+#ifdef notyet
+/*
+ * Internet multicast source filter list. This list is used to store
+ * IP multicast source addresses for each membership on an interface.
+ * TODO: Allocate these structures using UMA.
+ * TODO: Find an easier way of linking the struct into two lists at once.
+ */
+struct in_msfentry {
+ TAILQ_ENTRY(in_msfentry) isf_link; /* next filter in all-list */
+ TAILQ_ENTRY(in_msfentry) isf_next; /* next filter in queue */
+ struct in_addr isf_addr; /* the address of this source */
+ uint16_t isf_refcount; /* reference count */
+ uint16_t isf_reporttag; /* what to report to the IGMP router */
+ uint16_t isf_rexmit; /* retransmission state/count */
+};
+#endif
+
#ifdef _KERNEL
#ifdef SYSCTL_DECL
@@ -234,15 +288,22 @@
} while(0)
struct route;
+struct ip_moptions;
+
+size_t imo_match_group(struct ip_moptions *, struct ifnet *,
+ struct sockaddr *);
+struct in_msource *imo_match_source(struct ip_moptions *, size_t,
+ struct sockaddr *);
struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
void in_delmulti(struct in_multi *);
+void in_delmulti_locked(struct in_multi *);
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
void in_rtqdrain(void);
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
-int ip_fastforward(struct mbuf *);
+struct mbuf *ip_fastforward(struct mbuf *);
#endif /* _KERNEL */
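This hunk also changes the ip_fastforward() prototype from returning int to returning struct mbuf *. The stub below is only a guess at the calling convention that change suggests (NULL meaning the fast path consumed the packet, non-NULL meaning it is handed back for normal input processing); the types and functions are stand-ins, not kernel code:

	/* Hypothetical caller pattern for the new ip_fastforward() prototype. */
	#include <stdio.h>
	#include <stddef.h>

	struct mbuf { int consumed; };

	static struct mbuf *ip_fastforward_stub(struct mbuf *m)
	{
		return m->consumed ? NULL : m;	/* pretend we sometimes forward/drop it */
	}

	int main(void)
	{
		struct mbuf pkt = { 0 };
		struct mbuf *m = ip_fastforward_stub(&pkt);

		if (m == NULL)
			return 0;			/* fast path consumed the packet */
		printf("slow path continues with the packet\n");
		return 0;
	}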
Index: ip_fw2.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_fw2.c -L sys/netinet/ip_fw2.c -u -r1.2 -r1.3
--- sys/netinet/ip_fw2.c
+++ sys/netinet/ip_fw2.c
@@ -21,10 +21,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.106.2.12 2006/03/09 13:42:44 glebius Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.4.1 2008/01/28 17:44:30 rwatson Exp $");
+
#define DEB(x)
#define DDB(x) x
@@ -34,7 +35,7 @@
#if !defined(KLD_MODULE)
#include "opt_ipfw.h"
-#include "opt_ip6fw.h"
+#include "opt_ipdivert.h"
#include "opt_ipdn.h"
#include "opt_inet.h"
#ifndef INET
@@ -43,16 +44,21 @@
#endif
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/jail.h>
#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -61,6 +67,10 @@
#include <net/if.h>
#include <net/radix.h>
#include <net/route.h>
+#include <net/pf_mtag.h>
+
+#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
+
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -71,21 +81,23 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
+#include <netinet/ip_carp.h>
+#include <netinet/pim.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-
+#include <netinet/sctp.h>
+#ifdef IPFIREWALL_NAT
+#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
+#endif
#include <netgraph/ng_ipfw.h>
#include <altq/if_altq.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif
-
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#ifdef INET6
@@ -96,6 +108,8 @@
#include <machine/in_cksum.h> /* XXX for in_cksum */
+#include <security/mac/mac_framework.h>
+
/*
* set_disable contains one bit per set value (0..31).
* If the bit is set, all rules with the corresponding set
@@ -130,55 +144,19 @@
struct ip_fw_chain {
struct ip_fw *rules; /* list of rules */
struct ip_fw *reap; /* list of rules to reap */
+ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */
struct radix_node_head *tables[IPFW_TABLES_MAX];
- struct mtx mtx; /* lock guarding rule list */
- int busy_count; /* busy count for rw locks */
- int want_write;
- struct cv cv;
+ struct rwlock rwmtx;
};
#define IPFW_LOCK_INIT(_chain) \
- mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \
- MTX_DEF | MTX_RECURSE)
-#define IPFW_LOCK_DESTROY(_chain) mtx_destroy(&(_chain)->mtx)
-#define IPFW_WLOCK_ASSERT(_chain) do { \
- mtx_assert(&(_chain)->mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
-
-static __inline void
-IPFW_RLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->busy_count++;
- mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_RUNLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->busy_count--;
- if (chain->busy_count == 0 && chain->want_write)
- cv_signal(&chain->cv);
- mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_WLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->want_write++;
- while (chain->busy_count > 0)
- cv_wait(&chain->cv, &chain->mtx);
-}
-
-static __inline void
-IPFW_WUNLOCK(struct ip_fw_chain *chain)
-{
- chain->want_write--;
- cv_signal(&chain->cv);
- mtx_unlock(&chain->mtx);
-}
+ rw_init(&(_chain)->rwmtx, "IPFW static rules")
+#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx)
+#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
+
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
/*
* list of rules for layer 3
@@ -197,11 +175,13 @@
static int fw_debug = 1;
static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
+extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
+
#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable,
- CTLFLAG_RW | CTLFLAG_SECURE3,
- &fw_enable, 0, "Enable ipfw");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &fw_enable, 0,
+ ipfw_chg_hook, "I", "Enable ipfw");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
&autoinc_step, 0, "Rule number autoincrement step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
@@ -327,6 +307,9 @@
#endif /* INET6 */
#endif /* SYSCTL_NODE */
+#ifdef IPFIREWALL_NAT
+MODULE_DEPEND(ipfw, libalias, 1, 1, 1);
+#endif
static int fw_deny_unknown_exthdrs = 1;
@@ -336,6 +319,7 @@
*/
#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
#define UDP(p) ((struct udphdr *)(p))
#define ICMP(p) ((struct icmphdr *)(p))
#define ICMP6(p) ((struct icmp6_hdr *)(p))
@@ -496,8 +480,6 @@
/* XXX lock? */
TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
- if (ia->ifa_addr == NULL)
- continue;
if (ia->ifa_addr->sa_family != AF_INET)
continue;
if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
@@ -605,10 +587,7 @@
struct in6_addr copia;
TAILQ_FOREACH(mdc, &ifnet, if_link)
- for (mdc2 = mdc->if_addrlist.tqh_first; mdc2;
- mdc2 = mdc2->ifa_list.tqe_next) {
- if (!mdc2->ifa_addr)
- continue;
+ TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) {
if (mdc2->ifa_addr->sa_family == AF_INET6) {
fdm = (struct in6_ifaddr *)mdc2;
copia = fdm->ia_addr.sin6_addr;
@@ -672,11 +651,11 @@
hash_packet6(struct ipfw_flow_id *id)
{
u_int32_t i;
- i = (id->dst_ip6.__u6_addr.__u6_addr32[0]) ^
- (id->dst_ip6.__u6_addr.__u6_addr32[1]) ^
- (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
+ i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
(id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
- (id->dst_port) ^ (id->src_port) ^ (id->flow_id6);
+ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
+ (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
+ (id->dst_port) ^ (id->src_port);
return i;
}
@@ -695,11 +674,12 @@
}
static void
-send_reject6(struct ip_fw_args *args, int code, u_short offset, u_int hlen)
+send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
{
- if (code == ICMP6_UNREACH_RST && offset == 0 &&
- args->f_id.proto == IPPROTO_TCP) {
- struct ip6_hdr *ip6;
+ struct mbuf *m;
+
+ m = args->m;
+ if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *tcp;
tcp_seq ack, seq;
int flags;
@@ -707,18 +687,11 @@
struct ip6_hdr ip6;
struct tcphdr th;
} ti;
-
- if (args->m->m_len < (hlen+sizeof(struct tcphdr))) {
- args->m = m_pullup(args->m, hlen+sizeof(struct tcphdr));
- if (args->m == NULL)
- return;
- }
-
- ip6 = mtod(args->m, struct ip6_hdr *);
- tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
+ tcp = (struct tcphdr *)((char *)ip6 + hlen);
if ((tcp->th_flags & TH_RST) != 0) {
- m_freem(args->m);
+ m_freem(m);
+ args->m = NULL;
return;
}
@@ -734,14 +707,20 @@
flags = TH_RST;
} else {
ack = ti.th.th_seq;
- if (((args->m)->m_flags & M_PKTHDR) != 0) {
- ack += (args->m)->m_pkthdr.len - hlen
+ if ((m->m_flags & M_PKTHDR) != 0) {
+ /*
+ * total new data to ACK is:
+ * total packet length,
+ * minus the header length,
+ * minus the tcp header length.
+ */
+ ack += m->m_pkthdr.len - hlen
- (ti.th.th_off << 2);
} else if (ip6->ip6_plen) {
- ack += ntohs(ip6->ip6_plen) + sizeof(*ip6)
- - hlen - (ti.th.th_off << 2);
+ ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) -
+ hlen - (ti.th.th_off << 2);
} else {
- m_freem(args->m);
+ m_freem(m);
return;
}
if (tcp->th_flags & TH_SYN)
@@ -750,14 +729,28 @@
flags = TH_RST|TH_ACK;
}
bcopy(&ti, ip6, sizeof(ti));
- tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1),
- args->m, ack, seq, flags);
-
+ /*
+ * m is only used to recycle the mbuf
+ * The data in it is never read so we don't need
+ * to correct the offsets or anything
+ */
+ tcp_respond(NULL, ip6, tcp, m, ack, seq, flags);
} else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
- icmp6_error(args->m, ICMP6_DST_UNREACH, code, 0);
-
+#if 0
+ /*
+ * Unlike above, the mbufs need to line up with the ip6 hdr,
+ * as the contents are read. We need to m_adj() the
+ * needed amount.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
} else
- m_freem(args->m);
+ m_freem(m);
args->m = NULL;
}
@@ -775,7 +768,8 @@
*/
static void
ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset)
+ struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+ struct ip *ip)
{
struct ether_header *eh = args->eh;
char *action;
@@ -812,6 +806,9 @@
if (cmd->opcode == O_PROB)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
+
action = action2;
switch (cmd->opcode) {
case O_DENY:
@@ -865,9 +862,15 @@
case O_FORWARD_IP: {
ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
int len;
+ struct in_addr dummyaddr;
+ if (sa->sa.sin_addr.s_addr == INADDR_ANY)
+ dummyaddr.s_addr = htonl(tablearg);
+ else
+ dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
len = snprintf(SNPARGS(action2, 0), "Forward to %s",
- inet_ntoa(sa->sa.sin_addr));
+ inet_ntoa(dummyaddr));
+
if (sa->sa.sin_port)
snprintf(SNPARGS(action2, len), ":%d",
sa->sa.sin_port);
@@ -881,6 +884,9 @@
snprintf(SNPARGS(action2, 0), "Ngtee %d",
cmd->arg1);
break;
+ case O_NAT:
+ action = "Nat";
+ break;
default:
action = "UNKNOWN";
break;
@@ -896,8 +902,6 @@
struct icmphdr *icmp;
struct tcphdr *tcp;
struct udphdr *udp;
- /* Initialize to make compiler happy. */
- struct ip *ip = NULL;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
struct icmp6_hdr *icmp6;
@@ -905,19 +909,19 @@
src[0] = '\0';
dst[0] = '\0';
#ifdef INET6
- if (args->f_id.addr_type == 6) {
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ char ip6buf[INET6_ADDRSTRLEN];
snprintf(src, sizeof(src), "[%s]",
- ip6_sprintf(&args->f_id.src_ip6));
+ ip6_sprintf(ip6buf, &args->f_id.src_ip6));
snprintf(dst, sizeof(dst), "[%s]",
- ip6_sprintf(&args->f_id.dst_ip6));
+ ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
- ip6 = (struct ip6_hdr *)mtod(m, struct ip6_hdr *);
- tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
- udp = (struct udphdr *)(mtod(args->m, char *) + hlen);
+ ip6 = (struct ip6_hdr *)ip;
+ tcp = (struct tcphdr *)(((char *)ip) + hlen);
+ udp = (struct udphdr *)(((char *)ip) + hlen);
} else
#endif
{
- ip = mtod(m, struct ip *);
tcp = L3HDR(struct tcphdr, ip);
udp = L3HDR(struct udphdr, ip);
@@ -961,7 +965,7 @@
break;
#ifdef INET6
case IPPROTO_ICMPV6:
- icmp6 = (struct icmp6_hdr *)(mtod(args->m, char *) + hlen);
+ icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
if (offset == 0)
len = snprintf(SNPARGS(proto, 0),
"ICMPv6:%u.%u ",
@@ -980,7 +984,7 @@
}
#ifdef INET6
- if (args->f_id.addr_type == 6) {
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
snprintf(SNPARGS(fragment, 0),
" (frag %08x:%d@%d%s)",
@@ -1096,9 +1100,9 @@
if (ipfw_dyn_v == NULL || dyn_count == 0)
return;
/* do not expire more than once per second, it is useless */
- if (!FORCE && last_remove == time_second)
+ if (!FORCE && last_remove == time_uptime)
return;
- last_remove = time_second;
+ last_remove = time_uptime;
/*
* because O_LIMIT refer to parent rules, during the first pass only
@@ -1130,7 +1134,7 @@
}
} else {
if (!FORCE &&
- !TIME_LEQ( q->expire, time_second ))
+ !TIME_LEQ( q->expire, time_uptime ))
goto next;
}
if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
@@ -1152,7 +1156,7 @@
*/
static ipfw_dyn_rule *
lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
- struct tcphdr *tcp)
+ struct tcphdr *tcp)
{
/*
* stateful ipfw extensions.
@@ -1173,7 +1177,7 @@
for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
if (q->dyn_type == O_LIMIT_PARENT && q->count)
goto next;
- if (TIME_LEQ( q->expire, time_second)) { /* expire entry */
+ if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */
UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
continue;
}
@@ -1235,7 +1239,7 @@
q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
switch (q->state) {
case TH_SYN: /* opening */
- q->expire = time_second + dyn_syn_lifetime;
+ q->expire = time_uptime + dyn_syn_lifetime;
break;
case BOTH_SYN: /* move to established */
@@ -1258,13 +1262,13 @@
}
}
}
- q->expire = time_second + dyn_ack_lifetime;
+ q->expire = time_uptime + dyn_ack_lifetime;
break;
case BOTH_SYN | BOTH_FIN: /* both sides closed */
if (dyn_fin_lifetime >= dyn_keepalive_period)
dyn_fin_lifetime = dyn_keepalive_period - 1;
- q->expire = time_second + dyn_fin_lifetime;
+ q->expire = time_uptime + dyn_fin_lifetime;
break;
default:
@@ -1278,14 +1282,14 @@
#endif
if (dyn_rst_lifetime >= dyn_keepalive_period)
dyn_rst_lifetime = dyn_keepalive_period - 1;
- q->expire = time_second + dyn_rst_lifetime;
+ q->expire = time_uptime + dyn_rst_lifetime;
break;
}
} else if (pkt->proto == IPPROTO_UDP) {
- q->expire = time_second + dyn_udp_lifetime;
+ q->expire = time_uptime + dyn_udp_lifetime;
} else {
/* other protocols */
- q->expire = time_second + dyn_short_lifetime;
+ q->expire = time_uptime + dyn_short_lifetime;
}
done:
if (match_direction)
@@ -1295,7 +1299,7 @@
static ipfw_dyn_rule *
lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
- struct tcphdr *tcp)
+ struct tcphdr *tcp)
{
ipfw_dyn_rule *q;
@@ -1379,7 +1383,7 @@
}
r->id = *id;
- r->expire = time_second + dyn_syn_lifetime;
+ r->expire = time_uptime + dyn_syn_lifetime;
r->rule = rule;
r->dyn_type = dyn_type;
r->pcnt = r->bcnt = 0;
@@ -1429,7 +1433,7 @@
pkt->dst_ip == q->id.dst_ip)
)
) {
- q->expire = time_second + dyn_short_lifetime;
+ q->expire = time_uptime + dyn_short_lifetime;
DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
return q;
}
@@ -1445,62 +1449,76 @@
*/
static int
install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
- struct ip_fw_args *args)
+ struct ip_fw_args *args, uint32_t tablearg)
{
static int last_log;
-
ipfw_dyn_rule *q;
+ struct in_addr da;
+ char src[48], dst[48];
- DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n",
- cmd->o.opcode,
+ src[0] = '\0';
+ dst[0] = '\0';
+
+ DEB(
+ printf("ipfw: %s: type %d 0x%08x %u -> 0x%08x %u\n",
+ __func__, cmd->o.opcode,
(args->f_id.src_ip), (args->f_id.src_port),
- (args->f_id.dst_ip), (args->f_id.dst_port) );)
+ (args->f_id.dst_ip), (args->f_id.dst_port));
+ )
IPFW_DYN_LOCK();
q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
- if (q != NULL) { /* should never occur */
- if (last_log != time_second) {
- last_log = time_second;
- printf("ipfw: install_state: entry already present, done\n");
+ if (q != NULL) { /* should never occur */
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
+ printf("ipfw: %s: entry already present, done\n",
+ __func__);
}
IPFW_DYN_UNLOCK();
- return 0;
+ return (0);
}
if (dyn_count >= dyn_max)
- /*
- * Run out of slots, try to remove any expired rule.
- */
+ /* Run out of slots, try to remove any expired rule. */
remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
if (dyn_count >= dyn_max) {
- if (last_log != time_second) {
- last_log = time_second;
- printf("ipfw: install_state: Too many dynamic rules\n");
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
+ printf("ipfw: %s: Too many dynamic rules\n", __func__);
}
IPFW_DYN_UNLOCK();
- return 1; /* cannot install, notify caller */
+ return (1); /* cannot install, notify caller */
}
switch (cmd->o.opcode) {
- case O_KEEP_STATE: /* bidir rule */
+ case O_KEEP_STATE: /* bidir rule */
add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
break;
- case O_LIMIT: /* limit number of sessions */
- {
- u_int16_t limit_mask = cmd->limit_mask;
+ case O_LIMIT: { /* limit number of sessions */
struct ipfw_flow_id id;
ipfw_dyn_rule *parent;
+ uint32_t conn_limit;
+ uint16_t limit_mask = cmd->limit_mask;
- DEB(printf("ipfw: installing dyn-limit rule %d\n",
- cmd->conn_limit);)
+ conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ?
+ tablearg : cmd->conn_limit;
+
+ DEB(
+ if (cmd->conn_limit == IP_FW_TABLEARG)
+ printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
+ "(tablearg)\n", __func__, conn_limit);
+ else
+ printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
+ __func__, conn_limit);
+ )
- id.dst_ip = id.src_ip = 0;
- id.dst_port = id.src_port = 0;
+ id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
id.proto = args->f_id.proto;
+ id.addr_type = args->f_id.addr_type;
if (IS_IP6_FLOW_ID (&(args->f_id))) {
if (limit_mask & DYN_SRC_ADDR)
@@ -1517,38 +1535,68 @@
id.src_port = args->f_id.src_port;
if (limit_mask & DYN_DST_PORT)
id.dst_port = args->f_id.dst_port;
- parent = lookup_dyn_parent(&id, rule);
- if (parent == NULL) {
- printf("ipfw: add parent failed\n");
+ if ((parent = lookup_dyn_parent(&id, rule)) == NULL) {
+ printf("ipfw: %s: add parent failed\n", __func__);
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
- if (parent->count >= cmd->conn_limit) {
- /*
- * See if we can remove some expired rule.
- */
+
+ if (parent->count >= conn_limit) {
+ /* See if we can remove some expired rule. */
remove_dyn_rule(rule, parent);
- if (parent->count >= cmd->conn_limit) {
- if (fw_verbose && last_log != time_second) {
- last_log = time_second;
+ if (parent->count >= conn_limit) {
+ if (fw_verbose && last_log != time_uptime) {
+ last_log = time_uptime;
+#ifdef INET6
+ /*
+ * XXX IPv6 flows are not
+ * supported yet.
+ */
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ char ip6buf[INET6_ADDRSTRLEN];
+ snprintf(src, sizeof(src),
+ "[%s]", ip6_sprintf(ip6buf,
+ &args->f_id.src_ip6));
+ snprintf(dst, sizeof(dst),
+ "[%s]", ip6_sprintf(ip6buf,
+ &args->f_id.dst_ip6));
+ } else
+#endif
+ {
+ da.s_addr =
+ htonl(args->f_id.src_ip);
+ inet_ntoa_r(da, src);
+ da.s_addr =
+ htonl(args->f_id.dst_ip);
+ inet_ntoa_r(da, dst);
+ }
log(LOG_SECURITY | LOG_DEBUG,
- "drop session, too many entries\n");
+ "ipfw: %d %s %s:%u -> %s:%u, %s\n",
+ parent->rule->rulenum,
+ "drop session",
+ src, (args->f_id.src_port),
+ dst, (args->f_id.dst_port),
+ "too many entries");
}
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
}
add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
- }
break;
+ }
default:
- printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode);
+ printf("ipfw: %s: unknown dynamic rule type %u\n",
+ __func__, cmd->o.opcode);
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
- lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */
+
+ /* XXX just set lifetime */
+ lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
+
IPFW_DYN_UNLOCK();
- return 0;
+ return (0);
}
/*
@@ -1556,18 +1604,31 @@
* When flags & TH_RST, we are sending a RST packet, because of a
* "reset" action matched the packet.
* Otherwise we are sending a keepalive, and flags & TH_
+ * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
+ * so that MAC can label the reply appropriately.
*/
static struct mbuf *
-send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
+send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
+ u_int32_t ack, int flags)
{
struct mbuf *m;
struct ip *ip;
struct tcphdr *tcp;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == 0)
return (NULL);
m->m_pkthdr.rcvif = (struct ifnet *)0;
+
+#ifdef MAC
+ if (replyto != NULL)
+ mac_create_mbuf_netlayer(replyto, m);
+ else
+ mac_create_mbuf_from_firewall(m);
+#else
+ (void)replyto; /* don't warn about unused arg */
+#endif
+
m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
m->m_data += max_linkhdr;
@@ -1636,24 +1697,33 @@
* sends a reject message, consuming the mbuf passed as an argument.
*/
static void
-send_reject(struct ip_fw_args *args, int code, u_short offset, int ip_len)
+send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip)
{
+#if 0
+ /* XXX When ip is not guaranteed to be at mtod() we will
+ * need to account for this.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
/* We need the IP header in host order for icmp_error(). */
if (args->eh != NULL) {
- struct ip *ip = mtod(args->m, struct ip *);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
}
icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
- } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
+ } else if (args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *const tcp =
L3HDR(struct tcphdr, mtod(args->m, struct ip *));
if ( (tcp->th_flags & TH_RST) == 0) {
struct mbuf *m;
- m = send_pkt(&(args->f_id), ntohl(tcp->th_seq),
- ntohl(tcp->th_ack),
+ m = send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
tcp->th_flags | TH_RST);
if (m != NULL)
ip_output(m, NULL, NULL, 0, NULL, NULL);
@@ -1691,6 +1761,8 @@
cmd += F_LEN(cmd);
if (cmd->opcode == O_ALTQ)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
if ( cmd->opcode == O_SKIPTO )
for (rule = me->next; rule ; rule = rule->next)
if (rule->rulenum >= cmd->arg1)
@@ -1703,7 +1775,7 @@
static int
add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint8_t mlen, uint32_t value)
+ uint8_t mlen, uint32_t value)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1731,7 +1803,7 @@
static int
del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint8_t mlen)
+ uint8_t mlen)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1812,7 +1884,7 @@
static int
lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint32_t *val)
+ uint32_t *val)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1904,11 +1976,10 @@
}
static int
-check_uidgid(ipfw_insn_u32 *insn,
- int proto, struct ifnet *oif,
- struct in_addr dst_ip, u_int16_t dst_port,
- struct in_addr src_ip, u_int16_t src_port,
- struct ip_fw_ugid *ugp, int *lookup, struct inpcb *inp)
+check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
+ struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
+ u_int16_t src_port, struct ip_fw_ugid *ugp, int *lookup,
+ struct inpcb *inp)
{
struct inpcbinfo *pi;
int wildcard;
@@ -1939,7 +2010,7 @@
wildcard = 0;
pi = &tcbinfo;
} else if (proto == IPPROTO_UDP) {
- wildcard = 1;
+ wildcard = INPLOOKUP_WILDCARD;
pi = &udbinfo;
} else
return 0;
@@ -1989,6 +2060,185 @@
return match;
}
+#ifdef IPFIREWALL_NAT
+static eventhandler_tag ifaddr_event_tag;
+
+static void
+ifaddr_change(void *arg __unused, struct ifnet *ifp)
+{
+ struct cfg_nat *ptr;
+ struct ifaddr *ifa;
+
+ IPFW_WLOCK(&layer3_chain);
+ /* Check every nat entry... */
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
+ /* ...using nic 'ifp->if_xname' as dynamic alias address. */
+ if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) == 0) {
+ mtx_lock(&ifp->if_addr_mtx);
+ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr == NULL)
+ continue;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ptr->ip = ((struct sockaddr_in *)
+ (ifa->ifa_addr))->sin_addr;
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ }
+ mtx_unlock(&ifp->if_addr_mtx);
+ }
+ }
+ IPFW_WUNLOCK(&layer3_chain);
+}
+
+static void
+flush_nat_ptrs(const int i)
+{
+ struct ip_fw *rule;
+
+ IPFW_WLOCK_ASSERT(&layer3_chain);
+ for (rule = layer3_chain.rules; rule; rule = rule->next) {
+ ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule);
+ if (cmd->o.opcode != O_NAT)
+ continue;
+ if (cmd->nat != NULL && cmd->nat->id == i)
+ cmd->nat = NULL;
+ }
+}
+
+static struct cfg_nat *
+lookup_nat(const int i)
+{
+ struct cfg_nat *ptr;
+
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next)
+ if (ptr->id == i)
+ return(ptr);
+ return (NULL);
+}
+
+#define HOOK_NAT(b, p) do { \
+ IPFW_WLOCK_ASSERT(&layer3_chain); \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+#define UNHOOK_NAT(p) do { \
+ IPFW_WLOCK_ASSERT(&layer3_chain); \
+ LIST_REMOVE(p, _next); \
+} while (0)
+
+#define HOOK_REDIR(b, p) do { \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+#define HOOK_SPOOL(b, p) do { \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+static void
+del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
+{
+ struct cfg_redir *r, *tmp_r;
+ struct cfg_spool *s, *tmp_s;
+ int i, num;
+
+ LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
+ num = 1; /* Number of alias_link to delete. */
+ switch (r->mode) {
+ case REDIR_PORT:
+ num = r->pport_cnt;
+ /* FALLTHROUGH */
+ case REDIR_ADDR:
+ case REDIR_PROTO:
+ /* Delete all libalias redirect entry. */
+ for (i = 0; i < num; i++)
+ LibAliasRedirectDelete(n->lib, r->alink[i]);
+ /* Del spool cfg if any. */
+ LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
+ LIST_REMOVE(s, _next);
+ free(s, M_IPFW);
+ }
+ free(r->alink, M_IPFW);
+ LIST_REMOVE(r, _next);
+ free(r, M_IPFW);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ /* XXX - panic?!?!? */
+ break;
+ }
+ }
+}
+
+static int
+add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
+{
+ struct cfg_redir *r, *ser_r;
+ struct cfg_spool *s, *ser_s;
+ int cnt, off, i;
+ char *panic_err;
+
+ for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
+ ser_r = (struct cfg_redir *)&buf[off];
+ r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
+ memcpy(r, ser_r, SOF_REDIR);
+ LIST_INIT(&r->spool_chain);
+ off += SOF_REDIR;
+ r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
+ M_IPFW, M_WAITOK | M_ZERO);
+ switch (r->mode) {
+ case REDIR_ADDR:
+ r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
+ r->paddr);
+ break;
+ case REDIR_PORT:
+ for (i = 0 ; i < r->pport_cnt; i++) {
+ /* If remotePort is all ports, set it to 0. */
+ u_short remotePortCopy = r->rport + i;
+ if (r->rport_cnt == 1 && r->rport == 0)
+ remotePortCopy = 0;
+ r->alink[i] = LibAliasRedirectPort(ptr->lib,
+ r->laddr, htons(r->lport + i), r->raddr,
+ htons(remotePortCopy), r->paddr,
+ htons(r->pport + i), r->proto);
+ if (r->alink[i] == NULL) {
+ r->alink[0] = NULL;
+ break;
+ }
+ }
+ break;
+ case REDIR_PROTO:
+ r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
+ r->raddr, r->paddr, r->proto);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ break;
+ }
+ if (r->alink[0] == NULL) {
+ panic_err = "LibAliasRedirect* returned NULL";
+ goto bad;
+ } else /* LSNAT handling. */
+ for (i = 0; i < r->spool_cnt; i++) {
+ ser_s = (struct cfg_spool *)&buf[off];
+ s = malloc(SOF_REDIR, M_IPFW,
+ M_WAITOK | M_ZERO);
+ memcpy(s, ser_s, SOF_SPOOL);
+ LibAliasAddServer(ptr->lib, r->alink[0],
+ s->addr, htons(s->port));
+ off += SOF_SPOOL;
+ /* Hook spool entry. */
+ HOOK_SPOOL(&r->spool_chain, s);
+ }
+ /* And finally hook this redir entry. */
+ HOOK_REDIR(&ptr->redir_chain, r);
+ }
+ return (1);
+bad:
+ /* something really bad happened: panic! */
+ panic("%s\n", panic_err);
+}
+#endif
+
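The IPFIREWALL_NAT block above keeps a list of cfg_nat instances on the chain: an O_NAT instruction caches a pointer to its instance, lookup_nat() resolves the id on first use, and flush_nat_ptrs() clears any cached pointers when an instance is reconfigured so the next packet re-resolves. A small user-space sketch of that cache-then-invalidate pattern, with stand-in types and stub names rather than the real ipfw structures:

	/* Sketch of the lookup-once / flush-on-reconfig pattern used for O_NAT (stand-in types). */
	#include <stdio.h>
	#include <stddef.h>

	struct nat_cfg { int id; };
	struct nat_insn { int arg1; struct nat_cfg *nat; };	/* loosely mirrors ipfw_insn_nat */

	static struct nat_cfg instances[] = { { 1 }, { 2 } };

	static struct nat_cfg *lookup_nat_stub(int id)
	{
		for (size_t i = 0; i < sizeof(instances) / sizeof(instances[0]); i++)
			if (instances[i].id == id)
				return &instances[i];
		return NULL;
	}

	static void flush_nat_ptrs_stub(struct nat_insn *cmd, int id)
	{
		/* On reconfiguration, drop cached pointers so the next packet re-resolves. */
		if (cmd->nat != NULL && cmd->nat->id == id)
			cmd->nat = NULL;
	}

	int main(void)
	{
		struct nat_insn cmd = { 2, NULL };

		if (cmd.nat == NULL)				/* first packet: resolve and cache */
			cmd.nat = lookup_nat_stub(cmd.arg1);
		printf("cached instance id %d\n", cmd.nat ? cmd.nat->id : -1);

		flush_nat_ptrs_stub(&cmd, 2);			/* instance 2 reconfigured */
		printf("after flush: %s\n", cmd.nat ? "still cached" : "re-resolve next time");
		return 0;
	}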
/*
* The main check routine for the firewall.
*
@@ -2000,6 +2250,8 @@
* args->m (in/out) The packet; we set to NULL when/if we nuke it.
* Starts with the IP header.
* args->eh (in) Mac header if present, or NULL for layer3 packet.
+ * args->L3offset Number of bytes bypassed if we came from L2.
+ * e.g. often sizeof(eh) ** NOTYET **
* args->oif Outgoing interface, or NULL if packet is incoming.
* The incoming interface is in the mbuf. (in)
* args->divert_rule (in/out)
@@ -2021,12 +2273,11 @@
* IP_FW_NETGRAPH into netgraph, cookie args->cookie
*
*/
-
int
ipfw_chk(struct ip_fw_args *args)
{
/*
- * Local variables hold state during the processing of a packet.
+ * Local variables holding state during the processing of a packet:
*
* IMPORTANT NOTE: to speed up the processing of rules, there
* are some assumption on the values of the variables, which
@@ -2036,15 +2287,18 @@
*
* args->eh The MAC header. It is non-null for a layer2
* packet, it is NULL for a layer-3 packet.
+ * **notyet**
+ * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
*
* m | args->m Pointer to the mbuf, as received from the caller.
* It may change if ipfw_chk() does an m_pullup, or if it
* consumes the packet because it calls send_reject().
* XXX This has to change, so that ipfw_chk() never modifies
* or consumes the buffer.
- * ip is simply an alias of the value of m, and it is kept
- * in sync with it (the packet is supposed to start with
- * the ip header).
+ * ip is the beginning of the ip(4 or 6) header.
+ * Calculated by adding the L3offset to the start of data.
+ * (Until we start using L3offset, the packet is
+ * supposed to start with the ip header).
*/
struct mbuf *m = args->m;
struct ip *ip = mtod(m, struct ip *);
@@ -2069,7 +2323,7 @@
/*
* oif | args->oif If NULL, ipfw_chk has been called on the
- * inbound path (ether_input, bdg_forward, ip_input).
+ * inbound path (ether_input, ip_input).
* If non-NULL, ipfw_chk has been called on the outbound path
* (ether_output, ip_output).
*/
@@ -2115,6 +2369,7 @@
struct in_addr src_ip, dst_ip; /* NOTE: network format */
u_int16_t ip_len=0;
int pktlen;
+ u_int16_t etype = 0; /* Host order stored ether type */
/*
* dyn_dir = MATCH_UNKNOWN when rules unchecked,
@@ -2163,14 +2418,20 @@
p = (mtod(m, char *) + (len)); \
} while (0)
+ /*
+ * If we have an ether header, pick up the ether type for later checks.
+ */
+ if (args->eh)
+ etype = ntohs(args->eh->ether_type);
+
/* Identify IP packets and fill up variables. */
if (pktlen >= sizeof(struct ip6_hdr) &&
- (args->eh == NULL || ntohs(args->eh->ether_type)==ETHERTYPE_IPV6) &&
- mtod(m, struct ip *)->ip_v == 6) {
+ (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
is_ipv6 = 1;
args->f_id.addr_type = 6;
hlen = sizeof(struct ip6_hdr);
- proto = mtod(m, struct ip6_hdr *)->ip6_nxt;
+ proto = ip6->ip6_nxt;
/* Search extension headers to find upper layer protocols */
while (ulp == NULL) {
@@ -2187,6 +2448,12 @@
args->f_id.flags = TCP(ulp)->th_flags;
break;
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+
case IPPROTO_UDP:
PULLUP_TO(hlen, ulp, struct udphdr);
dst_port = UDP(ulp)->uh_dport;
@@ -2203,7 +2470,14 @@
case IPPROTO_ROUTING: /* RFC 2460 */
PULLUP_TO(hlen, ulp, struct ip6_rthdr);
- if (((struct ip6_rthdr *)ulp)->ip6r_type != 0) {
+ switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
+ case 0:
+ ext_hd |= EXT_RTHDR0;
+ break;
+ case 2:
+ ext_hd |= EXT_RTHDR2;
+ break;
+ default:
printf("IPFW2: IPV6 - Unknown Routing "
"Header type(%d)\n",
((struct ip6_rthdr *)ulp)->ip6r_type);
@@ -2274,24 +2548,48 @@
PULLUP_TO(hlen, ulp, struct ip6_ext);
break;
+ case IPPROTO_PIM:
+ /* XXX PIM header check? */
+ PULLUP_TO(hlen, ulp, struct pim);
+ break;
+
+ case IPPROTO_CARP:
+ PULLUP_TO(hlen, ulp, struct carp_header);
+ if (((struct carp_header *)ulp)->carp_version !=
+ CARP_VERSION)
+ return (IP_FW_DENY);
+ if (((struct carp_header *)ulp)->carp_type !=
+ CARP_ADVERTISEMENT)
+ return (IP_FW_DENY);
+ break;
+
+ case IPPROTO_IPV6: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip6_hdr);
+ break;
+
+ case IPPROTO_IPV4: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip);
+ break;
+
default:
printf("IPFW2: IPV6 - Unknown Extension "
"Header(%d), ext_hd=%x\n", proto, ext_hd);
if (fw_deny_unknown_exthdrs)
return (IP_FW_DENY);
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
break;
} /*switch */
}
- args->f_id.src_ip6 = mtod(m,struct ip6_hdr *)->ip6_src;
- args->f_id.dst_ip6 = mtod(m,struct ip6_hdr *)->ip6_dst;
+ ip = mtod(m, struct ip *);
+ ip6 = (struct ip6_hdr *)ip;
+ args->f_id.src_ip6 = ip6->ip6_src;
+ args->f_id.dst_ip6 = ip6->ip6_dst;
args->f_id.src_ip = 0;
args->f_id.dst_ip = 0;
- args->f_id.flow_id6 = ntohl(mtod(m, struct ip6_hdr *)->ip6_flow);
+ args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
} else if (pktlen >= sizeof(struct ip) &&
- (args->eh == NULL || ntohs(args->eh->ether_type) == ETHERTYPE_IP) &&
- mtod(m, struct ip *)->ip_v == 4) {
+ (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
is_ipv4 = 1;
- ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
args->f_id.addr_type = 4;
@@ -2335,6 +2633,7 @@
}
}
+ ip = mtod(m, struct ip *);
args->f_id.src_ip = ntohl(src_ip.s_addr);
args->f_id.dst_ip = ntohl(dst_ip.s_addr);
}
@@ -2501,15 +2800,14 @@
case O_MAC_TYPE:
if (args->eh != NULL) {
- u_int16_t t =
- ntohs(args->eh->ether_type);
u_int16_t *p =
((ipfw_insn_u16 *)cmd)->ports;
int i;
for (i = cmdlen - 1; !match && i>0;
i--, p += 2)
- match = (t>=p[0] && t<=p[1]);
+ match = (etype >= p[0] &&
+ etype <= p[1]);
}
break;
@@ -2661,12 +2959,12 @@
case O_IPOPT:
match = (is_ipv4 &&
- ipopts_match(mtod(m, struct ip *), cmd) );
+ ipopts_match(ip, cmd) );
break;
case O_IPVER:
match = (is_ipv4 &&
- cmd->arg1 == mtod(m, struct ip *)->ip_v);
+ cmd->arg1 == ip->ip_v);
break;
case O_IPID:
@@ -2680,9 +2978,9 @@
if (cmd->opcode == O_IPLEN)
x = ip_len;
else if (cmd->opcode == O_IPTTL)
- x = mtod(m, struct ip *)->ip_ttl;
+ x = ip->ip_ttl;
else /* must be IPID */
- x = ntohs(mtod(m, struct ip *)->ip_id);
+ x = ntohs(ip->ip_id);
if (cmdlen == 1) {
match = (cmd->arg1 == x);
break;
@@ -2697,12 +2995,12 @@
case O_IPPRECEDENCE:
match = (is_ipv4 &&
- (cmd->arg1 == (mtod(m, struct ip *)->ip_tos & 0xe0)) );
+ (cmd->arg1 == (ip->ip_tos & 0xe0)) );
break;
case O_IPTOS:
match = (is_ipv4 &&
- flags_match(cmd, mtod(m, struct ip *)->ip_tos));
+ flags_match(cmd, ip->ip_tos));
break;
case O_TCPDATALEN:
@@ -2763,37 +3061,34 @@
break;
case O_ALTQ: {
- struct altq_tag *at;
+ struct pf_mtag *at;
ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
match = 1;
- mtag = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
- if (mtag != NULL)
+ at = pf_find_mtag(m);
+ if (at != NULL && at->qid != 0)
break;
- mtag = m_tag_get(PACKET_TAG_PF_QID,
- sizeof(struct altq_tag),
- M_NOWAIT);
- if (mtag == NULL) {
+ at = pf_get_mtag(m);
+ if (at == NULL) {
/*
* Let the packet fall back to the
* default ALTQ.
*/
break;
}
- at = (struct altq_tag *)(mtag+1);
at->qid = altq->qid;
if (is_ipv4)
at->af = AF_INET;
else
at->af = AF_LINK;
at->hdr = ip;
- m_tag_prepend(m, mtag);
break;
}
case O_LOG:
if (fw_verbose)
- ipfw_log(f, hlen, args, m, oif, offset);
+ ipfw_log(f, hlen, args, m,
+ oif, offset, tablearg, ip);
match = 1;
break;
@@ -2847,13 +3142,10 @@
break;
case O_IPSEC:
-#ifdef FAST_IPSEC
+#ifdef IPSEC
match = (m_tag_find(m,
PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
#endif
-#ifdef IPSEC
- match = (ipsec_getnhist(m) != 0);
-#endif
/* otherwise no match */
break;
@@ -2870,22 +3162,25 @@
&((ipfw_insn_ip6 *)cmd)->addr6);
break;
case O_IP6_SRC_MASK:
- if (is_ipv6) {
- ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
- struct in6_addr p = args->f_id.src_ip6;
-
- APPLY_MASK(&p, &te->mask6);
- match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
- }
- break;
-
case O_IP6_DST_MASK:
if (is_ipv6) {
- ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
- struct in6_addr p = args->f_id.dst_ip6;
-
- APPLY_MASK(&p, &te->mask6);
- match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
+ int i = cmdlen - 1;
+ struct in6_addr p;
+ struct in6_addr *d =
+ &((ipfw_insn_ip6 *)cmd)->addr6;
+
+ for (; !match && i > 0; d += 2,
+ i -= F_INSN_SIZE(struct in6_addr)
+ * 2) {
+ p = (cmd->opcode ==
+ O_IP6_SRC_MASK) ?
+ args->f_id.src_ip6:
+ args->f_id.dst_ip6;
+ APPLY_MASK(&p, &d[1]);
+ match =
+ IN6_ARE_ADDR_EQUAL(&d[0],
+ &p);
+ }
}
break;
@@ -2917,6 +3212,62 @@
match = is_ipv4;
break;
+ case O_TAG: {
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ /* Packet is already tagged with this tag? */
+ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
+
+ /* We have `untag' action when F_NOT flag is
+ * present. And we must remove this mtag from
+ * mbuf and reset `match' to zero (`match' will
+ * be inverted later).
+ * Otherwise we should allocate new mtag and
+ * push it into mbuf.
+ */
+ if (cmd->len & F_NOT) { /* `untag' action */
+ if (mtag != NULL)
+ m_tag_delete(m, mtag);
+ } else if (mtag == NULL) {
+ if ((mtag = m_tag_alloc(MTAG_IPFW,
+ tag, 0, M_NOWAIT)) != NULL)
+ m_tag_prepend(m, mtag);
+ }
+ match = (cmd->len & F_NOT) ? 0: 1;
+ break;
+ }
+
+ case O_TAGGED: {
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ if (cmdlen == 1) {
+ match = m_tag_locate(m, MTAG_IPFW,
+ tag, NULL) != NULL;
+ break;
+ }
+
+ /* we have ranges */
+ for (mtag = m_tag_first(m);
+ mtag != NULL && !match;
+ mtag = m_tag_next(m, mtag)) {
+ uint16_t *p;
+ int i;
+
+ if (mtag->m_tag_cookie != MTAG_IPFW)
+ continue;
+
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for(; !match && i > 0; i--, p += 2)
+ match =
+ mtag->m_tag_id >= p[0] &&
+ mtag->m_tag_id <= p[1];
+ }
+ break;
+ }
+
/*
* The second set of opcodes represents 'actions',
* i.e. the terminal part of a rule once the packet
@@ -2936,7 +3287,7 @@
* or to the SKIPTO target ('goto again' after
* having set f, cmd and l), respectively.
*
- * O_LOG and O_ALTQ action parameters:
+ * O_TAG, O_LOG and O_ALTQ action parameters:
* perform some action and set match = 1;
*
* O_LIMIT and O_KEEP_STATE: these opcodes are
@@ -2961,7 +3312,7 @@
case O_LIMIT:
case O_KEEP_STATE:
if (install_state(f,
- (ipfw_insn_limit *)cmd, args)) {
+ (ipfw_insn_limit *)cmd, args, tablearg)) {
retval = IP_FW_DENY;
goto done; /* error/limit violation */
}
@@ -3052,7 +3403,7 @@
case O_SKIPTO:
f->pcnt++; /* update stats */
f->bcnt += pktlen;
- f->timestamp = time_second;
+ f->timestamp = time_uptime;
if (cmd->opcode == O_COUNT)
goto next_rule;
/* handle skipto */
@@ -3072,20 +3423,21 @@
is_icmp_query(ICMP(ulp))) &&
!(m->m_flags & (M_BCAST|M_MCAST)) &&
!IN_MULTICAST(ntohl(dst_ip.s_addr))) {
- send_reject(args, cmd->arg1,
- offset,ip_len);
+ send_reject(args, cmd->arg1, ip_len, ip);
m = args->m;
}
/* FALLTHROUGH */
#ifdef INET6
case O_UNREACH6:
if (hlen > 0 && is_ipv6 &&
+ ((offset & IP6F_OFF_MASK) == 0) &&
(proto != IPPROTO_ICMPV6 ||
(is_icmp6_query(args->f_id.flags) == 1)) &&
!(m->m_flags & (M_BCAST|M_MCAST)) &&
!IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
- send_reject6(args, cmd->arg1,
- offset, hlen);
+ send_reject6(
+ args, cmd->arg1, hlen,
+ (struct ip6_hdr *)ip);
m = args->m;
}
/* FALLTHROUGH */
@@ -3094,14 +3446,26 @@
retval = IP_FW_DENY;
goto done;
- case O_FORWARD_IP:
+ case O_FORWARD_IP: {
+ struct sockaddr_in *sa;
+ sa = &(((ipfw_insn_sa *)cmd)->sa);
if (args->eh) /* not valid on layer2 pkts */
break;
- if (!q || dyn_dir == MATCH_FORWARD)
- args->next_hop =
- &((ipfw_insn_sa *)cmd)->sa;
+ if (!q || dyn_dir == MATCH_FORWARD) {
+ if (sa->sin_addr.s_addr == INADDR_ANY) {
+ bcopy(sa, &args->hopstore,
+ sizeof(*sa));
+ args->hopstore.sin_addr.s_addr =
+ htonl(tablearg);
+ args->next_hop =
+ &args->hopstore;
+ } else {
+ args->next_hop = sa;
+ }
+ }
retval = IP_FW_PASS;
- goto done;
+ }
+ goto done;
case O_NETGRAPH:
case O_NGTEE:
@@ -3114,6 +3478,179 @@
IP_FW_NETGRAPH : IP_FW_NGTEE;
goto done;
+#ifdef IPFIREWALL_NAT
+ case O_NAT: {
+ struct cfg_nat *t;
+ struct mbuf *mcl;
+ /* XXX - libalias duct tape */
+ int ldt;
+ char *c;
+
+ ldt = 0;
+ args->rule = f; /* Report matching rule. */
+ retval = 0;
+ t = ((ipfw_insn_nat *)cmd)->nat;
+ if (t == NULL) {
+ t = lookup_nat(cmd->arg1);
+ if (t == NULL) {
+ retval = IP_FW_DENY;
+ goto done;
+ } else
+ ((ipfw_insn_nat *)cmd)->nat =
+ t;
+ }
+ if ((mcl = m_megapullup(m, m->m_pkthdr.len)) ==
+ NULL)
+ goto badnat;
+ ip = mtod(mcl, struct ip *);
+ if (args->eh == NULL) {
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ }
+
+ /*
+ * XXX - Libalias checksum offload 'duct tape':
+ *
+ * locally generated packets have only the
+ * pseudo-header checksum calculated,
+ * and libalias will break it[1], so
+ * mark them for a later fix. Moreover,
+ * there are cases where libalias
+ * modifies TCP packet data[2]; mark
+ * those for a later fix too.
+ *
+ * [1] libalias was never meant to run
+ * in the kernel, so it has no
+ * knowledge of checksum
+ * offloading, and it expects a packet
+ * with a full internet
+ * checksum. Unfortunately, packets
+ * generated locally will have just the
+ * pseudo-header checksum calculated, and when
+ * libalias tries to adjust the
+ * checksum it will actually corrupt it.
+ *
+ * [2] when libalias modifies TCP data
+ * content, the full TCP checksum has to
+ * be recomputed: the problem is that
+ * libalias doesn't know anything
+ * about checksum offloading. To
+ * work around this, we do no
+ * checksumming in LibAlias, but only
+ * mark the packets in the th_x2 field. If
+ * we receive a marked packet, we
+ * calculate the correct checksum for it,
+ * aware of offloading. Why such a
+ * terrible hack instead of
+ * recalculating the checksum for each
+ * packet? Because the previous
+ * checksum was not checked!
+ * Recalculating checksums for EVERY
+ * packet would hide ALL transmission
+ * errors. Yes, marked packets still
+ * suffer from this problem. But,
+ * sigh, natd(8) has this problem,
+ * too.
+ *
+ * TODO: make libalias mbuf aware (so
+ * it can handle delayed checksum and TSO)
+ */
+
+ if (mcl->m_pkthdr.rcvif == NULL &&
+ mcl->m_pkthdr.csum_flags &
+ CSUM_DELAY_DATA)
+ ldt = 1;
+
+ c = mtod(mcl, char *);
+ if (oif == NULL)
+ retval = LibAliasIn(t->lib, c,
+ MCLBYTES);
+ else
+ retval = LibAliasOut(t->lib, c,
+ MCLBYTES);
+ if (retval != PKT_ALIAS_OK) {
+ /* XXX - should I add some logging? */
+ m_free(mcl);
+ badnat:
+ args->m = NULL;
+ retval = IP_FW_DENY;
+ goto done;
+ }
+ mcl->m_pkthdr.len = mcl->m_len =
+ ntohs(ip->ip_len);
+
+ /*
+ * XXX - libalias checksum offload
+ * 'duct tape' (see above)
+ */
+
+ if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
+ ip->ip_p == IPPROTO_TCP) {
+ struct tcphdr *th;
+
+ th = (struct tcphdr *)(ip + 1);
+ if (th->th_x2)
+ ldt = 1;
+ }
+
+ if (ldt) {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ u_short cksum;
+
+ ip->ip_len = ntohs(ip->ip_len);
+ cksum = in_pseudo(
+ ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ htons(ip->ip_p + ip->ip_len -
+ (ip->ip_hl << 2))
+ );
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip + 1);
+ /*
+ * Maybe it was set in
+ * libalias...
+ */
+ th->th_x2 = 0;
+ th->th_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct tcphdr,
+ th_sum);
+ break;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip + 1);
+ uh->uh_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct udphdr,
+ uh_sum);
+ break;
+ }
+ /*
+ * No hw checksum offloading: do it
+ * ourselves.
+ */
+ if ((mcl->m_pkthdr.csum_flags &
+ CSUM_DELAY_DATA) == 0) {
+ in_delayed_cksum(mcl);
+ mcl->m_pkthdr.csum_flags &=
+ ~CSUM_DELAY_DATA;
+ }
+ ip->ip_len = htons(ip->ip_len);
+ }
+
+ if (args->eh == NULL) {
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+ }
+
+ args->m = mcl;
+ retval = IP_FW_NAT;
+ goto done;
+ }
+#endif
+
default:
panic("-- unknown opcode %d\n", cmd->opcode);
} /* end of switch() on opcodes */
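For reference, the range match performed by the new O_TAGGED opcode above boils
down to scanning pairs of 16-bit bounds carried in the instruction body. A
minimal standalone sketch of that loop (names here are local stand-ins, not the
kernel types):

	/* Return non-zero if id falls inside any [lo,hi] pair. */
	#include <stdint.h>
	#include <stdio.h>

	static int
	tag_in_ranges(uint16_t id, const uint16_t *pairs, int npairs)
	{
		int match = 0;

		for (; !match && npairs > 0; npairs--, pairs += 2)
			match = id >= pairs[0] && id <= pairs[1];
		return (match);
	}

	int
	main(void)
	{
		uint16_t ranges[] = { 10, 20, 100, 200 };

		printf("%d\n", tag_in_ranges(150, ranges, 2));	/* prints 1 */
		return (0);
	}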
@@ -3142,7 +3679,7 @@
/* Update statistics */
f->pcnt++;
f->bcnt += pktlen;
- f->timestamp = time_second;
+ f->timestamp = time_uptime;
IPFW_RUNLOCK(chain);
return (retval);
@@ -3168,34 +3705,6 @@
}
/*
- * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given
- * pipe/queue, or to all of them (match == NULL).
- */
-void
-flush_pipe_ptrs(struct dn_flow_set *match)
-{
- struct ip_fw *rule;
-
- IPFW_WLOCK(&layer3_chain);
- for (rule = layer3_chain.rules; rule; rule = rule->next) {
- ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule);
-
- if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE)
- continue;
- /*
- * XXX Use bcmp/bzero to handle pipe_ptr to overcome
- * possible alignment problems on 64-bit architectures.
- * This code is seldom used so we do not worry too
- * much about efficiency.
- */
- if (match == NULL ||
- !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) )
- bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr));
- }
- IPFW_WUNLOCK(&layer3_chain);
-}
-
-/*
* Add a new rule to the list. Copy the rule into a malloc'ed area, then
* possibly create a rule number and add the rule to the list.
* Update the rule_number in the input struct so the caller knows it as well.
@@ -3285,7 +3794,8 @@
* Arguments are not checked, so they better be correct.
*/
static struct ip_fw *
-remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev)
+remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
+ struct ip_fw *prev)
{
struct ip_fw *n;
int l = RULESIZE(rule);
@@ -3360,6 +3870,7 @@
* 2 move rules with given number to new set
* 3 move rules with given set number to new set
* 4 swap sets with given numbers
+ * 5 delete rules with given number and with given set number
*/
static int
del_entry(struct ip_fw_chain *chain, u_int32_t arg)
@@ -3372,11 +3883,9 @@
cmd = (arg >> 24) & 0xff;
new_set = (arg >> 16) & 0xff;
- if (cmd > 4)
+ if (cmd > 5 || new_set > RESVD_SET)
return EINVAL;
- if (new_set > RESVD_SET)
- return EINVAL;
- if (cmd == 0 || cmd == 2) {
+ if (cmd == 0 || cmd == 2 || cmd == 5) {
if (rulenum >= IPFW_DEFAULT_RULE)
return EINVAL;
} else {
@@ -3440,6 +3949,25 @@
else if (rule->set == new_set)
rule->set = rulenum;
break;
+ case 5: /* delete rules with given number and with given set number.
+ * rulenum - given rule number;
+ * new_set - given set number.
+ */
+ for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
+ ;
+ if (rule->rulenum != rulenum) {
+ IPFW_WUNLOCK(chain);
+ return (EINVAL);
+ }
+ flush_rule_ptrs(chain);
+ while (rule->rulenum == rulenum) {
+ if (rule->set == new_set)
+ rule = remove_rule(chain, rule, prev);
+ else {
+ prev = rule;
+ rule = rule->next;
+ }
+ }
}
/*
* Look for rules to reclaim. We grab the list before
@@ -3473,23 +4001,39 @@
/**
* Reset some or all counters on firewall rules.
- * @arg frwl is null to clear all entries, or contains a specific
- * rule number.
- * @arg log_only is 1 if we only want to reset logs, zero otherwise.
+ * The argument `arg' is a u_int32_t. The low 16 bits are the rule number,
+ * the next 8 bits are the set number, the top 8 bits are the command:
+ * 0 work with rules from all sets;
+ * 1 work with rules only from the specified set.
+ * The rule number is zero if we want to clear all entries.
+ * log_only is 1 if we only want to reset logs, zero otherwise.
*/
static int
-zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only)
+zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
{
struct ip_fw *rule;
char *msg;
+ uint16_t rulenum = arg & 0xffff;
+ uint8_t set = (arg >> 16) & 0xff;
+ uint8_t cmd = (arg >> 24) & 0xff;
+
+ if (cmd > 1)
+ return (EINVAL);
+ if (cmd == 1 && set > RESVD_SET)
+ return (EINVAL);
+
IPFW_WLOCK(chain);
if (rulenum == 0) {
norule_counter = 0;
- for (rule = chain->rules; rule; rule = rule->next)
+ for (rule = chain->rules; rule; rule = rule->next) {
+ /* Skip rules from another set. */
+ if (cmd == 1 && rule->set != set)
+ continue;
clear_counters(rule, log_only);
+ }
msg = log_only ? "ipfw: All logging counts reset.\n" :
- "ipfw: Accounting cleared.\n";
+ "ipfw: Accounting cleared.\n";
} else {
int cleared = 0;
/*
@@ -3499,7 +4043,8 @@
for (rule = chain->rules; rule; rule = rule->next)
if (rule->rulenum == rulenum) {
while (rule && rule->rulenum == rulenum) {
- clear_counters(rule, log_only);
+ if (cmd == 0 || rule->set == set)
+ clear_counters(rule, log_only);
rule = rule->next;
}
cleared = 1;
@@ -3510,7 +4055,7 @@
return (EINVAL);
}
msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
- "ipfw: Entry %d cleared.\n";
+ "ipfw: Entry %d cleared.\n";
}
IPFW_WUNLOCK(chain);
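The 32-bit sockopt argument decoded by zero_entry() above (and by the extended
del_entry()) packs the rule number in the low 16 bits, the set number in the
next 8 bits and the command in the top 8 bits. A minimal userspace sketch of
that layout, assuming nothing beyond <stdint.h>:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t
	pack_ipfw_arg(uint16_t rulenum, uint8_t set, uint8_t cmd)
	{
		return ((uint32_t)cmd << 24) | ((uint32_t)set << 16) | rulenum;
	}

	int
	main(void)
	{
		/* "zero counters of rule 100, but only in set 3" (cmd 1). */
		uint32_t arg = pack_ipfw_arg(100, 3, 1);

		printf("rulenum=%u set=%u cmd=%u\n",
		    (unsigned)(arg & 0xffff),
		    (unsigned)((arg >> 16) & 0xff),
		    (unsigned)((arg >> 24) & 0xff));
		return (0);
	}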
@@ -3587,6 +4132,7 @@
case O_IP6:
#endif
case O_IP4:
+ case O_TAG:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
break;
@@ -3659,6 +4205,7 @@
case O_IPTTL:
case O_IPLEN:
case O_TCPDATALEN:
+ case O_TAGGED:
if (cmdlen < 1 || cmdlen > 31)
goto bad_size;
break;
@@ -3684,7 +4231,7 @@
case O_PIPE:
case O_QUEUE:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
goto check_action;
@@ -3709,6 +4256,14 @@
return EINVAL;
else
goto check_size;
+ case O_NAT:
+#ifdef IPFIREWALL_NAT
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
+ goto bad_size;
+ goto check_action;
+#else
+ return EINVAL;
+#endif
case O_FORWARD_MAC: /* XXX not implemented yet */
case O_CHECK_STATE:
case O_COUNT:
@@ -3809,7 +4364,9 @@
char *ep = bp + space;
struct ip_fw *rule;
int i;
+ time_t boot_seconds;
+ boot_seconds = boottime.tv_sec;
/* XXX this can take a long time and locking will block packet flow */
IPFW_RLOCK(chain);
for (rule = chain->rules; rule ; rule = rule->next) {
@@ -3822,8 +4379,15 @@
i = RULESIZE(rule);
if (bp + i <= ep) {
bcopy(rule, bp, i);
+ /*
+ * XXX HACK. Store the disable mask in the "next" pointer
+ * in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule),
sizeof(set_disable));
+ if (((struct ip_fw *)bp)->timestamp)
+ ((struct ip_fw *)bp)->timestamp += boot_seconds;
bp += i;
}
}
@@ -3841,6 +4405,14 @@
bcopy(&(p->rule->rulenum), &(dst->rule),
sizeof(p->rule->rulenum));
/*
+ * store set number into high word of
+ * dst->rule pointer.
+ */
+ bcopy(&(p->rule->set),
+ (char *)&dst->rule +
+ sizeof(p->rule->rulenum),
+ sizeof(p->rule->set));
+ /*
* store a non-null value in "next".
* The userland code will interpret a
* NULL here as a marker
@@ -3849,8 +4421,8 @@
bcopy(&dst, &dst->next, sizeof(dst));
last = dst;
dst->expire =
- TIME_LEQ(dst->expire, time_second) ?
- 0 : dst->expire - time_second ;
+ TIME_LEQ(dst->expire, time_uptime) ?
+ 0 : dst->expire - time_uptime ;
bp += sizeof(ipfw_dyn_rule);
}
}
@@ -3869,12 +4441,12 @@
ipfw_ctl(struct sockopt *sopt)
{
#define RULE_MAXSIZE (256*sizeof(u_int32_t))
- int error, rule_num;
+ int error;
size_t size;
struct ip_fw *buf, *rule;
u_int32_t rulenum[2];
- error = suser(sopt->sopt_td);
+ error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
if (error)
return (error);
@@ -3884,14 +4456,9 @@
*/
if (sopt->sopt_name == IP_FW_ADD ||
(sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
-#if __FreeBSD_version >= 500034
error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
if (error)
return (error);
-#else /* FreeBSD 4.x */
- if (securelevel >= 3)
- return (EPERM);
-#endif
}
error = 0;
@@ -3941,9 +4508,10 @@
IPFW_WLOCK(&layer3_chain);
layer3_chain.reap = NULL;
free_chain(&layer3_chain, 0 /* keep default rule */);
- rule = layer3_chain.reap, layer3_chain.reap = NULL;
+ rule = layer3_chain.reap;
+ layer3_chain.reap = NULL;
IPFW_WUNLOCK(&layer3_chain);
- if (layer3_chain.reap != NULL)
+ if (rule != NULL)
reap_rules(rule);
break;
@@ -3991,15 +4559,15 @@
break;
case IP_FW_ZERO:
- case IP_FW_RESETLOG: /* argument is an int, the rule number */
- rule_num = 0;
+ case IP_FW_RESETLOG: /* argument is a u_int32_t, the rule number */
+ rulenum[0] = 0;
if (sopt->sopt_val != 0) {
- error = sooptcopyin(sopt, &rule_num,
- sizeof(int), sizeof(int));
+ error = sooptcopyin(sopt, rulenum,
+ sizeof(u_int32_t), sizeof(u_int32_t));
if (error)
break;
}
- error = zero_entry(&layer3_chain, rule_num,
+ error = zero_entry(&layer3_chain, rulenum[0],
sopt->sopt_name == IP_FW_RESETLOG);
break;
@@ -4069,10 +4637,6 @@
}
size = sopt->sopt_valsize;
tbl = malloc(size, M_TEMP, M_WAITOK);
- if (tbl == NULL) {
- error = ENOMEM;
- break;
- }
error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
if (error) {
free(tbl, M_TEMP);
@@ -4092,6 +4656,187 @@
}
break;
+#ifdef IPFIREWALL_NAT
+ case IP_FW_NAT_CFG:
+ {
+ struct cfg_nat *ptr, *ser_n;
+ char *buf;
+
+ buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ error = sooptcopyin(sopt, buf, NAT_BUF_LEN,
+ sizeof(struct cfg_nat));
+ ser_n = (struct cfg_nat *)buf;
+
+ /*
+ * Find/create nat rule.
+ */
+ IPFW_WLOCK(&layer3_chain);
+ ptr = lookup_nat(ser_n->id);
+ if (ptr == NULL) {
+ /* New rule: allocate and init new instance. */
+ ptr = malloc(sizeof(struct cfg_nat),
+ M_IPFW, M_NOWAIT | M_ZERO);
+ if (ptr == NULL) {
+ IPFW_WUNLOCK(&layer3_chain);
+ free(buf, M_IPFW);
+ return (ENOSPC);
+ }
+ ptr->lib = LibAliasInit(NULL);
+ if (ptr->lib == NULL) {
+ IPFW_WUNLOCK(&layer3_chain);
+ free(ptr, M_IPFW);
+ free(buf, M_IPFW);
+ return (EINVAL);
+ }
+ LIST_INIT(&ptr->redir_chain);
+ } else {
+ /* Entry already present: temporarily unhook it. */
+ UNHOOK_NAT(ptr);
+ flush_nat_ptrs(ser_n->id);
+ }
+ IPFW_WUNLOCK(&layer3_chain);
+
+ /*
+ * Basic nat configuration.
+ */
+ ptr->id = ser_n->id;
+ /*
+ * XXX - what if this rule doesn't NAT any IP and just
+ * redirects?
+ * Do we set the alias address to 0.0.0.0?
+ */
+ ptr->ip = ser_n->ip;
+ ptr->redir_cnt = ser_n->redir_cnt;
+ ptr->mode = ser_n->mode;
+ LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode);
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
+
+ /*
+ * Redir and LSNAT configuration.
+ */
+ /* Delete old cfgs. */
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ /* Add new entries. */
+ add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
+ free(buf, M_IPFW);
+ IPFW_WLOCK(&layer3_chain);
+ HOOK_NAT(&layer3_chain.nat, ptr);
+ IPFW_WUNLOCK(&layer3_chain);
+ }
+ break;
+
+ case IP_FW_NAT_DEL:
+ {
+ struct cfg_nat *ptr;
+ int i;
+
+ error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ IPFW_WLOCK(&layer3_chain);
+ ptr = lookup_nat(i);
+ if (ptr == NULL) {
+ error = EINVAL;
+ IPFW_WUNLOCK(&layer3_chain);
+ break;
+ }
+ UNHOOK_NAT(ptr);
+ flush_nat_ptrs(i);
+ IPFW_WUNLOCK(&layer3_chain);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ break;
+
+ case IP_FW_NAT_GET_CONFIG:
+ {
+ uint8_t *data;
+ struct cfg_nat *n;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ int nat_cnt, off;
+
+ nat_cnt = 0;
+ off = sizeof(nat_cnt);
+
+ data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ IPFW_RLOCK(&layer3_chain);
+ /* Serialize all the data. */
+ LIST_FOREACH(n, &layer3_chain.nat, _next) {
+ nat_cnt++;
+ if (off + SOF_NAT < NAT_BUF_LEN) {
+ bcopy(n, &data[off], SOF_NAT);
+ off += SOF_NAT;
+ LIST_FOREACH(r, &n->redir_chain, _next) {
+ if (off + SOF_REDIR < NAT_BUF_LEN) {
+ bcopy(r, &data[off],
+ SOF_REDIR);
+ off += SOF_REDIR;
+ LIST_FOREACH(s, &r->spool_chain,
+ _next) {
+ if (off + SOF_SPOOL <
+ NAT_BUF_LEN) {
+ bcopy(s,
+ &data[off],
+ SOF_SPOOL);
+ off +=
+ SOF_SPOOL;
+ } else
+ goto nospace;
+ }
+ } else
+ goto nospace;
+ }
+ } else
+ goto nospace;
+ }
+ bcopy(&nat_cnt, data, sizeof(nat_cnt));
+ IPFW_RUNLOCK(&layer3_chain);
+ error = sooptcopyout(sopt, data, NAT_BUF_LEN);
+ free(data, M_IPFW);
+ break;
+ nospace:
+ IPFW_RUNLOCK(&layer3_chain);
+ printf("serialized data buffer not big enough:"
+ "please increase NAT_BUF_LEN\n");
+ free(data, M_IPFW);
+ }
+ break;
+
+ case IP_FW_NAT_GET_LOG:
+ {
+ uint8_t *data;
+ struct cfg_nat *ptr;
+ int i, size, cnt, sof;
+
+ data = NULL;
+ sof = LIBALIAS_BUF_SIZE;
+ cnt = 0;
+
+ IPFW_RLOCK(&layer3_chain);
+ size = i = 0;
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ cnt++;
+ size = cnt * (sof + sizeof(int));
+ data = realloc(data, size, M_IPFW, M_NOWAIT | M_ZERO);
+ if (data == NULL) {
+ IPFW_RUNLOCK(&layer3_chain);
+ return (ENOSPC);
+ }
+ bcopy(&ptr->id, &data[i], sizeof(int));
+ i += sizeof(int);
+ bcopy(ptr->lib->logDesc, &data[i], sof);
+ i += sof;
+ }
+ IPFW_RUNLOCK(&layer3_chain);
+ error = sooptcopyout(sopt, data, size);
+ free(data, M_IPFW);
+ }
+ break;
+#endif
+
default:
printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
error = EINVAL;
@@ -4141,17 +4886,17 @@
continue;
if ( (q->state & BOTH_SYN) != BOTH_SYN)
continue;
- if (TIME_LEQ( time_second+dyn_keepalive_interval,
+ if (TIME_LEQ( time_uptime+dyn_keepalive_interval,
q->expire))
continue; /* too early */
- if (TIME_LEQ(q->expire, time_second))
+ if (TIME_LEQ(q->expire, time_uptime))
continue; /* too late, rule expired */
- *mtailp = send_pkt(&(q->id), q->ack_rev - 1,
+ *mtailp = send_pkt(NULL, &(q->id), q->ack_rev - 1,
q->ack_fwd, TH_SYN);
if (*mtailp != NULL)
mtailp = &(*mtailp)->m_nextpkt;
- *mtailp = send_pkt(&(q->id), q->ack_fwd - 1,
+ *mtailp = send_pkt(NULL, &(q->id), q->ack_fwd - 1,
q->ack_rev, 0);
if (*mtailp != NULL)
mtailp = &(*mtailp)->m_nextpkt;
@@ -4177,24 +4922,24 @@
/* Setup IPv6 fw sysctl tree. */
sysctl_ctx_init(&ip6_fw_sysctl_ctx);
ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx,
- SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
- CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
+ SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
+ CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
+ SYSCTL_ADD_PROC(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
+ OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
+ &fw6_enable, 0, ipfw_chg_hook, "I", "Enable ipfw+6");
SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
- OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
- &fw_deny_unknown_exthdrs, 0,
- "Deny packets with unknown IPv6 Extension Headers");
+ OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
+ &fw_deny_unknown_exthdrs, 0,
+ "Deny packets with unknown IPv6 Extension Headers");
#endif
layer3_chain.rules = NULL;
- layer3_chain.want_write = 0;
- layer3_chain.busy_count = 0;
- cv_init(&layer3_chain.cv, "Condition variable for IPFW rw locks");
IPFW_LOCK_INIT(&layer3_chain);
- ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule zone",
+ ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
IPFW_DYN_LOCK_INIT();
- callout_init(&ipfw_timeout, NET_CALLOUT_MPSAFE);
+ callout_init(&ipfw_timeout, CALLOUT_MPSAFE);
bzero(&default_rule, sizeof default_rule);
@@ -4221,7 +4966,11 @@
}
ip_fw_default_rule = layer3_chain.rules;
- printf("ipfw2 (+ipv6) initialized, divert %s, "
+ printf("ipfw2 "
+#ifdef INET6
+ "(+ipv6) "
+#endif
+ "initialized, divert %s, "
"rule-based forwarding "
#ifdef IPFIREWALL_FORWARD
"enabled, "
@@ -4259,8 +5008,12 @@
}
ip_fw_ctl_ptr = ipfw_ctl;
ip_fw_chk_ptr = ipfw_chk;
- callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
-
+ callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
+#ifdef IPFIREWALL_NAT
+ LIST_INIT(&layer3_chain.nat);
+ ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+#endif
return (0);
}
@@ -4268,12 +5021,24 @@
ipfw_destroy(void)
{
struct ip_fw *reap;
+#ifdef IPFIREWALL_NAT
+ struct cfg_nat *ptr, *ptr_temp;
+#endif
ip_fw_chk_ptr = NULL;
ip_fw_ctl_ptr = NULL;
callout_drain(&ipfw_timeout);
IPFW_WLOCK(&layer3_chain);
flush_tables(&layer3_chain);
+#ifdef IPFIREWALL_NAT
+ LIST_FOREACH_SAFE(ptr, &layer3_chain.nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
+#endif
layer3_chain.reap = NULL;
free_chain(&layer3_chain, 1 /* kill default rule */);
reap = layer3_chain.reap, layer3_chain.reap = NULL;
@@ -4282,6 +5047,8 @@
reap_rules(reap);
IPFW_DYN_LOCK_DESTROY();
uma_zdestroy(ipfw_dyn_rule_zone);
+ if (ipfw_dyn_v != NULL)
+ free(ipfw_dyn_v, M_IPFW);
IPFW_LOCK_DESTROY(&layer3_chain);
#ifdef INET6
Index: tcp_fsm.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_fsm.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_fsm.h -L sys/netinet/tcp_fsm.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_fsm.h
+++ sys/netinet/tcp_fsm.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,14 +28,15 @@
* SUCH DAMAGE.
*
* @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_fsm.h,v 1.18 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/tcp_fsm.h,v 1.20 2007/07/30 11:06:41 des Exp $
*/
#ifndef _NETINET_TCP_FSM_H_
-#define _NETINET_TCP_FSM_H_
+#define _NETINET_TCP_FSM_H_
/*
* TCP FSM state definitions.
+ *
* Per RFC793, September, 1981.
*/
@@ -75,10 +77,10 @@
#ifdef TCPOUTFLAGS
/*
- * Flags used when sending segments in tcp_output.
- * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
- * determined by state, with the proviso that TH_FIN is sent only
- * if all data queued for output is included in the segment.
+ * Flags used when sending segments in tcp_output. Basic flags (TH_RST,
+ * TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso
+ * that TH_FIN is sent only if all data queued for output is included in the
+ * segment.
*/
static u_char tcp_outflags[TCP_NSTATES] = {
TH_RST|TH_ACK, /* 0, CLOSED */
@@ -100,7 +102,7 @@
#endif
#ifdef TCPSTATES
-const char *tcpstates[] = {
+static char const * const tcpstates[] = {
"CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
"ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
"LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
--- /dev/null
+++ sys/netinet/sctp_sysctl.h
@@ -0,0 +1,464 @@
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_sysctl.h,v 1.13 2007/09/15 19:07:42 rrs Exp $");
+
+#ifndef __sctp_sysctl_h__
+#define __sctp_sysctl_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_constants.h>
+
+/*
+ * limits for the sysctl variables
+ */
+/* maxdgram: Maximum outgoing SCTP buffer size */
+#define SCTPCTL_MAXDGRAM_DESC "Maximum outgoing SCTP buffer size"
+#define SCTPCTL_MAXDGRAM_MIN 0
+#define SCTPCTL_MAXDGRAM_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXDGRAM_DEFAULT 262144 /* 256k */
+
+/* recvspace: Maximum incoming SCTP buffer size */
+#define SCTPCTL_RECVSPACE_DESC "Maximum incoming SCTP buffer size"
+#define SCTPCTL_RECVSPACE_MIN 0
+#define SCTPCTL_RECVSPACE_MAX 0xFFFFFFFF
+#define SCTPCTL_RECVSPACE_DEFAULT 262144 /* 256k */
+
+/* autoasconf: Enable SCTP Auto-ASCONF */
+#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF"
+#define SCTPCTL_AUTOASCONF_MIN 0
+#define SCTPCTL_AUTOASCONF_MAX 1
+#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF
+
+/* ecn_enable: Enable SCTP ECN */
+#define SCTPCTL_ECN_ENABLE_DESC "Enable SCTP ECN"
+#define SCTPCTL_ECN_ENABLE_MIN 0
+#define SCTPCTL_ECN_ENABLE_MAX 1
+#define SCTPCTL_ECN_ENABLE_DEFAULT 1
+
+/* ecn_nonce: Enable SCTP ECN Nonce */
+#define SCTPCTL_ECN_NONCE_DESC "Enable SCTP ECN Nonce"
+#define SCTPCTL_ECN_NONCE_MIN 0
+#define SCTPCTL_ECN_NONCE_MAX 1
+#define SCTPCTL_ECN_NONCE_DEFAULT 0
+
+/* strict_sacks: Enable SCTP Strict SACK checking */
+#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking"
+#define SCTPCTL_STRICT_SACKS_MIN 0
+#define SCTPCTL_STRICT_SACKS_MAX 1
+#define SCTPCTL_STRICT_SACKS_DEFAULT 0
+
+/* loopback_nocsum: Enable NO Csum on packets sent on loopback */
+#define SCTPCTL_LOOPBACK_NOCSUM_DESC "Enable NO Csum on packets sent on loopback"
+#define SCTPCTL_LOOPBACK_NOCSUM_MIN 0
+#define SCTPCTL_LOOPBACK_NOCSUM_MAX 1
+#define SCTPCTL_LOOPBACK_NOCSUM_DEFAULT 1
+
+/* strict_init: Enable strict INIT/INIT-ACK singleton enforcement */
+#define SCTPCTL_STRICT_INIT_DESC "Enable strict INIT/INIT-ACK singleton enforcement"
+#define SCTPCTL_STRICT_INIT_MIN 0
+#define SCTPCTL_STRICT_INIT_MAX 1
+#define SCTPCTL_STRICT_INIT_DEFAULT 1
+
+/* peer_chkoh: Amount to debit peers rwnd per chunk sent */
+#define SCTPCTL_PEER_CHKOH_DESC "Amount to debit peers rwnd per chunk sent"
+#define SCTPCTL_PEER_CHKOH_MIN 0
+#define SCTPCTL_PEER_CHKOH_MAX 0xFFFFFFFF
+#define SCTPCTL_PEER_CHKOH_DEFAULT 256
+
+/* maxburst: Default max burst for sctp endpoints */
+#define SCTPCTL_MAXBURST_DESC "Default max burst for sctp endpoints"
+#define SCTPCTL_MAXBURST_MIN 1
+#define SCTPCTL_MAXBURST_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* maxchunks: Default max chunks on queue per asoc */
+#define SCTPCTL_MAXCHUNKS_DESC "Default max chunks on queue per asoc"
+#define SCTPCTL_MAXCHUNKS_MIN 0
+#define SCTPCTL_MAXCHUNKS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXCHUNKS_DEFAULT SCTP_ASOC_MAX_CHUNKS_ON_QUEUE
+
+/* tcbhashsize: Tuneable for Hash table sizes */
+#define SCTPCTL_TCBHASHSIZE_DESC "Tunable for TCB hash table sizes"
+#define SCTPCTL_TCBHASHSIZE_MIN 1
+#define SCTPCTL_TCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_TCBHASHSIZE_DEFAULT SCTP_TCBHASHSIZE
+
+/* pcbhashsize: Tuneable for PCB Hash table sizes */
+#define SCTPCTL_PCBHASHSIZE_DESC "Tunable for PCB hash table sizes"
+#define SCTPCTL_PCBHASHSIZE_MIN 1
+#define SCTPCTL_PCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_PCBHASHSIZE_DEFAULT SCTP_PCBHASHSIZE
+
+/* min_split_point: Minimum size when splitting a chunk */
+#define SCTPCTL_MIN_SPLIT_POINT_DESC "Minimum size when splitting a chunk"
+#define SCTPCTL_MIN_SPLIT_POINT_MIN 0
+#define SCTPCTL_MIN_SPLIT_POINT_MAX 0xFFFFFFFF
+#define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN
+
+/* chunkscale: Tuneable for Scaling of number of chunks and messages */
+#define SCTPCTL_CHUNKSCALE_DESC "Tuneable for Scaling of number of chunks and messages"
+#define SCTPCTL_CHUNKSCALE_MIN 1
+#define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF
+#define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE
+
+/* delayed_sack_time: Default delayed SACK timer in msec */
+#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in msec"
+#define SCTPCTL_DELAYED_SACK_TIME_MIN 0
+#define SCTPCTL_DELAYED_SACK_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_DELAYED_SACK_TIME_DEFAULT SCTP_RECV_MSEC
+
+/* sack_freq: Default SACK frequency */
+#define SCTPCTL_SACK_FREQ_DESC "Default SACK frequency"
+#define SCTPCTL_SACK_FREQ_MIN 0
+#define SCTPCTL_SACK_FREQ_MAX 0xFFFFFFFF
+#define SCTPCTL_SACK_FREQ_DEFAULT SCTP_DEFAULT_SACK_FREQ
+
+/* sys_resource: Max number of cached resources in the system */
+#define SCTPCTL_SYS_RESOURCE_DESC "Max number of cached resources in the system"
+#define SCTPCTL_SYS_RESOURCE_MIN 0
+#define SCTPCTL_SYS_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_SYS_RESOURCE_DEFAULT SCTP_DEF_SYSTEM_RESC_LIMIT
+
+/* asoc_resource: Max number of cached resources in an asoc */
+#define SCTPCTL_ASOC_RESOURCE_DESC "Max number of cached resources in an asoc"
+#define SCTPCTL_ASOC_RESOURCE_MIN 0
+#define SCTPCTL_ASOC_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_ASOC_RESOURCE_DEFAULT SCTP_DEF_ASOC_RESC_LIMIT
+
+/* heartbeat_interval: Default heartbeat interval in msec */
+#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in msec"
+#define SCTPCTL_HEARTBEAT_INTERVAL_MIN 0
+#define SCTPCTL_HEARTBEAT_INTERVAL_MAX 0xFFFFFFFF
+#define SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT SCTP_HB_DEFAULT_MSEC
+
+/* pmtu_raise_time: Default PMTU raise timer in sec */
+#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in sec"
+#define SCTPCTL_PMTU_RAISE_TIME_MIN 0
+#define SCTPCTL_PMTU_RAISE_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC
+
+/* shutdown_guard_time: Default shutdown guard timer in sec */
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in sec"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC
+
+/* secret_lifetime: Default secret lifetime in sec */
+#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in sec"
+#define SCTPCTL_SECRET_LIFETIME_MIN 0
+#define SCTPCTL_SECRET_LIFETIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SECRET_LIFETIME_DEFAULT SCTP_DEFAULT_SECRET_LIFE_SEC
+
+/* rto_max: Default maximum retransmission timeout in msec */
+#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in msec"
+#define SCTPCTL_RTO_MAX_MIN 0
+#define SCTPCTL_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* rto_min: Default minimum retransmission timeout in msec */
+#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in msec"
+#define SCTPCTL_RTO_MIN_MIN 0
+#define SCTPCTL_RTO_MIN_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MIN_DEFAULT SCTP_RTO_LOWER_BOUND
+
+/* rto_initial: Default initial retransmission timeout in msec */
+#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in msec"
+#define SCTPCTL_RTO_INITIAL_MIN 0
+#define SCTPCTL_RTO_INITIAL_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_INITIAL_DEFAULT SCTP_RTO_INITIAL
+
+/* init_rto_max: Default maximum retransmission timeout during association setup in msec */
+#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in msec"
+#define SCTPCTL_INIT_RTO_MAX_MIN 0
+#define SCTPCTL_INIT_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* valid_cookie_life: Default cookie lifetime in sec */
+#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in sec"
+#define SCTPCTL_VALID_COOKIE_LIFE_MIN 0
+#define SCTPCTL_VALID_COOKIE_LIFE_MAX 0xFFFFFFFF
+#define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE
+
+/* init_rtx_max: Default maximum number of retransmission for INIT chunks */
+#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmission for INIT chunks"
+#define SCTPCTL_INIT_RTX_MAX_MIN 0
+#define SCTPCTL_INIT_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTX_MAX_DEFAULT SCTP_DEF_MAX_INIT
+
+/* assoc_rtx_max: Default maximum number of retransmissions per association */
+#define SCTPCTL_ASSOC_RTX_MAX_DESC "Default maximum number of retransmissions per association"
+#define SCTPCTL_ASSOC_RTX_MAX_MIN 0
+#define SCTPCTL_ASSOC_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_ASSOC_RTX_MAX_DEFAULT SCTP_DEF_MAX_SEND
+
+/* path_rtx_max: Default maximum of retransmissions per path */
+#define SCTPCTL_PATH_RTX_MAX_DESC "Default maximum of retransmissions per path"
+#define SCTPCTL_PATH_RTX_MAX_MIN 0
+#define SCTPCTL_PATH_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_PATH_RTX_MAX_DEFAULT SCTP_DEF_MAX_PATH_RTX
+
+/* add_more_on_output: When space wise is it worthwhile to try to add more to a socket send buffer */
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space wise is it worthwhile to try to add more to a socket send buffer"
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MIN 0
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
+
+/* outgoing_streams: Default number of outgoing streams */
+#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams"
+#define SCTPCTL_OUTGOING_STREAMS_MIN 1
+#define SCTPCTL_OUTGOING_STREAMS_MAX 65535
+#define SCTPCTL_OUTGOING_STREAMS_DEFAULT SCTP_OSTREAM_INITIAL
+
+/* cmt_on_off: CMT on/off flag */
+#define SCTPCTL_CMT_ON_OFF_DESC "CMT on/off flag"
+#define SCTPCTL_CMT_ON_OFF_MIN 0
+#define SCTPCTL_CMT_ON_OFF_MAX 1
+#define SCTPCTL_CMT_ON_OFF_DEFAULT 0
+
+/* cmt_use_dac: CMT DAC on/off flag */
+#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag"
+#define SCTPCTL_CMT_USE_DAC_MIN 0
+#define SCTPCTL_CMT_USE_DAC_MAX 1
+#define SCTPCTL_CMT_USE_DAC_DEFAULT 0
+
+/* JRS 5/21/07 - CMT PF type flag */
+#define SCTPCTL_CMT_PF_DESC "CMT PF type flag"
+#define SCTPCTL_CMT_PF_MIN 0
+#define SCTPCTL_CMT_PF_MAX 2
+#define SCTPCTL_CMT_PF_DEFAULT 0
+
+/* cwnd_maxburst: Use a CWND adjusting maxburst */
+#define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst"
+#define SCTPCTL_CWND_MAXBURST_MIN 0
+#define SCTPCTL_CWND_MAXBURST_MAX 1
+#define SCTPCTL_CWND_MAXBURST_DEFAULT 1
+
+/* early_fast_retran: Early Fast Retransmit with timer */
+#define SCTPCTL_EARLY_FAST_RETRAN_DESC "Early Fast Retransmit with timer"
+#define SCTPCTL_EARLY_FAST_RETRAN_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_DEFAULT 0
+
+/* early_fast_retran_msec: Early Fast Retransmit minimum timer value */
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC "Early Fast Retransmit minimum timer value"
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT SCTP_MINFR_MSEC_TIMER
+
+/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement"
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0
+
+/* auth_disable: Disable SCTP AUTH function */
+#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
+#define SCTPCTL_AUTH_DISABLE_MIN 0
+#define SCTPCTL_AUTH_DISABLE_MAX 1
+#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
+
+/* nat_friendly: SCTP NAT friendly operation */
+#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation"
+#define SCTPCTL_NAT_FRIENDLY_MIN 0
+#define SCTPCTL_NAT_FRIENDLY_MAX 1
+#define SCTPCTL_NAT_FRIENDLY_DEFAULT 1
+
+/* abc_l_var: SCTP ABC max increase per SACK (L) */
+#define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)"
+#define SCTPCTL_ABC_L_VAR_MIN 0
+#define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF
+#define SCTPCTL_ABC_L_VAR_DEFAULT 1
+
+/* max_chained_mbufs: Default max number of small mbufs on a chain */
+#define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain"
+#define SCTPCTL_MAX_CHAINED_MBUFS_MIN 0
+#define SCTPCTL_MAX_CHAINED_MBUFS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT SCTP_DEFAULT_MBUFS_IN_CHAIN
+
+/* do_sctp_drain: Should SCTP respond to the drain calls */
+#define SCTPCTL_DO_SCTP_DRAIN_DESC "Should SCTP respond to the drain calls"
+#define SCTPCTL_DO_SCTP_DRAIN_MIN 0
+#define SCTPCTL_DO_SCTP_DRAIN_MAX 1
+#define SCTPCTL_DO_SCTP_DRAIN_DEFAULT 1
+
+/* hb_max_burst: Confirmation Heartbeat max burst? */
+#define SCTPCTL_HB_MAX_BURST_DESC "Confirmation Heartbeat max burst"
+#define SCTPCTL_HB_MAX_BURST_MIN 1
+#define SCTPCTL_HB_MAX_BURST_MAX 0xFFFFFFFF
+#define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* abort_at_limit: When one-2-one hits qlimit abort */
+#define SCTPCTL_ABORT_AT_LIMIT_DESC "When one-2-one hits qlimit abort"
+#define SCTPCTL_ABORT_AT_LIMIT_MIN 0
+#define SCTPCTL_ABORT_AT_LIMIT_MAX 1
+#define SCTPCTL_ABORT_AT_LIMIT_DEFAULT 0
+
+/* strict_data_order: Enforce strict data ordering, abort if control inside data */
+#define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if control inside data"
+#define SCTPCTL_STRICT_DATA_ORDER_MIN 0
+#define SCTPCTL_STRICT_DATA_ORDER_MAX 1
+#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 0
+
+/* min_residual: min residual in a data fragment leftover */
+#define SCTPCTL_MIN_RESIDUAL_DESC "Minimum residual data chunk in second part of split"
+#define SCTPCTL_MIN_RESIDUAL_MIN 20
+#define SCTPCTL_MIN_RESIDUAL_MAX 65535
+#define SCTPCTL_MIN_RESIDUAL_DEFAULT 1452
+
+/* max_retran_chunk: max chunk retransmissions */
+#define SCTPCTL_MAX_RETRAN_CHUNK_DESC "Maximum times an unlucky chunk can be retran'd before assoc abort"
+#define SCTPCTL_MAX_RETRAN_CHUNK_MIN 0
+#define SCTPCTL_MAX_RETRAN_CHUNK_MAX 65535
+#define SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT 30
+
+/* sctp_logging: This gives us logging when the options are enabled */
+#define SCTPCTL_LOGGING_LEVEL_DESC "Ltrace/KTR trace logging level"
+#define SCTPCTL_LOGGING_LEVEL_MIN 0
+#define SCTPCTL_LOGGING_LEVEL_MAX 0xffffffff
+#define SCTPCTL_LOGGING_LEVEL_DEFAULT 0
+
+/* JRS - default congestion control module sysctl */
+#define SCTPCTL_DEFAULT_CC_MODULE_DESC "Default congestion control module"
+#define SCTPCTL_DEFAULT_CC_MODULE_MIN 0
+#define SCTPCTL_DEFAULT_CC_MODULE_MAX 2
+#define SCTPCTL_DEFAULT_CC_MODULE_DEFAULT 0
+
+/* RRS - default fragment interleave */
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC "Default fragment interleave level"
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN 0
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX 2
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT 1
+
+/* mobility_base: Enable SCTP mobility support */
+#define SCTPCTL_MOBILITY_BASE_DESC "Enable SCTP base mobility"
+#define SCTPCTL_MOBILITY_BASE_MIN 0
+#define SCTPCTL_MOBILITY_BASE_MAX 1
+#define SCTPCTL_MOBILITY_BASE_DEFAULT SCTP_DEFAULT_MOBILITY_BASE
+
+/* mobility_fasthandoff: Enable SCTP fast handoff support */
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DESC "Enable SCTP fast handoff"
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MIN 0
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+
+#if defined(SCTP_DEBUG)
+/* debug: Configure debug output */
+#define SCTPCTL_DEBUG_DESC "Configure debug output"
+#define SCTPCTL_DEBUG_MIN 0
+#define SCTPCTL_DEBUG_MAX 0xFFFFFFFF
+#define SCTPCTL_DEBUG_DEFAULT 0
+#endif
+
+
+
+#if defined(_KERNEL)
+
+/*
+ * variable definitions
+ */
+extern uint32_t sctp_sendspace;
+extern uint32_t sctp_recvspace;
+extern uint32_t sctp_auto_asconf;
+extern uint32_t sctp_ecn_enable;
+extern uint32_t sctp_ecn_nonce;
+extern uint32_t sctp_strict_sacks;
+extern uint32_t sctp_no_csum_on_loopback;
+extern uint32_t sctp_strict_init;
+extern uint32_t sctp_peer_chunk_oh;
+extern uint32_t sctp_max_burst_default;
+extern uint32_t sctp_max_chunks_on_queue;
+extern uint32_t sctp_hashtblsize;
+extern uint32_t sctp_pcbtblsize;
+extern uint32_t sctp_min_split_point;
+extern uint32_t sctp_chunkscale;
+extern uint32_t sctp_delayed_sack_time_default;
+extern uint32_t sctp_sack_freq_default;
+extern uint32_t sctp_system_free_resc_limit;
+extern uint32_t sctp_asoc_free_resc_limit;
+extern uint32_t sctp_heartbeat_interval_default;
+extern uint32_t sctp_pmtu_raise_time_default;
+extern uint32_t sctp_shutdown_guard_time_default;
+extern uint32_t sctp_secret_lifetime_default;
+extern uint32_t sctp_rto_max_default;
+extern uint32_t sctp_rto_min_default;
+extern uint32_t sctp_rto_initial_default;
+extern uint32_t sctp_init_rto_max_default;
+extern uint32_t sctp_valid_cookie_life_default;
+extern uint32_t sctp_init_rtx_max_default;
+extern uint32_t sctp_assoc_rtx_max_default;
+extern uint32_t sctp_path_rtx_max_default;
+extern uint32_t sctp_add_more_threshold;
+extern uint32_t sctp_nr_outgoing_streams_default;
+extern uint32_t sctp_cmt_on_off;
+extern uint32_t sctp_cmt_use_dac;
+
+/* JRS 5/21/07 - CMT PF type flag variables */
+extern uint32_t sctp_cmt_pf;
+extern uint32_t sctp_use_cwnd_based_maxburst;
+extern uint32_t sctp_early_fr;
+extern uint32_t sctp_early_fr_msec;
+extern uint32_t sctp_asconf_auth_nochk;
+extern uint32_t sctp_auth_disable;
+extern uint32_t sctp_nat_friendly;
+extern uint32_t sctp_L2_abc_variable;
+extern uint32_t sctp_mbuf_threshold_count;
+extern uint32_t sctp_do_drain;
+extern uint32_t sctp_hb_maxburst;
+extern uint32_t sctp_abort_if_one_2_one_hits_limit;
+extern uint32_t sctp_strict_data_order;
+extern uint32_t sctp_min_residual;
+extern uint32_t sctp_max_retran_chunk;
+extern uint32_t sctp_logging_level;
+
+/* JRS - Variable for the default congestion control module */
+extern uint32_t sctp_default_cc_module;
+extern uint32_t sctp_default_frag_interleave;
+extern uint32_t sctp_mobility_base;
+extern uint32_t sctp_mobility_fasthandoff;
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+extern struct sctp_log sctp_log;
+
+#endif
+#if defined(SCTP_DEBUG)
+extern uint32_t sctp_debug_on;
+
+#endif
+
+extern struct sctpstat sctpstat;
+
+#if defined(SYSCTL_DECL)
+SYSCTL_DECL(_net_inet_sctp);
+#endif
+
+#endif /* _KERNEL */
+#endif /* __sctp_sysctl_h__ */
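The SCTPCTL_*_{MIN,MAX,DEFAULT} triplets above exist so the sysctl handlers can
range-check a requested value and fall back to the default. A minimal userspace
sketch of that check, assuming a stand-in default value rather than the real
SCTP_DEF_MAX_BURST, and a hypothetical helper name:

	#include <stdint.h>
	#include <stdio.h>

	#define SCTPCTL_MAXBURST_MIN     1
	#define SCTPCTL_MAXBURST_MAX     0xFFFFFFFF
	#define SCTPCTL_MAXBURST_DEFAULT 4	/* stand-in for SCTP_DEF_MAX_BURST */

	/* Keep the requested value only if it is inside [min, max]. */
	static uint32_t
	sctp_clamp_tunable(uint64_t req, uint64_t min, uint64_t max, uint32_t def)
	{
		if (req < min || req > max)
			return (def);
		return ((uint32_t)req);
	}

	int
	main(void)
	{
		printf("%u\n", (unsigned)sctp_clamp_tunable(0,
		    SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX,
		    SCTPCTL_MAXBURST_DEFAULT));	/* 0 is below min: prints 4 */
		return (0);
	}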
--- /dev/null
+++ sys/netinet/sctp_peeloff.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_peeloff.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_peeloff.h,v 1.3 2007/05/08 17:01:10 rrs Exp $");
+
+#ifndef __sctp_peeloff_h__
+#define __sctp_peeloff_h__
+
+
+
+
+#if defined(_KERNEL)
+
+int sctp_can_peel_off(struct socket *, sctp_assoc_t);
+int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t);
+struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *);
+
+
+
+#endif /* _KERNEL */
+
+#endif
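These kernel hooks back the "peel off" operation exposed to userland: given an
association id on a one-to-many SCTP socket, a new one-to-one socket is created
for just that association. A hedged userland-side sketch, assuming the
sctp_peeloff() wrapper declared in <netinet/sctp.h> on a system built with SCTP
support (error handling trimmed):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/sctp.h>

	int
	peel_association(int one_to_many_sd, sctp_assoc_t assoc_id)
	{
		/* New one-to-one socket bound to this association only. */
		int peeled = sctp_peeloff(one_to_many_sd, assoc_id);

		return (peeled);	/* -1 on error, per the usual convention */
	}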
--- /dev/null
+++ sys/netinet/sctp_auth.h
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_auth.h,v 1.5 2007/06/09 13:46:57 rrs Exp $");
+
+#ifndef __SCTP_AUTH_H__
+#define __SCTP_AUTH_H__
+
+
+/* digest lengths */
+#define SCTP_AUTH_DIGEST_LEN_SHA1 20
+#define SCTP_AUTH_DIGEST_LEN_MD5 16
+#define SCTP_AUTH_DIGEST_LEN_SHA224 28
+#define SCTP_AUTH_DIGEST_LEN_SHA256 32
+#define SCTP_AUTH_DIGEST_LEN_SHA384 48
+#define SCTP_AUTH_DIGEST_LEN_SHA512 64
+#define SCTP_AUTH_DIGEST_LEN_MAX 64
+
+/* random sizes */
+#define SCTP_AUTH_RANDOM_SIZE_DEFAULT 32
+#define SCTP_AUTH_RANDOM_SIZE_REQUIRED 32
+#define SCTP_AUTH_RANDOM_SIZE_MAX 256
+
+/* union of all supported HMAC algorithm contexts */
+typedef union sctp_hash_context {
+ SHA1_CTX sha1;
+ MD5_CTX md5;
+#ifdef HAVE_SHA2
+ SHA256_CTX sha256;
+ SHA384_CTX sha384;
+ SHA512_CTX sha512;
+#endif
+} sctp_hash_context_t;
+
+typedef struct sctp_key {
+ uint32_t keylen;
+ uint8_t key[0];
+} sctp_key_t;
+
+typedef struct sctp_shared_key {
+ LIST_ENTRY(sctp_shared_key) next;
+ sctp_key_t *key; /* key text */
+ uint16_t keyid; /* shared key ID */
+} sctp_sharedkey_t;
+
+LIST_HEAD(sctp_keyhead, sctp_shared_key);
+
+/* authentication chunks list */
+typedef struct sctp_auth_chklist {
+ uint8_t chunks[256];
+ uint8_t num_chunks;
+} sctp_auth_chklist_t;
+
+/* hmac algos supported list */
+typedef struct sctp_hmaclist {
+ uint16_t max_algo; /* max algorithms allocated */
+ uint16_t num_algo; /* num algorithms used */
+ uint16_t hmac[0];
+} sctp_hmaclist_t;
+
+/* authentication info */
+typedef struct sctp_authinfo {
+ sctp_key_t *random; /* local random key (concatenated) */
+ uint32_t random_len; /* local random number length for param */
+ sctp_key_t *peer_random;/* peer's random key (concatenated) */
+ uint16_t assoc_keyid; /* current send keyid (cached) */
+ uint16_t recv_keyid; /* last recv keyid (cached) */
+ sctp_key_t *assoc_key; /* cached send key */
+ sctp_key_t *recv_key; /* cached recv key */
+} sctp_authinfo_t;
+
+
+
+/*
+ * Macros
+ */
+#define sctp_auth_is_required_chunk(chunk, list) ((list == NULL) ? (0) : (list->chunks[chunk] != 0))
+
+/*
+ * function prototypes
+ */
+
+/* socket option api functions */
+extern sctp_auth_chklist_t *sctp_alloc_chunklist(void);
+extern void sctp_free_chunklist(sctp_auth_chklist_t * chklist);
+extern void sctp_clear_chunklist(sctp_auth_chklist_t * chklist);
+extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t * chklist);
+extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
+extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
+extern int
+ sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr);
+extern int sctp_pack_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr);
+extern int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list);
+
+/* key handling */
+extern sctp_key_t *sctp_alloc_key(uint32_t keylen);
+extern void sctp_free_key(sctp_key_t * key);
+extern void sctp_print_key(sctp_key_t * key, const char *str);
+extern void sctp_show_key(sctp_key_t * key, const char *str);
+extern sctp_key_t *sctp_generate_random_key(uint32_t keylen);
+extern sctp_key_t *sctp_set_key(uint8_t * key, uint32_t keylen);
+extern sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2,
+ sctp_key_t * shared);
+
+/* shared key handling */
+extern sctp_sharedkey_t *sctp_alloc_sharedkey(void);
+extern void sctp_free_sharedkey(sctp_sharedkey_t * skey);
+extern sctp_sharedkey_t *
+ sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id);
+extern void
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey);
+extern int
+sctp_copy_skeylist(const struct sctp_keyhead *src,
+ struct sctp_keyhead *dest);
+
+/* hmac list handling */
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
+extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
+extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);
+extern sctp_hmaclist_t *sctp_default_supported_hmaclist(void);
+extern uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer,
+ sctp_hmaclist_t * local);
+extern int sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr);
+extern int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs,
+ uint32_t num_hmacs);
+
+extern sctp_authinfo_t *sctp_alloc_authinfo(void);
+extern void sctp_free_authinfo(sctp_authinfo_t * authinfo);
+
+/* keyed-HMAC functions */
+extern uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo);
+extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
+extern uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest,
+ uint32_t digestlen);
+extern uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id);
+
+/* mbuf versions */
+extern uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer);
+extern uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key, struct mbuf *m,
+ uint32_t m_offset, uint8_t * digest);
+
+/*
+ * authentication routines
+ */
+extern void sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid);
+extern void sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+
+extern void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length);
+extern void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth,
+ struct sctp_tcb *stcb);
+extern struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret,
+ uint32_t * offset, struct sctp_tcb *stcb,
+ uint8_t chunk);
+extern int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *ch,
+ struct mbuf *m, uint32_t offset);
+extern void
+sctp_notify_authentication(struct sctp_tcb *stcb,
+ uint32_t indication, uint16_t keyid,
+ uint16_t alt_keyid);
+extern int
+ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit);
+extern void
+ sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb);
+
+
+/* test functions */
+extern void sctp_test_hmac_sha1(void);
+extern void sctp_test_hmac_md5(void);
+extern void sctp_test_authkey(void);
+
+#endif /* __SCTP_AUTH_H__ */
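sctp_key_t above uses the classic length-header-plus-flexible-array layout
(keylen followed by key[0]); sctp_alloc_key()/sctp_set_key() fill such a blob
from caller-supplied bytes. A standalone sketch of the same idiom, with local
stand-in names rather than the kernel allocators:

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	struct demo_key {
		uint32_t keylen;
		uint8_t  key[];		/* C99 spelling of key[0] */
	};

	/* Allocate a key blob and copy keylen bytes of key material into it. */
	static struct demo_key *
	demo_set_key(const uint8_t *src, uint32_t keylen)
	{
		struct demo_key *k = malloc(sizeof(*k) + keylen);

		if (k == NULL)
			return (NULL);
		k->keylen = keylen;
		memcpy(k->key, src, keylen);
		return (k);
	}

	int
	main(void)
	{
		uint8_t secret[32] = { 0 };
		struct demo_key *k = demo_set_key(secret, sizeof(secret));

		free(k);
		return (0);
	}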
--- /dev/null
+++ sys/netinet/sctp_cc_functions.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_cc_functions.h,v 1.2 2007/09/10 17:06:25 rrs Exp $");
+
+#ifndef __sctp_cc_functions_h__
+#define __sctp_cc_functions_h__
+
+#if defined(_KERNEL)
+
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+/*
+ * HTCP algorithms are directly taken from
+ * R.N.Shorten, D.J.Leith and are work/outcome from
+ * a Cisco-URP grant to enhance HTCP for satellite
+ * communications. We use the BSD License
+ * granted from his source and have modified his
+ * algorithms to fit within the SCTP BSD framework.
+ */
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+#endif
+#endif
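
The declarations above fall into two parallel families: the default congestion-control
updates and the HTCP variants. A minimal sketch of how such entry points could be
gathered into a per-association dispatch table follows; the struct layout and field
names here are hypothetical and for illustration only (the real table type is defined
in the SCTP headers, not in this excerpt).

/*
 * Illustrative sketch only: a hypothetical dispatch table over the HTCP
 * entry points declared above.  The struct name and fields are assumptions.
 */
struct sctp_cc_functions_sketch {
	void (*sctp_set_initial_cc_param) (struct sctp_tcb *, struct sctp_nets *);
	void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *,
	    struct sctp_association *, int, int, int);
	void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *, struct sctp_nets *);
	void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *, struct sctp_nets *);
};

static const struct sctp_cc_functions_sketch sctp_cc_htcp_sketch = {
	.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
	.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
	.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
	.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
};
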
--- /dev/null
+++ sys/netinet/sctp_timer.c
@@ -0,0 +1,1930 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_timer.c,v 1.32.2.1 2007/10/25 12:27:06 rrs Exp $");
+
+#define _IP_VHL
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#ifdef INET6
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_uio.h>
+
+
+
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk, *tp2;
+ struct timeval now, min_wait, tv;
+ unsigned int cur_rtt, cnt = 0, cnt_resend = 0;
+
+ /* an early FR is occurring. */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ cur_rtt = stcb->asoc.initial_rto >> 2;
+ } else {
+
+ cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (cur_rtt < sctp_early_fr_msec) {
+ cur_rtt = sctp_early_fr_msec;
+ }
+ cur_rtt *= 1000;
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next);
+ if (chk->whoTo != net) {
+ continue;
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND)
+ cnt_resend++;
+ else if ((chk->sent > SCTP_DATAGRAM_UNSENT) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ /* pending, may need retran */
+ if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min.. forget it we
+ * will find no more to send.
+ */
+ continue;
+ } else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) {
+ /*
+ * we must look at the micro seconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ continue;
+ }
+ }
+ if (sctp_logging_level & SCTP_EARLYFR_LOGGING_ENABLE) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 4, SCTP_FR_MARKED_EARLY);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrmrkretrans);
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ /* double book size since we are doing an early FR */
+ chk->book_size_scale++;
+ cnt += chk->send_size;
+ if ((cnt + net->flight_size) > net->cwnd) {
+ /* Mark all we could possibly resend */
+ break;
+ }
+ }
+ }
+ if (cnt) {
+ /*
+ * JRS - Use the congestion control given in the congestion
+ * control module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net);
+ } else if (cnt_resend) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ /* Restart it? */
+ if (net->flight_size < net->cwnd) {
+ SCTP_STAT_INCR(sctps_earlyfrstrtmr);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ }
+}
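
A minimal sketch of the marking-boundary arithmetic used in sctp_early_fr_timer()
above, assuming kernel context (timevalsub() from sys/time.h); the helper name is
hypothetical. The RTO in milliseconds becomes a struct timeval, is subtracted from
the current time, and is clamped at zero when the clock has not run long enough to
cover one RTO.

/*
 * Editorial sketch of the min_wait computation above; helper name is hypothetical.
 */
static struct timeval
sctp_mark_boundary_sketch(struct timeval now, unsigned int rto_msec)
{
	struct timeval tv, min_wait;
	unsigned int rto_usec = rto_msec * 1000;	/* msec -> usec */

	tv.tv_sec = rto_usec / 1000000;
	tv.tv_usec = rto_usec % 1000000;
	min_wait = now;
	timevalsub(&min_wait, &tv);
	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
		/* clock too young to cover an RTO; mark more aggressively */
		min_wait.tv_sec = min_wait.tv_usec = 0;
	}
	return (min_wait);
}
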
+
+void
+sctp_audit_retranmission_queue(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit invoked on send queue cnt:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+ asoc->sent_queue_retran_cnt = 0;
+ asoc->sent_queue_cnt = 0;
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ asoc->sent_queue_cnt++;
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit completes retran:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+}
+
+int
+sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint16_t threshold)
+{
+ if (net) {
+ net->error_count++;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
+ net, net->error_count,
+ net->failure_threshold);
+ if (net->error_count > net->failure_threshold) {
+ /* We had a threshold failure */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ if (net == stcb->asoc.primary_destination) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ /*
+ * JRS 5/14/07 - If a destination is
+ * unreachable, the PF bit is turned off.
+ * This allows an unambiguous use of the PF
+ * bit for destinations that are reachable
+ * but potentially failed. If the
+ * destination is set to the unreachable
+ * state, also set the destination to the PF
+ * state.
+ */
+ /*
+ * Add debug message here if destination is
+ * not in PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ /*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ }
+ if (stcb == NULL)
+ return (0);
+
+ if (net) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ } else {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
+ &stcb->asoc, stcb->asoc.overall_error_count,
+ (uint32_t) threshold,
+ ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
+ /*
+ * We specifically do not do >= to give the assoc one more chance
+ * before we fail it.
+ */
+ if (stcb->asoc.overall_error_count > threshold) {
+ /* Abort notification sends a ULP notify */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
+ printf("Aborting association threshold:%d overall error count:%d\n",
+ threshold,
+ stcb->asoc.overall_error_count);
+ sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED);
+ return (1);
+ }
+ return (0);
+}
+
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int mode)
+{
+ /* Find and return an alternate network if possible */
+ struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL;
+ int once;
+
+ /* JRS 5/14/07 - Initialize min_errors to an impossible value. */
+ int min_errors = -1;
+ uint32_t max_cwnd = 0;
+
+ if (stcb->asoc.numnets == 1) {
+ /* No others but net */
+ return (TAILQ_FIRST(&stcb->asoc.nets));
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate
+ * net algorithm. This algorithm chooses the active destination (not
+ * in PF state) with the largest cwnd value. If all destinations are
+ * in PF state, unreachable, or unconfirmed, choose the destination
+ * that is in PF state with the lowest error count. In case of a
+ * tie, choose the destination that was most recently active.
+ */
+ if (mode == 2) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ /*
+ * JRS 5/14/07 - If the destination is unreachable
+ * or unconfirmed, skip it.
+ */
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ continue;
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable
+ * but in PF state, compare the error count of the
+ * destination to the minimum error count seen thus
+ * far. Store the destination with the lower error
+ * count. If the error counts are equal, store the
+ * destination that was most recently active.
+ */
+ if (mnet->dest_state & SCTP_ADDR_PF) {
+ /*
+ * JRS 5/14/07 - If the destination under
+ * consideration is the current destination,
+ * work as if the error count is one higher.
+ * The actual error count will not be
+ * incremented until later in the t3
+ * handler.
+ */
+ if (mnet == net) {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 < min_errors) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count + 1;
+ }
+ continue;
+ } else {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count < min_errors) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count;
+ }
+ continue;
+ }
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable and
+ * not in PF state, compare the cwnd of the
+ * destination to the highest cwnd seen thus far.
+ * Store the destination with the higher cwnd value.
+ * If the cwnd values are equal, randomly choose one
+ * of the two destinations.
+ */
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2 == 1) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ //Useless ?
+ }
+ }
+ }
+ /*
+ * JRS 5/14/07 - After all destinations have been considered
+ * as alternates, check to see if there was some active
+ * destination (not in PF state). If not, check to see if
+ * there was some PF destination with the minimum number of
+ * errors. If not, return the original destination. If
+ * there is a min_errors_net, remove the PF flag from that
+ * destination, set the cwnd to one or two MTUs, and return
+ * the destination as an alt. If there was some active
+ * destination with a highest cwnd, return the destination
+ * as an alt.
+ */
+ if (max_cwnd_net == NULL) {
+ if (min_errors_net == NULL) {
+ return (net);
+ }
+ min_errors_net->dest_state &= ~SCTP_ADDR_PF;
+ min_errors_net->cwnd = min_errors_net->mtu * sctp_cmt_pf;
+ if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, min_errors_net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_2);
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n",
+ min_errors_net, min_errors_net->error_count);
+ return (min_errors_net);
+ } else {
+ return (max_cwnd_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 1, use the CMT policy for
+ * choosing an alternate net.
+ */
+ else if (mode == 1) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)
+ ) {
+ /*
+ * will skip ones that are not-reachable or
+ * unconfirmed
+ */
+ continue;
+ }
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ }
+ }
+ }
+ if (max_cwnd_net) {
+ return (max_cwnd_net);
+ }
+ }
+ mnet = net;
+ once = 0;
+
+ if (mnet == NULL) {
+ mnet = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ do {
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ if (alt->ro.ro_rt == NULL) {
+ if (alt->ro._s_addr) {
+ sctp_free_ifa(alt->ro._s_addr);
+ alt->ro._s_addr = NULL;
+ }
+ alt->src_addr_selected = 0;
+ }
+ if (
+ ((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
+ (alt->ro.ro_rt != NULL) &&
+ /* sa_ignore NO_NULL_CHK */
+ (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))
+ ) {
+ /* Found a reachable address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+
+ if (alt == NULL) {
+ /* Case where NO in-service network exists (dormant state) */
+ /* we rotate destinations */
+ once = 0;
+ mnet = net;
+ do {
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (alt != net)) {
+ /* Found an alternate address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+ }
+ if (alt == NULL) {
+ return (net);
+ }
+ return (alt);
+}
+
+
+
+static void
+sctp_backoff_on_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int win_probe,
+ int num_marked)
+{
+ if (net->RTO == 0) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ net->RTO <<= 1;
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ if ((win_probe == 0) && num_marked) {
+ /* We don't apply penalty to window probe scenarios */
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
+ }
+}
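
sctp_backoff_on_timeout() above applies a plain exponential backoff with clamping
to the RTO. A minimal stand-alone sketch of that rule, with a hypothetical helper
name:

/*
 * Editorial sketch of the RTO backoff above: double on every timeout,
 * start from the association minimum, never exceed the maximum.
 */
static unsigned int
sctp_backoff_rto_sketch(unsigned int rto, unsigned int minrto, unsigned int maxrto)
{
	if (rto == 0)
		rto = minrto;
	rto <<= 1;
	if (rto > maxrto)
		rto = maxrto;
	return (rto);
}
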
+
+static int
+sctp_mark_all_for_resend(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct sctp_nets *alt,
+ int window_probe,
+ int *num_marked)
+{
+
+ /*
+ * Mark all chunks (well not all) that were sent to *net for
+ * retransmission. Move them to alt for their destination as well...
+ * We only mark chunks that have been outstanding long enough to
+ * have received feedback.
+ */
+ struct sctp_tmit_chunk *chk, *tp2, *could_be_sent = NULL;
+ struct sctp_nets *lnets;
+ struct timeval now, min_wait, tv;
+ int cur_rtt;
+ int audit_tf, num_mk, fir;
+ unsigned int cnt_mk;
+ uint32_t orig_flight, orig_tf;
+ uint32_t tsnlast, tsnfirst;
+
+
+ /* none in flight now */
+ audit_tf = 0;
+ fir = 0;
+ /*
+ * figure out how long a data chunk must be pending before we can
+ * mark it ..
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1);
+ cur_rtt *= 1000;
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt,
+ stcb->asoc.peers_rwnd,
+ window_probe,
+ SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(net->flight_size,
+ SCTP_OS_TIMER_PENDING(&net->fr_timer.timer),
+ SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer),
+ SCTP_FR_CWND_REPORT);
+ sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT);
+ }
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME);
+ }
+ /*
+ * Our rwnd will be incorrect here since we are not adding back the
+ * cnt * mbuf but we will fix that down below.
+ */
+ orig_flight = net->flight_size;
+ orig_tf = stcb->asoc.total_flight;
+
+ net->fast_retran_ip = 0;
+ /* Now on to each chunk */
+ num_mk = cnt_mk = 0;
+ tsnfirst = tsnlast = 0;
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_NEXT(chk, sctp_next);
+ if ((compare_with_wrap(stcb->asoc.last_acked_seq,
+ chk->rec.data.TSN_seq,
+ MAX_TSN)) ||
+ (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
+ /* Strange case our list got out of order? */
+ SCTP_PRINTF("Our list is out of order?\n");
+ panic("Out of order list");
+ }
+ if ((chk->whoTo == net) && (chk->sent < SCTP_DATAGRAM_ACKED)) {
+ /*
+ * found one to mark: If it is less than
+ * DATAGRAM_ACKED it MUST not be a skipped or marked
+ * TSN but instead one that is either already set
+ * for retransmission OR one that needs
+ * retransmission.
+ */
+
+ /* validate its been outstanding long enough */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_MARK_TIME);
+ }
+ if ((chk->sent_rcv_time.tv_sec > min_wait.tv_sec) && (window_probe == 0)) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min.. forget it we
+ * will find no more to send.
+ */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ } else if ((chk->sent_rcv_time.tv_sec == min_wait.tv_sec) &&
+ (window_probe == 0)) {
+ /*
+ * we must look at the micro seconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ }
+ }
+ if (PR_SCTP_TTL_ENABLED(chk->flags)) {
+ /* Is it expired? */
+ if ((now.tv_sec > chk->rec.data.timetodrop.tv_sec) ||
+ ((chk->rec.data.timetodrop.tv_sec == now.tv_sec) &&
+ (now.tv_usec > chk->rec.data.timetodrop.tv_usec))) {
+ /* Yes so drop it */
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &stcb->asoc.sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ continue;
+ }
+ if (PR_SCTP_RTX_ENABLED(chk->flags)) {
+ /* Has it been retransmitted tv_sec times? */
+ if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &stcb->asoc.sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ continue;
+ }
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ num_mk++;
+ if (fir == 0) {
+ fir = 1;
+ tsnfirst = chk->rec.data.TSN_seq;
+ }
+ tsnlast = chk->rec.data.TSN_seq;
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 0, SCTP_FR_T3_MARKED);
+ }
+ if (chk->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ chk->whoTo->cwnd -= chk->book_size;
+ chk->rec.data.chunk_was_revoked = 0;
+ }
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ stcb->asoc.peers_rwnd += chk->send_size;
+ stcb->asoc.peers_rwnd += sctp_peer_chunk_oh;
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ SCTP_STAT_INCR(sctps_markedretrans);
+
+ /* reset the TSN for striking and other FR stuff */
+ chk->rec.data.doing_fast_retransmit = 0;
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ if (alt != net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->no_fr_allowed = 1;
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ } else {
+ chk->no_fr_allowed = 0;
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ chk->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ chk->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+ }
+ /*
+ * CMT: Do not allow FRs on retransmitted TSNs.
+ */
+ if (sctp_cmt_on_off == 1) {
+ chk->no_fr_allowed = 1;
+ }
+ } else if (chk->sent == SCTP_DATAGRAM_ACKED) {
+ /* remember highest acked one */
+ could_be_sent = chk;
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ cnt_mk++;
+ }
+ }
+ if ((orig_flight - net->flight_size) != (orig_tf - stcb->asoc.total_flight)) {
+ /* we did not subtract the same things? */
+ audit_tf = 1;
+ }
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
+ }
+#ifdef SCTP_DEBUG
+ if (num_mk) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n",
+ num_mk, (u_long)stcb->asoc.peers_rwnd);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%d\n",
+ num_mk,
+ (int)stcb->asoc.peers_rwnd);
+ }
+#endif
+ *num_marked = num_mk;
+ if ((stcb->asoc.sent_queue_retran_cnt == 0) && (could_be_sent)) {
+ /* fix it so we retransmit the highest acked anyway */
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cnt_mk++;
+ could_be_sent->sent = SCTP_DATAGRAM_RESEND;
+ }
+ if (stcb->asoc.sent_queue_retran_cnt != cnt_mk) {
+#ifdef INVARIANTS
+ SCTP_PRINTF("Local Audit says there are %d for retran asoc cnt:%d we marked:%d this time\n",
+ cnt_mk, stcb->asoc.sent_queue_retran_cnt, num_mk);
+#endif
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = cnt_mk;
+#endif
+ }
+ /* Now check for a ECN Echo that may be stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (audit_tf) {
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Audit total flight due to negative value net:%p\n",
+ net);
+ stcb->asoc.total_flight = 0;
+ stcb->asoc.total_flight_count = 0;
+ /* Clear all networks flight size */
+ TAILQ_FOREACH(lnets, &stcb->asoc.nets, sctp_next) {
+ lnets->flight_size = 0;
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Net:%p c-f cwnd:%d ssthresh:%d\n",
+ lnets, lnets->cwnd, lnets->ssthresh);
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(chk);
+ sctp_total_flight_increase(stcb, chk);
+ }
+ }
+ }
+ /*
+ * Set up the ECN nonce re-sync point. We do this since
+ * retransmissions are NOT set up for ECN. This means that due to
+ * Karn's rule, we don't know the total of the peer's ECN bits.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (chk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+ /* We return 1 if we only have a window probe outstanding */
+ return (0);
+}
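
Within the marking loop above, the PR-SCTP time-to-live test drops a chunk once
the current time has passed its timetodrop stamp, comparing seconds first and
microseconds only on a tie. A minimal sketch of that comparison, with a
hypothetical helper name:

/*
 * Editorial sketch of the PR-SCTP expiry test used in the marking loop above.
 */
static int
sctp_chunk_expired_sketch(const struct timeval *now,
    const struct timeval *timetodrop)
{
	if (now->tv_sec > timetodrop->tv_sec)
		return (1);
	if ((now->tv_sec == timetodrop->tv_sec) &&
	    (now->tv_usec > timetodrop->tv_usec))
		return (1);
	return (0);
}
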
+
+static void
+sctp_move_all_chunks_to_alt(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct sctp_nets *alt)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+
+ if (net == alt)
+ /* nothing to do */
+ return;
+
+ asoc = &stcb->asoc;
+
+ /*
+ * now through all the streams checking for chunks sent to our bad
+ * network.
+ */
+ TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
+ /* now clean up any chunks here */
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->net == net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ }
+ /* Now check the pending queue */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+
+}
+
+int
+sctp_t3rxt_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ int win_probe, num_mk;
+
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(0, 0, 0, SCTP_FR_T3_TIMEOUT);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (net == lnet) {
+ sctp_log_cwnd(stcb, lnet, 1, SCTP_CWND_LOG_FROM_T3);
+ } else {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_LOG_FROM_T3);
+ }
+ }
+ }
+ /* Find an alternate and mark those for retransmission */
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight < net->mtu)) {
+ SCTP_STAT_INCR(sctps_timowindowprobe);
+ win_probe = 1;
+ } else {
+ win_probe = 0;
+ }
+
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is not already
+ * in PF state, set the destination to PF state and store the
+ * current time as the time that the destination was last active. In
+ * addition, find an alternate destination with PF-based
+ * find_alt_net().
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) {
+ net->dest_state |= SCTP_ADDR_PF;
+ net->last_active = sctp_get_tick_count();
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n",
+ net);
+ }
+ alt = sctp_find_alternate_net(stcb, net, 2);
+ } else if (sctp_cmt_on_off) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being
+ * used, then pick dest with largest ssthresh for any
+ * retransmission.
+ */
+ alt = net;
+ alt = sctp_find_alternate_net(stcb, alt, 1);
+ /*
+ * CUCv2: If a different dest is picked for the
+ * retransmission, then new (rtx-)pseudo_cumack needs to be
+ * tracked for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ } else { /* CMT is OFF */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ }
+
+ (void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, &num_mk);
+ /* FR Loss recovery just ended with the T3. */
+ stcb->asoc.fast_retran_loss_recovery = 0;
+
+ /* CMT FR loss recovery ended with the T3 */
+ net->fast_retran_loss_recovery = 0;
+
+ /*
+ * setup the sat loss recovery that prevents satellite cwnd advance.
+ */
+ stcb->asoc.sat_t3_loss_recovery = 1;
+ stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq;
+
+ /* Backoff the timer and cwnd */
+ sctp_backoff_on_timeout(stcb, net, win_probe, num_mk);
+ if (win_probe == 0) {
+ /* We don't do normal threshold management on window probes */
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ } else {
+ if (net != stcb->asoc.primary_destination) {
+ /* send an immediate HB if our RTO is stale */
+ struct timeval now;
+ unsigned int ms_goneby;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ if (net->last_sent_time.tv_sec) {
+ ms_goneby = (now.tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else {
+ ms_goneby = 0;
+ }
+ if ((ms_goneby > net->RTO) || (net->RTO == 0)) {
+ /*
+ * no recent feedback in an RTO or
+ * more, request an RTT update
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ return 1;
+ }
+ }
+ }
+ } else {
+ /*
+ * For a window probe we don't penalize the nets but only
+ * the association. This may fail it if SACKs are not coming
+ * back. If SACKs are coming with rwnd locked at 0, we will
+ * continue to hold things waiting for rwnd to rise
+ */
+ if (sctp_threshold_management(inp, stcb, NULL,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /* Move all pending over too */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+
+ /*
+ * Get the address that failed, to force a new src address
+ * selection and a route allocation.
+ */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+
+ /* Force a route allocation too */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* Was it our primary? */
+ if ((stcb->asoc.primary_destination == net) && (alt != net)) {
+ /*
+ * Yes, note it as such and find an alternate. Note:
+ * this means HB code must use this to re-set the
+ * primary if it goes active AND if someone does a
+ * change-primary then this flag must be cleared
+ * from any net structures.
+ */
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ }
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) {
+ /*
+ * JRS 5/14/07 - If the destination hasn't failed completely
+ * but is in PF state, a PF-heartbeat needs to be sent
+ * manually.
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ return 1;
+ }
+ /*
+ * Special case for cookie-echo'ed case, we don't do output but must
+ * await the COOKIE-ACK before retransmission
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /*
+ * Here we just reset the timer and start again since we
+ * have not established the asoc
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ return (0);
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point,
+ stcb->asoc.last_acked_seq, MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ send_forward_tsn(stcb, &stcb->asoc);
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ return (0);
+}
+
+int
+sctp_t1init_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /* bump the thresholds */
+ if (stcb->asoc.delayed_connection) {
+ /*
+ * special hook for delayed connection. The library did NOT
+ * complete the rest of its sends.
+ */
+ stcb->asoc.delayed_connection = 0;
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+ }
+ if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
+ return (0);
+ }
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_init_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, stcb->asoc.primary_destination, 1, 0);
+ if (stcb->asoc.initial_init_rto_max < net->RTO) {
+ net->RTO = stcb->asoc.initial_init_rto_max;
+ }
+ if (stcb->asoc.numnets > 1) {
+ /* If we have more than one addr use it */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, stcb->asoc.primary_destination, 0);
+ if ((alt != NULL) && (alt != stcb->asoc.primary_destination)) {
+ sctp_move_all_chunks_to_alt(stcb, stcb->asoc.primary_destination, alt);
+ stcb->asoc.primary_destination = alt;
+ }
+ }
+ /* Send out a new init */
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+}
+
+/*
+ * For cookie and asconf we actually need to find and mark for resend, then
+ * increment the resend counter (after all the threshold management stuff of
+ * course).
+ */
+int
+sctp_cookie_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *cookie;
+
+ /* first before all else we must find the cookie */
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie == NULL) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* FOOBAR! */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
+ sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
+ oper, SCTP_SO_NOT_LOCKED);
+ } else {
+#ifdef INVARIANTS
+ panic("Cookie timer expires in wrong state?");
+#else
+ SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
+ return (0);
+#endif
+ }
+ return (0);
+ }
+ /* Ok we found the cookie, threshold management next */
+ if (sctp_threshold_management(inp, stcb, cookie->whoTo,
+ stcb->asoc.max_init_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, cookie->whoTo, 0);
+ if (alt != cookie->whoTo) {
+ sctp_free_remote_addr(cookie->whoTo);
+ cookie->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ /* Now mark the retran info */
+ if (cookie->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ /*
+ * Now call the output routine to kick out the cookie again, Note we
+ * don't mark any chunks for retran so that FR will need to kick in
+ * to move these (or a send timer).
+ */
+ return (0);
+}
+
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *strrst = NULL, *chk = NULL;
+
+ if (stcb->asoc.stream_reset_outstanding == 0) {
+ return (0);
+ }
+ /* find the existing STRRESET, we use the seq number we sent out on */
+ (void)sctp_find_stream_reset(stcb, stcb->asoc.str_reset_seq_out, &strrst);
+ if (strrst == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, strrst->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
+ sctp_free_remote_addr(strrst->whoTo);
+ strrst->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+
+ /* See if a ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move to
+ * alternates for ALL chk's in queue
+ */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+ }
+ /* mark the retran info */
+ if (strrst->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ strrst->sent = SCTP_DATAGRAM_RESEND;
+
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
+ return (0);
+}
+
+int
+sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *asconf, *chk;
+
+ /* is this a first send, or a retransmission? */
+ if (stcb->asoc.asconf_sent == 0) {
+ /* compose a new ASCONF chunk and send it */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ /*
+ * Retransmission of the existing ASCONF is needed
+ */
+
+ /* find the existing ASCONF */
+ TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
+ break;
+ }
+ }
+ if (asconf == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, asconf->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ if (asconf->snd_count > stcb->asoc.max_send_times) {
+ /*
+ * Something is rotten: our peer is not responding
+ * to ASCONFs but apparently is to other chunks.
+ * i.e. it is not properly handling the chunk type
+ * upper bits. Mark this peer as ASCONF incapable
+ * and cleanup.
+ */
+ SCTPDBG(SCTP_DEBUG_TIMER1, "asconf_timer: Peer has not responded to our repeated ASCONFs\n");
+ sctp_asconf_cleanup(stcb, net);
+ return (0);
+ }
+ /*
+ * cleared threshold management, so now backoff the net and
+ * select an alternate
+ */
+ sctp_backoff_on_timeout(stcb, asconf->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, asconf->whoTo, 0);
+ sctp_free_remote_addr(asconf->whoTo);
+ asconf->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+
+ /* See if an ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move
+ * to the alternate for ALL chunks in queue
+ */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+ }
+ /* mark the retran info */
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+ }
+ return (0);
+}
+
+/* Mobility adaptation */
+void
+sctp_delete_prim_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ if (stcb->asoc.deleted_primary == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: deleted_primary is not stored...\n");
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+ }
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: finished to keep deleted primary ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ sctp_free_remote_addr(stcb->asoc.deleted_primary);
+ stcb->asoc.deleted_primary = NULL;
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+}
+
+/*
+ * For the shutdown and shutdown-ack, we do not keep one around on the
+ * control queue. This means we must generate a new one and call the general
+ * chunk output routine, AFTER having done threshold management.
+ */
+int
+sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ if (alt) {
+ sctp_send_shutdown(stcb, alt);
+ } else {
+ /*
+ * if alt is NULL, there is no dest to send to??
+ */
+ return (0);
+ }
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, alt);
+ return (0);
+}
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ sctp_send_shutdown_ack(stcb, alt);
+
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, alt);
+ return (0);
+}
+
+static void
+sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+ unsigned int chks_in_queue = 0;
+ int being_filled = 0;
+
+ /*
+ * This function is ONLY called when the send/sent queues are empty.
+ */
+ if ((stcb == NULL) || (inp == NULL))
+ return;
+
+ if (stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("Hmm, sent_queue_retran_cnt is non-zero %d\n",
+ stcb->asoc.sent_queue_retran_cnt);
+ stcb->asoc.sent_queue_retran_cnt = 0;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) {
+ int i, cnt = 0;
+
+ /* Check to see if a spoke fell off the wheel */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1);
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* yep, we lost a spoke or two */
+ SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt);
+ } else {
+ /* no spokes lost, */
+ stcb->asoc.total_output_queue_size = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ /* Check to see if some data queued, if so report it */
+ TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) {
+ if (!TAILQ_EMPTY(&outs->outqueue)) {
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->msg_is_complete)
+ being_filled++;
+ chks_in_queue++;
+ }
+ }
+ }
+ if (chks_in_queue != stcb->asoc.stream_queue_cnt) {
+ SCTP_PRINTF("Hmm, stream queue cnt at %d I counted %d in stream out wheel\n",
+ stcb->asoc.stream_queue_cnt, chks_in_queue);
+ }
+ if (chks_in_queue) {
+ /* call the output queue function */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ /*
+ * Probably should go in and make it go back through
+ * and add fragments allowed
+ */
+ if (being_filled == 0) {
+ SCTP_PRINTF("Still nothing moved %d chunks are stuck\n",
+ chks_in_queue);
+ }
+ }
+ } else {
+ SCTP_PRINTF("Found no chunks on any queue tot:%lu\n",
+ (u_long)stcb->asoc.total_output_queue_size);
+ stcb->asoc.total_output_queue_size = 0;
+ }
+}
+
+int
+sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int cnt_of_unconf)
+{
+ int ret;
+
+ if (net) {
+ if (net->hb_responded == 0) {
+ if (net->ro._s_addr) {
+ /*
+ * Invalidate the src address if we did not
+ * get a response last time.
+ */
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ sctp_backoff_on_timeout(stcb, net, 1, 0);
+ }
+ /* Zero PBA, if it needs it */
+ if (net->partial_bytes_acked) {
+ net->partial_bytes_acked = 0;
+ }
+ }
+ if ((stcb->asoc.total_output_queue_size > 0) &&
+ (TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ sctp_audit_stream_queues_for_size(inp, stcb);
+ }
+ /* Send a new HB, this will do threshold management, pick a new dest */
+ if (cnt_of_unconf == 0) {
+ if (sctp_send_hb(stcb, 0, NULL) < 0) {
+ return (1);
+ }
+ } else {
+ /*
+ * this will send out extra hb's up to maxburst if there are
+ * any unconfirmed addresses.
+ */
+ uint32_t cnt_sent = 0;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (net->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_sent++;
+ if (net->hb_responded == 0) {
+ /* Did we respond last time? */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ ret = sctp_send_hb(stcb, 1, net);
+ if (ret < 0)
+ return 1;
+ else if (ret == 0) {
+ break;
+ }
+ if (cnt_sent >= sctp_hb_maxburst)
+ break;
+ }
+ }
+ }
+ return (0);
+}
+
+int
+sctp_is_hb_timer_running(struct sctp_tcb *stcb)
+{
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.hb_timer.timer)) {
+ /* it's running */
+ return (1);
+ } else {
+ /* nope */
+ return (0);
+ }
+}
+
+int
+sctp_is_sack_timer_running(struct sctp_tcb *stcb)
+{
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ /* it's running */
+ return (1);
+ } else {
+ /* nope */
+ return (0);
+ }
+}
+
+#define SCTP_NUMBER_OF_MTU_SIZES 18
+static uint32_t mtu_sizes[] = {
+ 68,
+ 296,
+ 508,
+ 512,
+ 544,
+ 576,
+ 1006,
+ 1492,
+ 1500,
+ 1536,
+ 2002,
+ 2048,
+ 4352,
+ 4464,
+ 8166,
+ 17914,
+ 32000,
+ 65535
+};
+
+
+static uint32_t
+sctp_getnext_mtu(struct sctp_inpcb *inp, uint32_t cur_mtu)
+{
+ /* select another MTU that is just bigger than this one */
+ int i;
+
+ for (i = 0; i < SCTP_NUMBER_OF_MTU_SIZES; i++) {
+ if (cur_mtu < mtu_sizes[i]) {
+ /* no max_mtu is bigger than this one */
+ return (mtu_sizes[i]);
+ }
+ }
+ /* here return the highest allowable */
+ return (cur_mtu);
+}
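
sctp_getnext_mtu() above returns the first plateau strictly larger than the
current MTU, or the current value when already at the top of the table (which
makes the path-MTU timer below take its "nothing to do" branch). A short usage
sketch, with a hypothetical wrapper name:

/*
 * Editorial usage sketch: walking the plateau table with sctp_getnext_mtu().
 */
static void
sctp_mtu_walk_sketch(struct sctp_inpcb *inp)
{
	uint32_t mtu = 1492;

	mtu = sctp_getnext_mtu(inp, mtu);	/* -> 1500 */
	mtu = sctp_getnext_mtu(inp, mtu);	/* -> 1536 */
	mtu = sctp_getnext_mtu(inp, 65535);	/* top of table: stays 65535 */
}
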
+
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ uint32_t next_mtu;
+
+ /* restart the timer in any case */
+ next_mtu = sctp_getnext_mtu(inp, net->mtu);
+ if (next_mtu <= net->mtu) {
+ /* nothing to do */
+ return;
+ } {
+ uint32_t mtu;
+
+ if ((net->src_addr_selected == 0) ||
+ (net->ro._s_addr == NULL) ||
+ (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ if ((net->ro._s_addr != NULL) && (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ } else if (net->ro._s_addr == NULL) {
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, stcb->asoc.vrf_id);
+ }
+ if (net->ro._s_addr)
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr) {
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
+ if (mtu > next_mtu) {
+ net->mtu = next_mtu;
+ }
+ }
+ }
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+}
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct timeval tn, *tim_touse;
+ struct sctp_association *asoc;
+ int ticks_gone_by;
+
+ (void)SCTP_GETTIME_TIMEVAL(&tn);
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ /* Auto close is on */
+ asoc = &stcb->asoc;
+ /* pick the time to use */
+ if (asoc->time_last_rcvd.tv_sec >
+ asoc->time_last_sent.tv_sec) {
+ tim_touse = &asoc->time_last_rcvd;
+ } else {
+ tim_touse = &asoc->time_last_sent;
+ }
+ /* Now has long enough transpired to autoclose? */
+ ticks_gone_by = SEC_TO_TICKS(tn.tv_sec - tim_touse->tv_sec);
+ if ((ticks_gone_by > 0) &&
+ (ticks_gone_by >= (int)asoc->sctp_autoclose_ticks)) {
+ /*
+ * autoclose time has hit, call the output routine,
+ * which should do nothing just to be SURE we don't
+ * have hanging data. We can then safely check the
+ * queues and know that we are clear to send
+ * shutdown
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ /* Are we clean? */
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue)) {
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+ } else {
+ /*
+ * No auto close at this time, reset t-o to check
+ * later
+ */
+ int tmp;
+
+ /* fool the timer startup to use the time left */
+ tmp = asoc->sctp_autoclose_ticks;
+ asoc->sctp_autoclose_ticks -= ticks_gone_by;
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ net);
+ /* restore the real tick value */
+ asoc->sctp_autoclose_ticks = tmp;
+ }
+ }
+}
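
The autoclose check above measures idle time from the later of the last-received
and last-sent stamps, converts it to ticks, and compares it with the configured
interval. A minimal sketch of that decision, with a hypothetical helper name
(SEC_TO_TICKS as used in the code above):

/*
 * Editorial sketch of the idle test in sctp_autoclose_timer() above.
 */
static int
sctp_autoclose_due_sketch(struct timeval now, struct timeval last_rcvd,
    struct timeval last_sent, unsigned int autoclose_ticks)
{
	time_t last;
	int ticks_gone_by;

	last = (last_rcvd.tv_sec > last_sent.tv_sec) ?
	    last_rcvd.tv_sec : last_sent.tv_sec;
	ticks_gone_by = SEC_TO_TICKS(now.tv_sec - last);
	return ((ticks_gone_by > 0) &&
	    (ticks_gone_by >= (int)autoclose_ticks));
}
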
+
+void
+sctp_iterator_timer(struct sctp_iterator *it)
+{
+ int iteration_count = 0;
+ int inp_skip = 0;
+
+ /*
+ * only one iterator can run at a time. This is the only way we can
+ * cleanly pull ep's from underneath all the running iterators when
+ * an ep is freed.
+ */
+ SCTP_ITERATOR_LOCK();
+ if (it->inp == NULL) {
+ /* iterator is complete */
+done_with_iterator:
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_INP_INFO_WLOCK();
+ TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+ /* stopping the callout is not needed, in theory */
+ SCTP_INP_INFO_WUNLOCK();
+ (void)SCTP_OS_TIMER_STOP(&it->tmr.timer);
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ return;
+ }
+select_a_new_ep:
+ SCTP_INP_WLOCK(it->inp);
+ while (((it->pcb_flags) &&
+ ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
+ ((it->pcb_features) &&
+ ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
+ /* endpoint flags or features don't match, so keep looking */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ SCTP_INP_WUNLOCK(it->inp);
+ goto done_with_iterator;
+ }
+ SCTP_INP_WUNLOCK(it->inp);
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ SCTP_INP_WLOCK(it->inp);
+ }
+ if ((it->inp->inp_starting_point_for_iterator != NULL) &&
+ (it->inp->inp_starting_point_for_iterator != it)) {
+ SCTP_PRINTF("Iterator collision, waiting for one at %p\n",
+ it->inp);
+ SCTP_INP_WUNLOCK(it->inp);
+ goto start_timer_return;
+ }
+ /* mark the current iterator on the endpoint */
+ it->inp->inp_starting_point_for_iterator = it;
+ SCTP_INP_WUNLOCK(it->inp);
+ SCTP_INP_RLOCK(it->inp);
+ /* now go through each assoc which is in the desired state */
+ if (it->done_current_ep == 0) {
+ if (it->function_inp != NULL)
+ inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
+ it->done_current_ep = 1;
+ }
+ if (it->stcb == NULL) {
+ /* run the per instance function */
+ it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+ if ((inp_skip) || it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ goto no_stcb;
+ }
+ if ((it->stcb) &&
+ (it->stcb->asoc.stcb_starting_point_for_iterator == it)) {
+ it->stcb->asoc.stcb_starting_point_for_iterator = NULL;
+ }
+ while (it->stcb) {
+ SCTP_TCB_LOCK(it->stcb);
+ if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
+ /* not in the right state... keep looking */
+ SCTP_TCB_UNLOCK(it->stcb);
+ goto next_assoc;
+ }
+ /* mark the current iterator on the assoc */
+ it->stcb->asoc.stcb_starting_point_for_iterator = it;
+ /* see if we have limited out the iterator loop */
+ iteration_count++;
+ if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
+ start_timer_return:
+ /* set a timer to continue this later */
+ if (it->stcb)
+ SCTP_TCB_UNLOCK(it->stcb);
+ sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR,
+ (struct sctp_inpcb *)it, NULL, NULL);
+ SCTP_ITERATOR_UNLOCK();
+ return;
+ }
+ /* run function on this one */
+ (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
+
+ /*
+ * we lie here, it really needs to have its own type but
+ * first I must verify that this won't affect things :-0
+ */
+ if (it->no_chunk_output == 0)
+ sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+
+ SCTP_TCB_UNLOCK(it->stcb);
+next_assoc:
+ it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ }
+ }
+no_stcb:
+ /* done with all assocs on this endpoint, move on to next endpoint */
+ it->done_current_ep = 0;
+ SCTP_INP_WLOCK(it->inp);
+ it->inp->inp_starting_point_for_iterator = NULL;
+ SCTP_INP_WUNLOCK(it->inp);
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ goto select_a_new_ep;
+}
Index: ip_mroute.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_mroute.c -L sys/netinet/ip_mroute.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_mroute.c
+++ sys/netinet/ip_mroute.c
@@ -51,24 +51,25 @@
* MROUTING Revision: 3.5
* and PIM-SMv2 and PIM-DM support, advanced API support,
* bandwidth metering and signaling
- *
- * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.111.2.2 2006/01/31 16:13:22 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_mroute.c,v 1.138 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
#include "opt_mac.h"
#include "opt_mrouting.h"
-#ifdef PIM
#define _PIM_VT 1
-#endif
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -90,13 +91,20 @@
#include <netinet/ip_encap.h>
#include <netinet/ip_mroute.h>
#include <netinet/ip_var.h>
-#ifdef PIM
+#include <netinet/ip_options.h>
#include <netinet/pim.h>
#include <netinet/pim_var.h>
-#endif
#include <netinet/udp.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_mroute.h>
+#include <netinet6/ip6_var.h>
+#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* Control debugging code for rsvp and multicast routing code.
* Can only set them with the debugger.
@@ -123,6 +131,10 @@
* to cover not only the specific data structure but also related data
* structures. It may be better to add more fine-grained locking later;
* it's not clear how performance-critical this code is.
+ *
+ * XXX: This module could particularly benefit from being cleaned
+ * up to use the <sys/queue.h> macros.
+ *
*/
static struct mrtstat mrtstat;
@@ -135,13 +147,18 @@
&mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]",
"Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)");
+static struct mtx mrouter_mtx;
+#define MROUTER_LOCK() mtx_lock(&mrouter_mtx)
+#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx)
+#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED)
+#define MROUTER_LOCK_INIT() \
+ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
+#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx)
+
static struct mtx mfc_mtx;
#define MFC_LOCK() mtx_lock(&mfc_mtx)
#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
-#define MFC_LOCK_ASSERT() do { \
- mtx_assert(&mfc_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED)
#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
@@ -159,46 +176,14 @@
static u_char nexpire[MFCTBLSIZ];
+static eventhandler_tag if_detach_event_tag = NULL;
+
static struct callout expire_upcalls_ch;
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
-/*
- * Define the token bucket filter structures
- * tbftable -> each vif has one of these for storing info
- */
-
-static struct tbf tbftable[MAXVIFS];
-#define TBF_REPROCESS (hz / 100) /* 100x / second */
-
-/*
- * 'Interfaces' associated with decapsulator (so we can tell
- * packets that went through it from ones that get reflected
- * by a broken gateway). These interfaces are never linked into
- * the system ifnet list & no routes point to them. I.e., packets
- * can't be sent this way. They only exist as a placeholder for
- * multicast source verification.
- */
-static struct ifnet multicast_decap_if[MAXVIFS];
-
#define ENCAP_TTL 64
-#define ENCAP_PROTO IPPROTO_IPIP /* 4 */
-
-/* prototype IP hdr for encapsulated packets */
-static struct ip multicast_encap_iphdr = {
-#if BYTE_ORDER == LITTLE_ENDIAN
- sizeof(struct ip) >> 2, IPVERSION,
-#else
- IPVERSION, sizeof(struct ip) >> 2,
-#endif
- 0, /* tos */
- sizeof(struct ip), /* total length */
- 0, /* id */
- 0, /* frag offset */
- ENCAP_TTL, ENCAP_PROTO,
- 0, /* checksum */
-};
/*
* Bandwidth meter variables and constants
@@ -222,12 +207,45 @@
static struct callout bw_upcalls_ch;
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-#ifdef PIM
static struct pimstat pimstat;
+
+SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
SYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
&pimstat, pimstat,
"PIM Statistics (struct pimstat, netinet/pim_var.h)");
+static u_long pim_squelch_wholepkt = 0;
+SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
+ &pim_squelch_wholepkt, 0,
+ "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
+
+extern struct domain inetdomain;
+struct protosw in_pim_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = pim_input,
+ .pr_output = (pr_output_t*)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+static const struct encaptab *pim_encap_cookie;
+
+#ifdef INET6
+/* ip6_mroute.c glue */
+extern struct in6_protosw in6_pim_protosw;
+static const struct encaptab *pim6_encap_cookie;
+
+extern int X_ip6_mrouter_set(struct socket *, struct sockopt *);
+extern int X_ip6_mrouter_get(struct socket *, struct sockopt *);
+extern int X_ip6_mrouter_done(void);
+extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
+extern int X_mrt6_ioctl(int, caddr_t);
+#endif
+
+static int pim_encapcheck(const struct mbuf *, int, int, void *);
+
/*
* Note: the PIM Register encapsulation adds the following in front of a
* data packet:
@@ -272,25 +290,11 @@
static struct ifnet multicast_register_if;
static vifi_t reg_vif_num = VIFI_INVALID;
-#endif /* PIM */
/*
* Private variables.
*/
static vifi_t numvifs;
-static const struct encaptab *encap_cookie;
-
-/*
- * one-back cache used by mroute_encapcheck to locate a tunnel's vif
- * given a datagram's src ip address.
- */
-static u_long last_encap_src;
-static struct vif *last_encap_vif;
-
-/*
- * Callout for queue processing.
- */
-static struct callout tbf_reprocess_ch;
static u_long X_ip_mcast_src(int vifi);
static int X_ip_mforward(struct ip *ip, struct ifnet *ifp,
@@ -303,8 +307,10 @@
static int get_sg_cnt(struct sioc_sg_req *);
static int get_vif_cnt(struct sioc_vif_req *);
+static void if_detached_event(void *arg __unused, struct ifnet *);
static int ip_mrouter_init(struct socket *, int);
static int add_vif(struct vifctl *);
+static int del_vif_locked(vifi_t);
static int del_vif(vifi_t);
static int add_mfc(struct mfcctl2 *);
static int del_mfc(struct mfcctl2 *);
@@ -314,15 +320,7 @@
static void expire_upcalls(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
-static void encap_send(struct ip *, struct vif *, struct mbuf *);
-static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
-static void tbf_queue(struct vif *, struct mbuf *);
-static void tbf_process_q(struct vif *);
-static void tbf_reprocess_q(void *);
-static int tbf_dq_sel(struct vif *, struct ip *);
-static void tbf_send_packet(struct vif *, struct mbuf *);
-static void tbf_update_tokens(struct vif *);
-static int priority(struct vif *, struct ip *);
+static void send_packet(struct vif *, struct mbuf *);
/*
* Bandwidth monitoring
@@ -340,7 +338,6 @@
static void expire_bw_upcalls_send(void *);
static void expire_bw_meter_process(void *);
-#ifdef PIM
static int pim_register_send(struct ip *, struct vif *,
struct mbuf *, struct mfc *);
static int pim_register_send_rp(struct ip *, struct vif *,
@@ -348,7 +345,6 @@
static int pim_register_send_upcall(struct ip *, struct vif *,
struct mbuf *, struct mfc *);
static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *);
-#endif
/*
* whether or not special PIM assert processing is enabled.
@@ -378,7 +374,6 @@
/*
* Find a route for a given origin IP address and Multicast group address
- * Type of service parameter to be added in the future!!!
* Statistics are updated by the caller if needed
* (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
*/
@@ -566,7 +561,7 @@
* Typically, only root can create the raw socket in order to execute
* this ioctl method, however the request might be coming from a prison
*/
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error)
return (error);
switch (cmd) {
@@ -639,18 +634,74 @@
pim_assert = 0;
mrt_api_config = 0;
- callout_init(&expire_upcalls_ch, NET_CALLOUT_MPSAFE);
+ callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
bw_upcalls_n = 0;
bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers));
- callout_init(&bw_upcalls_ch, NET_CALLOUT_MPSAFE);
- callout_init(&bw_meter_ch, NET_CALLOUT_MPSAFE);
-
- callout_init(&tbf_reprocess_ch, NET_CALLOUT_MPSAFE);
+ callout_init(&bw_upcalls_ch, CALLOUT_MPSAFE);
+ callout_init(&bw_meter_ch, CALLOUT_MPSAFE);
}
-static struct mtx mrouter_mtx; /* used to synch init/done work */
+static void
+if_detached_event(void *arg __unused, struct ifnet *ifp)
+{
+ vifi_t vifi;
+ int i;
+ struct mfc *mfc;
+ struct mfc *nmfc;
+ struct mfc **ppmfc; /* Pointer to previous node's next-pointer */
+ struct rtdetq *pq;
+ struct rtdetq *npq;
+
+ MROUTER_LOCK();
+ if (ip_mrouter == NULL) {
+ MROUTER_UNLOCK();
+ return;
+ }
+ /*
+ * Tear down multicast forwarder state associated with this ifnet.
+ * 1. Walk the vif list, matching vifs against this ifnet.
+ * 2. Walk the multicast forwarding cache (mfc) looking for
+ * inner matches with this vif's index.
+ * 3. Free any pending mbufs for this mfc.
+ * 4. Free the associated mfc entry and state associated with this vif.
+ * Be very careful about unlinking from a singly-linked list whose
+ * "head node" is a pointer in a simple array.
+ * 5. Free vif state. This should disable ALLMULTI on the interface.
+ */
+ VIF_LOCK();
+ MFC_LOCK();
+ for (vifi = 0; vifi < numvifs; vifi++) {
+ if (viftable[vifi].v_ifp != ifp)
+ continue;
+ for (i = 0; i < MFCTBLSIZ; i++) {
+ ppmfc = &mfctable[i];
+ for (mfc = mfctable[i]; mfc != NULL; ) {
+ nmfc = mfc->mfc_next;
+ if (mfc->mfc_parent == vifi) {
+ for (pq = mfc->mfc_stall; pq != NULL; ) {
+ npq = pq->next;
+ m_freem(pq->m);
+ free(pq, M_MRTABLE);
+ pq = npq;
+ }
+ free_bw_list(mfc->mfc_bw_meter);
+ free(mfc, M_MRTABLE);
+ *ppmfc = nmfc;
+ } else {
+ ppmfc = &mfc->mfc_next;
+ }
+ mfc = nmfc;
+ }
+ }
+ del_vif_locked(vifi);
+ }
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+
+ MROUTER_UNLOCK();
+}
+
/*
* Enable multicast routing
*/
@@ -667,13 +718,20 @@
if (version != 1)
return ENOPROTOOPT;
- mtx_lock(&mrouter_mtx);
+ MROUTER_LOCK();
if (ip_mrouter != NULL) {
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
return EADDRINUSE;
}
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL) {
+ MROUTER_UNLOCK();
+ return (ENOMEM);
+ }
+
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
@@ -682,7 +740,7 @@
ip_mrouter = so;
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
if (mrtdebug)
log(LOG_DEBUG, "ip_mrouter_init\n");
@@ -703,10 +761,10 @@
struct mfc *rt;
struct rtdetq *rte;
- mtx_lock(&mrouter_mtx);
+ MROUTER_LOCK();
if (ip_mrouter == NULL) {
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
return EINVAL;
}
@@ -717,16 +775,6 @@
mrt_api_config = 0;
VIF_LOCK();
- if (encap_cookie) {
- const struct encaptab *c = encap_cookie;
- encap_cookie = NULL;
- encap_detach(c);
- }
- VIF_UNLOCK();
-
- callout_stop(&tbf_reprocess_ch);
-
- VIF_LOCK();
/*
* For each phyint in use, disable promiscuous reception of all IP
* multicasts.
@@ -743,11 +791,11 @@
if_allmulti(ifp, 0);
}
}
- bzero((caddr_t)tbftable, sizeof(tbftable));
bzero((caddr_t)viftable, sizeof(viftable));
numvifs = 0;
pim_assert = 0;
VIF_UNLOCK();
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
/*
* Free all multicast forwarding cache entries.
@@ -779,16 +827,9 @@
bzero(bw_meter_timers, sizeof(bw_meter_timers));
MFC_UNLOCK();
- /*
- * Reset de-encapsulation cache
- */
- last_encap_src = INADDR_ANY;
- last_encap_vif = NULL;
-#ifdef PIM
reg_vif_num = VIFI_INVALID;
-#endif
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
if (mrtdebug)
log(LOG_DEBUG, "ip_mrouter_done\n");
@@ -847,90 +888,6 @@
}
/*
- * Decide if a packet is from a tunnelled peer.
- * Return 0 if not, 64 if so. XXX yuck.. 64 ???
- */
-static int
-mroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
-{
- struct ip *ip = mtod(m, struct ip *);
- int hlen = ip->ip_hl << 2;
-
- /*
- * don't claim the packet if it's not to a multicast destination or if
- * we don't have an encapsulating tunnel with the source.
- * Note: This code assumes that the remote site IP address
- * uniquely identifies the tunnel (i.e., that this site has
- * at most one tunnel with the remote site).
- */
- if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr)))
- return 0;
- if (ip->ip_src.s_addr != last_encap_src) {
- struct vif *vifp = viftable;
- struct vif *vife = vifp + numvifs;
-
- last_encap_src = ip->ip_src.s_addr;
- last_encap_vif = NULL;
- for ( ; vifp < vife; ++vifp)
- if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
- if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL)
- last_encap_vif = vifp;
- break;
- }
- }
- if (last_encap_vif == NULL) {
- last_encap_src = INADDR_ANY;
- return 0;
- }
- return 64;
-}
-
-/*
- * De-encapsulate a packet and feed it back through ip input (this
- * routine is called whenever IP gets a packet that mroute_encap_func()
- * claimed).
- */
-static void
-mroute_encap_input(struct mbuf *m, int off)
-{
- struct ip *ip = mtod(m, struct ip *);
- int hlen = ip->ip_hl << 2;
-
- if (hlen > sizeof(struct ip))
- ip_stripoptions(m, (struct mbuf *) 0);
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m->m_pkthdr.len -= sizeof(struct ip);
-
- m->m_pkthdr.rcvif = last_encap_vif->v_ifp;
-
- netisr_queue(NETISR_IP, m); /* mbuf is free'd on failure. */
- /*
- * normally we would need a "schednetisr(NETISR_IP)"
- * here but we were called by ip_input and it is going
- * to loop back & try to dequeue the packet we just
- * queued as soon as we return so we avoid the
- * unnecessary software interrrupt.
- *
- * XXX
- * This no longer holds - we may have direct-dispatched the packet,
- * or there may be a queue processing limit.
- */
-}
-
-extern struct domain inetdomain;
-static struct protosw mroute_encap_protosw =
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPV4,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = mroute_encap_input,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
-/*
* Add a vif to the vif table
*/
static int
@@ -941,13 +898,18 @@
struct ifaddr *ifa;
struct ifnet *ifp;
int error;
- struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
VIF_LOCK();
if (vifcp->vifc_vifi >= MAXVIFS) {
VIF_UNLOCK();
return EINVAL;
}
+ /* rate limiting is no longer supported by this code */
+ if (vifcp->vifc_rate_limit != 0) {
+ log(LOG_ERR, "rate limiting is no longer supported\n");
+ VIF_UNLOCK();
+ return EINVAL;
+ }
if (vifp->v_lcl_addr.s_addr != INADDR_ANY) {
VIF_UNLOCK();
return EADDRINUSE;
@@ -958,7 +920,6 @@
}
/* Find the interface with an address in AF_INET family */
-#ifdef PIM
if (vifcp->vifc_flags & VIFF_REGISTER) {
/*
* XXX: Because VIFF_REGISTER does not really need a valid
@@ -966,9 +927,7 @@
* check its address.
*/
ifp = NULL;
- } else
-#endif
- {
+ } else {
sin.sin_addr = vifcp->vifc_lcl_addr;
ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
if (ifa == NULL) {
@@ -978,43 +937,10 @@
ifp = ifa->ifa_ifp;
}
- if (vifcp->vifc_flags & VIFF_TUNNEL) {
- if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
- /*
- * An encapsulating tunnel is wanted. Tell
- * mroute_encap_input() to start paying attention
- * to encapsulated packets.
- */
- if (encap_cookie == NULL) {
- int i;
-
- encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
- mroute_encapcheck,
- (struct protosw *)&mroute_encap_protosw, NULL);
-
- if (encap_cookie == NULL) {
- printf("ip_mroute: unable to attach encap\n");
- VIF_UNLOCK();
- return EIO; /* XXX */
- }
- for (i = 0; i < MAXVIFS; ++i) {
- if_initname(&multicast_decap_if[i], "mdecap", i);
- }
- }
- /*
- * Set interface to fake encapsulator interface
- */
- ifp = &multicast_decap_if[vifcp->vifc_vifi];
- /*
- * Prepare cached route entry
- */
- bzero(&vifp->v_route, sizeof(vifp->v_route));
- } else {
- log(LOG_ERR, "source routed tunnels not supported\n");
- VIF_UNLOCK();
- return EOPNOTSUPP;
- }
-#ifdef PIM
+ if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
+ log(LOG_ERR, "tunnels are no longer supported\n");
+ VIF_UNLOCK();
+ return EOPNOTSUPP;
} else if (vifcp->vifc_flags & VIFF_REGISTER) {
ifp = &multicast_register_if;
if (mrtdebug)
@@ -1023,10 +949,8 @@
if (reg_vif_num == VIFI_INVALID) {
if_initname(&multicast_register_if, "register_vif", 0);
multicast_register_if.if_flags = IFF_LOOPBACK;
- bzero(&vifp->v_route, sizeof(vifp->v_route));
reg_vif_num = vifcp->vifc_vifi;
}
-#endif
} else { /* Make sure the interface supports multicast */
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
VIF_UNLOCK();
@@ -1041,21 +965,11 @@
}
}
- /* define parameters for the tbf structure */
- vifp->v_tbf = v_tbf;
- GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
- vifp->v_tbf->tbf_n_tok = 0;
- vifp->v_tbf->tbf_q_len = 0;
- vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
- vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
-
vifp->v_flags = vifcp->vifc_flags;
vifp->v_threshold = vifcp->vifc_threshold;
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
vifp->v_ifp = ifp;
- /* scaling up here allows division by 1024 in critical code */
- vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
vifp->v_rsvp_on = 0;
vifp->v_rsvpd = NULL;
/* initialize per vif pkt counters */
@@ -1063,6 +977,7 @@
vifp->v_pkt_out = 0;
vifp->v_bytes_in = 0;
vifp->v_bytes_out = 0;
+ bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
@@ -1070,13 +985,12 @@
VIF_UNLOCK();
if (mrtdebug)
- log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
+ log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x\n",
vifcp->vifc_vifi,
(u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
(vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
(u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
- vifcp->vifc_threshold,
- vifcp->vifc_rate_limit);
+ vifcp->vifc_threshold);
return 0;
}
@@ -1085,46 +999,26 @@
* Delete a vif from the vif table
*/
static int
-del_vif(vifi_t vifi)
+del_vif_locked(vifi_t vifi)
{
struct vif *vifp;
- VIF_LOCK();
+ VIF_LOCK_ASSERT();
if (vifi >= numvifs) {
- VIF_UNLOCK();
return EINVAL;
}
vifp = &viftable[vifi];
if (vifp->v_lcl_addr.s_addr == INADDR_ANY) {
- VIF_UNLOCK();
return EADDRNOTAVAIL;
}
if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)))
if_allmulti(vifp->v_ifp, 0);
- if (vifp == last_encap_vif) {
- last_encap_vif = NULL;
- last_encap_src = INADDR_ANY;
- }
-
- /*
- * Free packets queued at the interface
- */
- while (vifp->v_tbf->tbf_q) {
- struct mbuf *m = vifp->v_tbf->tbf_q;
-
- vifp->v_tbf->tbf_q = m->m_act;
- m_freem(m);
- }
-
-#ifdef PIM
if (vifp->v_flags & VIFF_REGISTER)
reg_vif_num = VIFI_INVALID;
-#endif
- bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
bzero((caddr_t)vifp, sizeof (*vifp));
if (mrtdebug)
@@ -1136,9 +1030,19 @@
break;
numvifs = vifi;
+ return 0;
+}
+
+static int
+del_vif(vifi_t vifi)
+{
+ int cc;
+
+ VIF_LOCK();
+ cc = del_vif_locked(vifi);
VIF_UNLOCK();
- return 0;
+ return cc;
}
/*
@@ -1346,7 +1250,7 @@
}
/*
- * Send a message to mrouted on the multicast routing socket
+ * Send a message to the routing daemon on the multicast routing socket
*/
static int
socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
@@ -1402,8 +1306,8 @@
* Source-route tunnels are no longer supported.
*/
static int last_log;
- if (last_log != time_second) {
- last_log = time_second;
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
log(LOG_ERR,
"ip_mforward: received source-routed packet from %lx\n",
(u_long)ntohl(ip->ip_src.s_addr));
@@ -1414,7 +1318,7 @@
VIF_LOCK();
MFC_LOCK();
if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
- if (ip->ip_ttl < 255)
+ if (ip->ip_ttl < MAXTTL)
ip->ip_ttl++; /* compensate for -1 in *_send routines */
if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
struct vif *vifp = viftable + vifi;
@@ -1441,7 +1345,7 @@
* Don't forward a packet with time-to-live of zero or one,
* or a packet destined to a local-only group.
*/
- if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) {
+ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
MFC_UNLOCK();
VIF_UNLOCK();
return 0;
@@ -1690,17 +1594,6 @@
int plen = ip->ip_len;
VIF_LOCK_ASSERT();
-/*
- * Macro to send packet on vif. Since RSVP packets don't get counted on
- * input, they shouldn't get counted on output, so statistics keeping is
- * separate.
- */
-#define MC_SEND(ip,vifp,m) { \
- if ((vifp)->v_flags & VIFF_TUNNEL) \
- encap_send((ip), (vifp), (m)); \
- else \
- phyint_send((ip), (vifp), (m)); \
-}
/*
* If xmt_vif is not -1, send on only the requested vif.
@@ -1708,12 +1601,10 @@
* (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
*/
if (xmt_vif < numvifs) {
-#ifdef PIM
if (viftable[xmt_vif].v_flags & VIFF_REGISTER)
- pim_register_send(ip, viftable + xmt_vif, m, rt);
+ pim_register_send(ip, viftable + xmt_vif, m, rt);
else
-#endif
- MC_SEND(ip, viftable + xmt_vif, m);
+ phyint_send(ip, viftable + xmt_vif, m);
return 1;
}
@@ -1740,10 +1631,8 @@
struct timeval now;
u_long delta;
-#ifdef PIM
if (ifp == &multicast_register_if)
pimstat.pims_rcv_registers_wrongiif++;
-#endif
/* Get vifi for the incoming packet */
for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++)
@@ -1811,12 +1700,10 @@
if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
viftable[vifi].v_pkt_out++;
viftable[vifi].v_bytes_out += plen;
-#ifdef PIM
if (viftable[vifi].v_flags & VIFF_REGISTER)
pim_register_send(ip, viftable + vifi, m, rt);
else
-#endif
- MC_SEND(ip, viftable+vifi, m);
+ phyint_send(ip, viftable + vifi, m);
}
/*
@@ -1877,251 +1764,25 @@
if (mb_copy == NULL)
return;
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_copy);
- else
- tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
+ send_packet(vifp, mb_copy);
}
static void
-encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
+send_packet(struct vif *vifp, struct mbuf *m)
{
- struct mbuf *mb_copy;
- struct ip *ip_copy;
- int i, len = ip->ip_len;
-
- VIF_LOCK_ASSERT();
-
- /* Take care of delayed checksums */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- /*
- * copy the old packet & pullup its IP header into the
- * new mbuf so we can modify it. Try to fill the new
- * mbuf since if we don't the ethernet driver will.
- */
- MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
- if (mb_copy == NULL)
- return;
-#ifdef MAC
- mac_create_mbuf_multicast_encap(m, vifp->v_ifp, mb_copy);
-#endif
- mb_copy->m_data += max_linkhdr;
- mb_copy->m_len = sizeof(multicast_encap_iphdr);
-
- if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) {
- m_freem(mb_copy);
- return;
- }
- i = MHLEN - M_LEADINGSPACE(mb_copy);
- if (i > len)
- i = len;
- mb_copy = m_pullup(mb_copy, i);
- if (mb_copy == NULL)
- return;
- mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
-
- /*
- * fill in the encapsulating IP header.
- */
- ip_copy = mtod(mb_copy, struct ip *);
- *ip_copy = multicast_encap_iphdr;
- ip_copy->ip_id = ip_newid();
- ip_copy->ip_len += len;
- ip_copy->ip_src = vifp->v_lcl_addr;
- ip_copy->ip_dst = vifp->v_rmt_addr;
-
- /*
- * turn the encapsulated IP header back into a valid one.
- */
- ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
- --ip->ip_ttl;
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
- ip->ip_sum = 0;
- mb_copy->m_data += sizeof(multicast_encap_iphdr);
- ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
- mb_copy->m_data -= sizeof(multicast_encap_iphdr);
-
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_copy);
- else
- tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
-}
-
-/*
- * Token bucket filter module
- */
-
-static void
-tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */
- mrtstat.mrts_pkt2large++;
- m_freem(m);
- return;
- }
-
- tbf_update_tokens(vifp);
-
- if (t->tbf_q_len == 0) { /* queue empty... */
- if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */
- t->tbf_n_tok -= p_len;
- tbf_send_packet(vifp, m);
- } else { /* no, queue packet and try later */
- tbf_queue(vifp, m);
- callout_reset(&tbf_reprocess_ch, TBF_REPROCESS,
- tbf_reprocess_q, vifp);
- }
- } else if (t->tbf_q_len < t->tbf_max_q_len) {
- /* finite queue length, so queue pkts and process queue */
- tbf_queue(vifp, m);
- tbf_process_q(vifp);
- } else {
- /* queue full, try to dq and queue and process */
- if (!tbf_dq_sel(vifp, ip)) {
- mrtstat.mrts_q_overflow++;
- m_freem(m);
- } else {
- tbf_queue(vifp, m);
- tbf_process_q(vifp);
- }
- }
-}
-
-/*
- * adds a packet to the queue at the interface
- */
-static void
-tbf_queue(struct vif *vifp, struct mbuf *m)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- if (t->tbf_t == NULL) /* Queue was empty */
- t->tbf_q = m;
- else /* Insert at tail */
- t->tbf_t->m_act = m;
-
- t->tbf_t = m; /* Set new tail pointer */
-
-#ifdef DIAGNOSTIC
- /* Make sure we didn't get fed a bogus mbuf */
- if (m->m_act)
- panic("tbf_queue: m_act");
-#endif
- m->m_act = NULL;
-
- t->tbf_q_len++;
-}
-
-/*
- * processes the queue at the interface
- */
-static void
-tbf_process_q(struct vif *vifp)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- /* loop through the queue at the interface and send as many packets
- * as possible
- */
- while (t->tbf_q_len > 0) {
- struct mbuf *m = t->tbf_q;
- int len = mtod(m, struct ip *)->ip_len;
-
- /* determine if the packet can be sent */
- if (len > t->tbf_n_tok) /* not enough tokens, we are done */
- break;
- /* ok, reduce no of tokens, dequeue and send the packet. */
- t->tbf_n_tok -= len;
-
- t->tbf_q = m->m_act;
- if (--t->tbf_q_len == 0)
- t->tbf_t = NULL;
-
- m->m_act = NULL;
- tbf_send_packet(vifp, m);
- }
-}
-
-static void
-tbf_reprocess_q(void *xvifp)
-{
- struct vif *vifp = xvifp;
-
- if (ip_mrouter == NULL)
- return;
- VIF_LOCK();
- tbf_update_tokens(vifp);
- tbf_process_q(vifp);
- if (vifp->v_tbf->tbf_q_len)
- callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, tbf_reprocess_q, vifp);
- VIF_UNLOCK();
-}
-
-/* function that will selectively discard a member of the queue
- * based on the precedence value and the priority
- */
-static int
-tbf_dq_sel(struct vif *vifp, struct ip *ip)
-{
- u_int p;
- struct mbuf *m, *last;
- struct mbuf **np;
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- p = priority(vifp, ip);
-
- np = &t->tbf_q;
- last = NULL;
- while ((m = *np) != NULL) {
- if (p > priority(vifp, mtod(m, struct ip *))) {
- *np = m->m_act;
- /* If we're removing the last packet, fix the tail pointer */
- if (m == t->tbf_t)
- t->tbf_t = last;
- m_freem(m);
- /* It's impossible for the queue to be empty, but check anyways. */
- if (--t->tbf_q_len == 0)
- t->tbf_t = NULL;
- mrtstat.mrts_drop_sel++;
- return 1;
- }
- np = &m->m_act;
- last = m;
- }
- return 0;
-}
-
-static void
-tbf_send_packet(struct vif *vifp, struct mbuf *m)
-{
- VIF_LOCK_ASSERT();
-
- if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */
- ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL);
- else {
struct ip_moptions imo;
+ struct in_multi *imm[2];
int error;
- static struct route ro; /* XXX check this */
+
+ VIF_LOCK_ASSERT();
imo.imo_multicast_ifp = vifp->v_ifp;
imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
imo.imo_multicast_loop = 1;
imo.imo_multicast_vif = -1;
+ imo.imo_num_memberships = 0;
+ imo.imo_max_memberships = 2;
+ imo.imo_membership = &imm[0];
/*
* Re-entrancy should not be a problem here, because
@@ -2129,84 +1790,13 @@
* should get rejected because they appear to come from
* the loopback interface, thus preventing looping.
*/
- error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL);
-
- if (mrtdebug & DEBUG_XMIT)
- log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
- (int)(vifp - viftable), error);
- }
-}
-
-/* determine the current time and then
- * the elapsed time (between the last time and time now)
- * in milliseconds & update the no. of tokens in the bucket
- */
-static void
-tbf_update_tokens(struct vif *vifp)
-{
- struct timeval tp;
- u_long tm;
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- GET_TIME(tp);
-
- TV_DELTA(tp, t->tbf_last_pkt_t, tm);
-
- /*
- * This formula is actually
- * "time in seconds" * "bytes/second".
- *
- * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
- *
- * The (1000/1024) was introduced in add_vif to optimize
- * this divide into a shift.
- */
- t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
- t->tbf_last_pkt_t = tp;
-
- if (t->tbf_n_tok > MAX_BKT_SIZE)
- t->tbf_n_tok = MAX_BKT_SIZE;
-}
-
-static int
-priority(struct vif *vifp, struct ip *ip)
-{
- int prio = 50; /* the lowest priority -- default case */
-
- /* temporary hack; may add general packet classifier some day */
-
- /*
- * The UDP port space is divided up into four priority ranges:
- * [0, 16384) : unclassified - lowest priority
- * [16384, 32768) : audio - highest priority
- * [32768, 49152) : whiteboard - medium priority
- * [49152, 65536) : video - low priority
- *
- * Everything else gets lowest priority.
- */
- if (ip->ip_p == IPPROTO_UDP) {
- struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
- switch (ntohs(udp->uh_dport) & 0xc000) {
- case 0x4000:
- prio = 70;
- break;
- case 0x8000:
- prio = 60;
- break;
- case 0xc000:
- prio = 55;
- break;
+ error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
+ if (mrtdebug & DEBUG_XMIT) {
+ log(LOG_DEBUG, "phyint_send on vif %td err %d\n",
+ vifp - viftable, error);
}
- }
- return prio;
}
-/*
- * End of token bucket filter modifications
- */
-
static int
X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt)
{
@@ -2718,7 +2308,7 @@
* Allocate a new mbuf, initialize it with the header and
* the payload for the pending calls.
*/
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
return;
@@ -2950,20 +2540,27 @@
* End of bandwidth monitoring code
*/
-#ifdef PIM
/*
* Send the packet up to the user daemon, or eventually do kernel encapsulation
*
*/
static int
-pim_register_send(struct ip *ip, struct vif *vifp,
- struct mbuf *m, struct mfc *rt)
+pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
+ struct mfc *rt)
{
struct mbuf *mb_copy, *mm;
if (mrtdebug & DEBUG_PIM)
log(LOG_DEBUG, "pim_register_send: ");
+ /*
+ * Do not send IGMP_WHOLEPKT notifications to userland, if the
+ * rendezvous point was unspecified, and we were told not to.
+ */
+ if (pim_squelch_wholepkt != 0 && (mrt_api_config & MRT_MFC_RP) &&
+ (rt->mfc_rp.s_addr == INADDR_ANY))
+ return 0;
+
mb_copy = pim_register_prepare(ip, m);
if (mb_copy == NULL)
return ENOBUFS;
@@ -3046,7 +2643,7 @@
*/
static int
pim_register_send_upcall(struct ip *ip, struct vif *vifp,
- struct mbuf *mb_copy, struct mfc *rt)
+ struct mbuf *mb_copy, struct mfc *rt)
{
struct mbuf *mb_first;
int len = ntohs(ip->ip_len);
@@ -3058,7 +2655,7 @@
/*
* Add a new mbuf with an upcall header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_HEADER);
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -3099,8 +2696,8 @@
* Encapsulate the data packet in PIM Register message and send it to the RP.
*/
static int
-pim_register_send_rp(struct ip *ip, struct vif *vifp,
- struct mbuf *mb_copy, struct mfc *rt)
+pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
+ struct mfc *rt)
{
struct mbuf *mb_first;
struct ip *ip_outer;
@@ -3118,7 +2715,7 @@
/*
* Add a new mbuf with the encapsulating header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_HEADER);
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -3156,10 +2753,7 @@
pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
mb_first->m_data -= sizeof(pim_encap_iphdr);
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_first);
- else
- tbf_control(vifp, mb_first, ip, ip_outer->ip_len);
+ send_packet(vifp, mb_first);
/* Keep statistics */
pimstat.pims_snd_registers_msgs++;
@@ -3169,6 +2763,24 @@
}
/*
+ * pim_encapcheck() is called by the encap[46]_input() path at runtime to
+ * determine if a packet is for PIM; allowing PIM to be dynamically loaded
+ * into the kernel.
+ */
+static int
+pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+
+#ifdef DIAGNOSTIC
+ KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
+#endif
+ if (proto != IPPROTO_PIM)
+ return 0; /* not for us; reject the datagram. */
+
+ return 64; /* claim the datagram. */
+}
+
+/*
* PIM-SMv2 and PIM-DM messages processing.
* Receives and verifies the PIM control messages, and passes them
* up to the listening socket, using rip_input().
@@ -3408,24 +3020,66 @@
return;
}
-#endif /* PIM */
+/*
+ * XXX: This is common code for dealing with initialization for both
+ * the IPv4 and IPv6 multicast forwarding paths. It could do with cleanup.
+ */
static int
ip_mroute_modevent(module_t mod, int type, void *unused)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&mrouter_mtx, "mrouter initialization", NULL, MTX_DEF);
+ MROUTER_LOCK_INIT();
MFC_LOCK_INIT();
VIF_LOCK_INIT();
ip_mrouter_reset();
+ TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
+ &pim_squelch_wholepkt);
+
+ pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
+ pim_encapcheck, &in_pim_protosw, NULL);
+ if (pim_encap_cookie == NULL) {
+ printf("ip_mroute: unable to attach pim encap\n");
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+
+#ifdef INET6
+ pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
+ pim_encapcheck, (struct protosw *)&in6_pim_protosw, NULL);
+ if (pim6_encap_cookie == NULL) {
+ printf("ip_mroute: unable to attach pim6 encap\n");
+ if (pim_encap_cookie) {
+ encap_detach(pim_encap_cookie);
+ pim_encap_cookie = NULL;
+ }
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+#endif
+
ip_mcast_src = X_ip_mcast_src;
ip_mforward = X_ip_mforward;
ip_mrouter_done = X_ip_mrouter_done;
ip_mrouter_get = X_ip_mrouter_get;
ip_mrouter_set = X_ip_mrouter_set;
+
+#ifdef INET6
+ ip6_mforward = X_ip6_mforward;
+ ip6_mrouter_done = X_ip6_mrouter_done;
+ ip6_mrouter_get = X_ip6_mrouter_get;
+ ip6_mrouter_set = X_ip6_mrouter_set;
+ mrt6_ioctl = X_mrt6_ioctl;
+#endif
+
ip_rsvp_force_done = X_ip_rsvp_force_done;
ip_rsvp_vif = X_ip_rsvp_vif;
+
legal_vif_num = X_legal_vif_num;
mrt_ioctl = X_mrt_ioctl;
rsvp_input_p = X_rsvp_input;
@@ -3440,24 +3094,49 @@
* just loaded and then unloaded w/o starting up a user
* process we still need to cleanup.
*/
- if (ip_mrouter)
+ if (ip_mrouter
+#ifdef INET6
+ || ip6_mrouter
+#endif
+ )
return EINVAL;
+#ifdef INET6
+ if (pim6_encap_cookie) {
+ encap_detach(pim6_encap_cookie);
+ pim6_encap_cookie = NULL;
+ }
+ X_ip6_mrouter_done();
+ ip6_mforward = NULL;
+ ip6_mrouter_done = NULL;
+ ip6_mrouter_get = NULL;
+ ip6_mrouter_set = NULL;
+ mrt6_ioctl = NULL;
+#endif
+
+ if (pim_encap_cookie) {
+ encap_detach(pim_encap_cookie);
+ pim_encap_cookie = NULL;
+ }
X_ip_mrouter_done();
ip_mcast_src = NULL;
ip_mforward = NULL;
ip_mrouter_done = NULL;
ip_mrouter_get = NULL;
ip_mrouter_set = NULL;
+
ip_rsvp_force_done = NULL;
ip_rsvp_vif = NULL;
+
legal_vif_num = NULL;
mrt_ioctl = NULL;
rsvp_input_p = NULL;
+
VIF_LOCK_DESTROY();
MFC_LOCK_DESTROY();
- mtx_destroy(&mrouter_mtx);
+ MROUTER_LOCK_DESTROY();
break;
+
default:
return EOPNOTSUPP;
}
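
With the old per-vif tunnel machinery removed, PIM decapsulation now hangs off
a single encap_attach_func() registration made at MOD_LOAD time:
pim_encapcheck() is consulted for candidate datagrams and claims them (return
value 64) only when the protocol is IPPROTO_PIM; anything it rejects falls
through to the normal raw-IP path.  The sketch below models only that
claim/reject convention in plain userland C; the dispatch table, dispatch()
and the handler names are invented for illustration and are not the kernel's
encap framework.

    #include <stdio.h>

    /* Toy model of a checker/handler table; not the in-kernel API. */
    struct encap_entry {
            int     (*check)(int proto);    /* 0 = reject, >0 = match strength */
            void    (*input)(int proto);
    };

    static int
    pim_check(int proto)
    {
            return (proto == 103 /* IPPROTO_PIM */ ? 64 : 0);
    }

    static void
    pim_handler(int proto)
    {
            printf("proto %d claimed, handed to the PIM input routine\n", proto);
    }

    static struct encap_entry table[] = { { pim_check, pim_handler } };

    static void
    dispatch(int proto)
    {
            int i, best = -1, prio = 0;

            for (i = 0; i < (int)(sizeof(table) / sizeof(table[0])); i++) {
                    int p = table[i].check(proto);

                    if (p > prio) {
                            prio = p;
                            best = i;
                    }
            }
            if (best >= 0)
                    table[best].input(proto);
            else
                    printf("proto %d not claimed, falls back to raw IP\n", proto);
    }

    int
    main(void)
    {
            dispatch(103);  /* claimed by the PIM checker */
            dispatch(4);    /* IPIP: nobody claims it */
            return (0);
    }

The real encapcheck additionally receives the mbuf and offset so a checker can
inspect headers; the same pim_encapcheck() is reused for the IPv6 registration
above since it only looks at the protocol number.
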
Index: alias.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias.h -L sys/netinet/libalias/alias.h -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias.h
+++ sys/netinet/libalias/alias.h
@@ -25,10 +25,10 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/libalias/alias.h,v 1.32 2005/05/05 21:53:17 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias.h,v 1.34 2006/12/01 16:27:11 piso Exp $
*/
-/*-
+/*
* Alias.h defines the outside world interfaces for the packet aliasing
* software.
*
@@ -39,12 +39,16 @@
#ifndef _ALIAS_H_
#define _ALIAS_H_
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#define LIBALIAS_BUF_SIZE 128
#ifdef _KERNEL
/*
* The kernel version of libalias does not support these features.
*/
#define NO_FW_PUNCH
-#define NO_LOGGING
#define NO_USE_SOCKETS
#endif
@@ -180,6 +184,13 @@
/* Transparent proxying routines. */
int LibAliasProxyRule(struct libalias *, const char *_cmd);
+/* Module handling API */
+int LibAliasLoadModule(char *);
+int LibAliasUnLoadAllModule(void);
+int LibAliasRefreshModules(void);
+
+/* Mbuf helper function. */
+struct mbuf *m_megapullup(struct mbuf *, int);
/*
* Mode flags and other constants.
@@ -192,9 +203,7 @@
* If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log
* every time a link is created or deleted. This is useful for debugging.
*/
-#ifndef NO_LOGGING
#define PKT_ALIAS_LOG 0x01
-#endif
/*
* If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp,
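
The module-handling entry points declared here (LibAliasLoadModule(),
LibAliasUnLoadAllModule(), LibAliasRefreshModules()) are implemented for the
userland build further down in alias.c.  A minimal caller might look like the
sketch below; the module path is illustrative only, and the usual
LibAliasInit() call from the existing public API is assumed.

    #include <stdio.h>

    #include "alias.h"              /* userland build, as in alias.c */

    int
    main(void)
    {
            struct libalias *la;

            /* One instance per NAT; LibAliasInit(NULL) allocates a fresh one. */
            la = LibAliasInit(NULL);
            if (la == NULL) {
                    fprintf(stderr, "LibAliasInit failed\n");
                    return (1);
            }

            /* Hypothetical module path; adjust to wherever the handler lives. */
            if (LibAliasLoadModule("/usr/lib/libalias_ftp.so") != 0)
                    fprintf(stderr, "could not load the ftp handler\n");

            /* ... LibAliasIn()/LibAliasOut() would be called per packet ... */

            LibAliasUnLoadAllModule();
            return (0);
    }
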
Index: alias.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias.c -L sys/netinet/libalias/alias.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias.c
+++ sys/netinet/libalias/alias.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias.c,v 1.53 2005/06/27 22:21:42 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias.c,v 1.58 2006/12/15 12:50:06 piso Exp $");
/*
Alias.c provides supervisory control for the functions of the
@@ -113,9 +113,16 @@
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
#else
#include <sys/types.h>
+#include <stdlib.h>
#include <stdio.h>
+#include <ctype.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <string.h>
#endif
#include <netinet/in_systm.h>
@@ -128,22 +135,14 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
+#include <err.h>
#include "alias.h"
#include "alias_local.h"
+#include "alias_mod.h"
#endif
-#define NETBIOS_NS_PORT_NUMBER 137
-#define NETBIOS_DGM_PORT_NUMBER 138
-#define FTP_CONTROL_PORT_NUMBER 21
-#define IRC_CONTROL_PORT_NUMBER_1 6667
-#define IRC_CONTROL_PORT_NUMBER_2 6668
-#define CUSEEME_PORT_NUMBER 7648
-#define RTSP_CONTROL_PORT_NUMBER_1 554
-#define RTSP_CONTROL_PORT_NUMBER_2 7070
-#define TFTP_PORT_NUMBER 69
-#define PPTP_CONTROL_PORT_NUMBER 1723
-
static __inline int
twowords(void *p)
{
@@ -284,6 +283,8 @@
static int
IcmpAliasIn1(struct libalias *la, struct ip *pip)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
/*
De-alias incoming echo and timestamp replies.
Alias incoming echo and timestamp requests.
@@ -327,6 +328,8 @@
static int
IcmpAliasIn2(struct libalias *la, struct ip *pip)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
/*
Alias incoming ICMP error messages containing
IP header and first 64 bits of datagram.
@@ -430,6 +433,7 @@
int iresult;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -469,6 +473,7 @@
struct alias_link *lnk;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
ic = (struct icmp *)ip_next(pip);
/* Save overwritten data for when echo packet returns */
@@ -516,6 +521,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
ic = (struct icmp *)ip_next(pip);
ip = &ic->icmp_ip;
@@ -609,6 +615,7 @@
int iresult;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
(void)create;
/* Return if proxy-only mode is enabled */
@@ -651,6 +658,7 @@
*/
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -682,6 +690,7 @@
*/
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
(void)create;
/* Return if proxy-only mode is enabled */
@@ -711,6 +720,7 @@
struct udphdr *ud;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -725,24 +735,24 @@
struct in_addr original_address;
u_short alias_port;
int accumulate;
- int r = 0;
+ int r = 0, error;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
alias_address = GetAliasAddress(lnk);
original_address = GetOriginalAddress(lnk);
alias_port = ud->uh_dport;
ud->uh_dport = GetOriginalPort(lnk);
-/* Special processing for IP encoding protocols */
- if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
- AliasHandleCUSeeMeIn(la, pip, original_address);
-/* If NETBIOS Datagram, It should be alias address in UDP Data, too */
- else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
- r = AliasHandleUdpNbt(la, pip, lnk, &original_address, ud->uh_dport);
- else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
- r = AliasHandleUdpNbtNS(la, pip, lnk, &alias_address, &alias_port,
- &original_address, &ud->uh_dport);
+ /* Walk out chain. */
+ error = find_handler(IN, UDP, la, pip, &ad);
/* If UDP checksum is not zero, then adjust since destination port */
/* is being unaliased and destination address is being altered. */
@@ -774,7 +784,9 @@
{
struct udphdr *ud;
struct alias_link *lnk;
+ int error;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -787,29 +799,21 @@
if (lnk != NULL) {
u_short alias_port;
struct in_addr alias_address;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
alias_address = GetAliasAddress(lnk);
alias_port = GetAliasPort(lnk);
-/* Special processing for IP encoding protocols */
- if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
- AliasHandleCUSeeMeOut(la, pip, lnk);
-/* If NETBIOS Datagram, It should be alias address in UDP Data, too */
- else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
- AliasHandleUdpNbt(la, pip, lnk, &alias_address, alias_port);
- else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
- AliasHandleUdpNbtNS(la, pip, lnk, &pip->ip_src, &ud->uh_sport,
- &alias_address, &alias_port);
-/*
- * We don't know in advance what TID the TFTP server will choose,
- * so we create a wilcard link (destination port is unspecified)
- * that will match any TID from a given destination.
- */
- else if (ntohs(ud->uh_dport) == TFTP_PORT_NUMBER)
- FindRtspOut(la, pip->ip_src, pip->ip_dst,
- ud->uh_sport, alias_port, IPPROTO_UDP);
+ /* Walk out chain. */
+ error = find_handler(OUT, UDP, la, pip, &ad);
/* If UDP checksum is not zero, adjust since source port is */
/* being aliased and source address is being altered */
@@ -843,6 +847,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
tc = (struct tcphdr *)ip_next(pip);
lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst,
@@ -855,15 +860,26 @@
struct in_addr proxy_address;
u_short alias_port;
u_short proxy_port;
- int accumulate;
+ int accumulate, error;
+
+ /*
+ * The initialization of many of these fields is a bit below, but
+ * AliasHandlePptpIn seems to need the destination port as it came
+ * within the packet and not the original one; see [*] below.
+ */
+
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = 0
+ };
-/* Special processing for IP encoding protocols */
- if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
- AliasHandlePptpIn(la, pip, lnk);
- else if (la->skinnyPort != 0 && (ntohs(tc->th_dport) == la->skinnyPort
- || ntohs(tc->th_sport) == la->skinnyPort))
- AliasHandleSkinny(la, pip, lnk);
+ /* Walk out chain. */
+ error = find_handler(IN, TCP, la, pip, &ad);
alias_address = GetAliasAddress(lnk);
original_address = GetOriginalAddress(lnk);
@@ -872,6 +888,28 @@
tc->th_dport = GetOriginalPort(lnk);
proxy_port = GetProxyPort(lnk);
+ /*
+ * Look above: if anyone is going to add a find_handler call AFTER
+ * this AliasHandlePptpIn point, please redo alias_data too.
+ * Uncommenting the piece below should be enough.
+ */
+#if 0
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(la, pip, &ad);
+ if (error == EHDNOF)
+ printf("Protocol handler not found\n");
+#endif
+
/* Adjust TCP checksum since destination port is being unaliased */
/* and destination port is being altered. */
accumulate = alias_port;
@@ -926,7 +964,7 @@
static int
TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create)
{
- int proxy_type;
+ int proxy_type, error;
u_short dest_port;
u_short proxy_server_port;
struct in_addr dest_address;
@@ -934,6 +972,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
tc = (struct tcphdr *)ip_next(pip);
if (create)
@@ -973,6 +1012,15 @@
u_short alias_port;
struct in_addr alias_address;
int accumulate;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = maxpacketsize
+ };
/* Save original destination address, if this is a proxy packet.
Also modify packet to include destination encoding. This may
@@ -989,25 +1037,9 @@
/* Monitor TCP connection state */
TcpMonitorOut(pip, lnk);
-
-/* Special processing for IP encoding protocols */
- if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == FTP_CONTROL_PORT_NUMBER)
- AliasHandleFtpOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_2)
- AliasHandleIrcOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2
- || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_2)
- AliasHandleRtspOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
- AliasHandlePptpOut(la, pip, lnk);
- else if (la->skinnyPort != 0 && (ntohs(tc->th_sport) == la->skinnyPort
- || ntohs(tc->th_dport) == la->skinnyPort))
- AliasHandleSkinny(la, pip, lnk);
+
+ /* Walk out chain. */
+ error = find_handler(OUT, TCP, la, pip, &ad);
/* Adjust TCP checksum since source port is being aliased */
/* and source address is being altered */
@@ -1067,6 +1099,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindFragmentIn2(la, pip->ip_src, pip->ip_dst, pip->ip_id);
if (lnk != NULL) {
struct in_addr original_address;
@@ -1087,6 +1120,7 @@
{
struct in_addr alias_address;
+ LIBALIAS_LOCK_ASSERT(la);
alias_address = FindAliasAddress(la, pip->ip_src);
DifferentialChecksum(&pip->ip_sum,
&alias_address, &pip->ip_src, 2);
@@ -1120,6 +1154,7 @@
struct alias_link *lnk;
struct ip *pip;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
lnk = AddFragmentPtrLink(la, pip->ip_src, pip->ip_id);
iresult = PKT_ALIAS_ERROR;
@@ -1127,6 +1162,7 @@
SetFragmentPtr(lnk, ptr);
iresult = PKT_ALIAS_OK;
}
+ LIBALIAS_UNLOCK(la);
return (iresult);
}
@@ -1138,17 +1174,18 @@
char *fptr;
struct ip *pip;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
lnk = FindFragmentPtr(la, pip->ip_src, pip->ip_id);
if (lnk != NULL) {
GetFragmentPtr(lnk, &fptr);
SetFragmentPtr(lnk, NULL);
SetExpire(lnk, 0); /* Deletes link */
+ } else
+ fptr = NULL;
- return (fptr);
- } else {
- return (NULL);
- }
+ LIBALIAS_UNLOCK(la);
+ return (fptr);
}
@@ -1163,6 +1200,7 @@
struct ip *pip;
struct ip *fpip;
+ LIBALIAS_LOCK(la);
(void)la;
pip = (struct ip *)ptr;
fpip = (struct ip *)ptr_fragment;
@@ -1170,21 +1208,40 @@
DifferentialChecksum(&fpip->ip_sum,
&pip->ip_dst, &fpip->ip_dst, 2);
fpip->ip_dst = pip->ip_dst;
+ LIBALIAS_UNLOCK(la);
}
+/* Local prototypes */
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr,
+ int maxpacketsize, int create);
+static int
+LibAliasInLocked(struct libalias *la, char *ptr,
+ int maxpacketsize);
int
LibAliasIn(struct libalias *la, char *ptr, int maxpacketsize)
{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasInLocked(la, ptr, maxpacketsize);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasInLocked(struct libalias *la, char *ptr, int maxpacketsize)
+{
struct in_addr alias_addr;
struct ip *pip;
int iresult;
if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
- iresult = LibAliasOut(la, ptr, maxpacketsize);
+ iresult = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
la->packetAliasMode |= PKT_ALIAS_REVERSE;
- return (iresult);
+ goto getout;
}
HouseKeeping(la);
ClearCheckNewLink(la);
@@ -1193,8 +1250,10 @@
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
- || (pip->ip_hl << 2) > maxpacketsize)
- return (PKT_ALIAS_IGNORED);
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
iresult = PKT_ALIAS_IGNORED;
if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) {
@@ -1208,13 +1267,26 @@
case IPPROTO_TCP:
iresult = TcpAliasIn(la, pip);
break;
- case IPPROTO_GRE:
- if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY ||
- AliasHandlePptpGreIn(la, pip) == 0)
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(IN, IP, la, pip, &ad);
+ if (error == 0)
iresult = PKT_ALIAS_OK;
else
iresult = ProtoAliasIn(la, pip);
- break;
+ }
+ break;
default:
iresult = ProtoAliasIn(la, pip);
break;
@@ -1235,6 +1307,7 @@
iresult = FragmentIn(la, pip);
}
+getout:
return (iresult);
}
@@ -1255,19 +1328,32 @@
#define UNREG_ADDR_C_UPPER 0xc0a8ffff
int
-LibAliasOut(struct libalias *la, char *ptr, /* valid IP packet */
- int maxpacketsize /* How much the packet data may grow (FTP
- * and IRC inline changes) */
-)
+LibAliasOut(struct libalias *la, char *ptr, int maxpacketsize)
{
- return (LibAliasOutTry(la, ptr, maxpacketsize, 1));
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
int
-LibAliasOutTry(struct libalias *la, char *ptr, /* valid IP packet */
+LibAliasOutTry(struct libalias *la, char *ptr, int maxpacketsize, int create)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, create);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr, /* valid IP packet */
int maxpacketsize, /* How much the packet data may grow (FTP
* and IRC inline changes) */
- int create /* Create new entries ? */
+ int create /* Create new entries ? */
)
{
int iresult;
@@ -1276,9 +1362,9 @@
if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
- iresult = LibAliasIn(la, ptr, maxpacketsize);
+ iresult = LibAliasInLocked(la, ptr, maxpacketsize);
la->packetAliasMode |= PKT_ALIAS_REVERSE;
- return (iresult);
+ goto getout;
}
HouseKeeping(la);
ClearCheckNewLink(la);
@@ -1286,8 +1372,10 @@
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
- || (pip->ip_hl << 2) > maxpacketsize)
- return (PKT_ALIAS_IGNORED);
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
addr_save = GetDefaultAliasAddress(la);
if (la->packetAliasMode & PKT_ALIAS_UNREGISTERED_ONLY) {
@@ -1321,12 +1409,25 @@
case IPPROTO_TCP:
iresult = TcpAliasOut(la, pip, maxpacketsize, create);
break;
- case IPPROTO_GRE:
- if (AliasHandlePptpGreOut(la, pip) == 0)
- iresult = PKT_ALIAS_OK;
- else
- iresult = ProtoAliasOut(la, pip, create);
- break;
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+ /* Walk out chain. */
+ error = find_handler(OUT, IP, la, pip, &ad);
+ if (error == 0)
+ iresult = PKT_ALIAS_OK;
+ else
+ iresult = ProtoAliasOut(la, pip, create);
+ }
+ break;
default:
iresult = ProtoAliasOut(la, pip, create);
break;
@@ -1336,6 +1437,7 @@
}
SetDefaultAliasAddress(la, addr_save);
+getout:
return (iresult);
}
@@ -1351,12 +1453,13 @@
struct alias_link *lnk;
int iresult = PKT_ALIAS_IGNORED;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
|| (pip->ip_hl << 2) > maxpacketsize)
- return (iresult);
+ goto getout;
ud = (struct udphdr *)ip_next(pip);
tc = (struct tcphdr *)ip_next(pip);
@@ -1440,6 +1543,140 @@
iresult = PKT_ALIAS_OK;
}
}
+getout:
+ LIBALIAS_UNLOCK(la);
return (iresult);
}
+
+#ifndef _KERNEL
+
+int
+LibAliasRefreshModules(void)
+{
+ char buf[256], conf[] = "/etc/libalias.conf";
+ FILE *fd;
+ int i, len;
+
+ fd = fopen(conf, "r");
+ if (fd == NULL)
+ err(1, "fopen(%s)", conf);
+
+ LibAliasUnLoadAllModule();
+
+ for (;;) {
+ fgets(buf, 256, fd);
+ if (feof(fd))
+ break;
+ len = strlen(buf);
+ if (len > 1) {
+ for (i = 0; i < len; i++)
+ if (!isspace(buf[i]))
+ break;
+ if (buf[i] == '#')
+ continue;
+ buf[len - 1] = '\0';
+ printf("Loading %s\n", buf);
+ LibAliasLoadModule(buf);
+ }
+ }
+ return (0);
+}
+
+int
+LibAliasLoadModule(char *path)
+{
+ struct dll *t;
+ void *handle;
+ struct proto_handler *m;
+ const char *error;
+ moduledata_t *p;
+
+ handle = dlopen (path, RTLD_LAZY);
+ if (!handle) {
+ fprintf(stderr, "%s\n", dlerror());
+ return (EINVAL);
+ }
+
+ p = dlsym(handle, "alias_mod");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", dlerror());
+ return (EINVAL);
+ }
+
+ t = malloc(sizeof(struct dll));
+ if (t == NULL)
+ return (ENOMEM);
+ strncpy(t->name, p->name, DLL_LEN);
+ t->handle = handle;
+ if (attach_dll(t) == EEXIST) {
+ free(t);
+ fprintf(stderr, "dll conflict\n");
+ return (EEXIST);
+ }
+
+ m = dlsym(t->handle, "handlers");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", error);
+ return (EINVAL);
+ }
+
+ LibAliasAttachHandlers(m);
+ return (0);
+}
+
+int
+LibAliasUnLoadAllModule(void)
+{
+ struct dll *t;
+ struct proto_handler *p;
+
+ /* Unload all modules then reload everything. */
+ while ((p = first_handler()) != NULL) {
+ detach_handler(p);
+ }
+ while ((t = walk_dll_chain()) != NULL) {
+ dlclose(t->handle);
+ free(t);
+ }
+ return (1);
+}
+
+#endif
+
+#ifdef _KERNEL
+/*
+ * m_megapullup() - this function is a big hack.
+ * Thankfully, it's only used in ng_nat and ipfw+nat.
+ *
+ * It allocates an mbuf with cluster and copies the whole chain into cluster,
+ * so that it is all contiguous and the whole packet can be accessed via a
+ * plain (char *) pointer. This is required, because libalias doesn't know
+ * how to handle mbuf chains.
+ *
+ * On success, m_megapullup returns an mbuf with cluster containing the input
+ * packet, on failure NULL. In both cases, the input packet is consumed.
+ */
+struct mbuf *
+m_megapullup(struct mbuf *m, int len) {
+ struct mbuf *mcl;
+ caddr_t cp;
+
+ if (len > MCLBYTES)
+ goto bad;
+
+ if ((mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR)) == NULL)
+ goto bad;
+
+ cp = mtod(mcl, caddr_t);
+ m_copydata(m, 0, len, cp);
+ m_move_pkthdr(mcl, m);
+ mcl->m_len = mcl->m_pkthdr.len;
+ m_freem(m);
+
+ return (mcl);
+bad:
+ m_freem(m);
+ return (NULL);
+}
+#endif
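
Because the in-kernel libalias still works on a flat buffer, consumers such as
ng_nat are expected to run the chain through m_megapullup() first and then
hand mtod(m, ...) to LibAliasIn()/LibAliasOut().  The fragment below is a
hedged sketch of that calling pattern, not code from this tree: the enclosing
function name is invented, and the post-NAT length fix-up only mirrors what a
caller would typically do after an inline rewrite grows the packet.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/mbuf.h>

    #include <netinet/in_systm.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/libalias/alias.h>

    /* Illustrative consumer: flatten the chain, alias it, resync the length. */
    static struct mbuf *
    nat_one_packet(struct libalias *la, struct mbuf *m)
    {
            struct ip *ip;
            int rval;

            /* Collapse into a single cluster; m is consumed on failure. */
            m = m_megapullup(m, m->m_pkthdr.len);
            if (m == NULL)
                    return (NULL);

            ip = mtod(m, struct ip *);
            rval = LibAliasOut(la, (char *)ip, MCLBYTES);   /* room to grow */
            if (rval != PKT_ALIAS_OK &&
                rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
                    m_freem(m);
                    return (NULL);
            }

            /* FTP/IRC inline rewrites may have changed the total length. */
            m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len);
            return (m);
    }
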
Index: alias_proxy.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_proxy.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/libalias/alias_proxy.c -L sys/netinet/libalias/alias_proxy.c -u -r1.2 -r1.3
--- sys/netinet/libalias/alias_proxy.c
+++ sys/netinet/libalias/alias_proxy.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_proxy.c,v 1.26 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_proxy.c,v 1.31 2007/04/30 20:26:11 maxim Exp $");
/* file: alias_proxy.c
@@ -58,30 +58,24 @@
#include <sys/param.h>
#include <sys/ctype.h>
#include <sys/libkern.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/limits.h>
#else
#include <sys/types.h>
-#include <sys/socket.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
#include <netdb.h>
-#include <arpa/inet.h>
+#include <string.h>
#endif
-/* BSD IPV4 includes */
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
#include <netinet/tcp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
+#include <arpa/inet.h>
#include "alias.h" /* Public API functions for libalias */
#include "alias_local.h" /* Functions used by alias*.c */
#endif
@@ -182,8 +176,8 @@
if (l == ULONG_MAX || (l == 0 && endptr == c))
return (0);
- val = (in_addr_t)l;
- /*
+ val = (in_addr_t)l;
+ /*
* If the whole string is invalid, endptr will equal
* c.. this way we can make sure someone hasn't
* gone '.12' or something which would get past
@@ -318,6 +312,8 @@
struct proxy_entry *ptr;
struct proxy_entry *ptr_last;
+ LIBALIAS_LOCK_ASSERT(la);
+
if (la->proxyList == NULL) {
la->proxyList = entry;
entry->last = NULL;
@@ -359,6 +355,7 @@
struct libalias *la;
la = entry->la;
+ LIBALIAS_LOCK_ASSERT(la);
if (entry->last != NULL)
entry->last->next = entry->next;
else
@@ -376,6 +373,7 @@
int err;
struct proxy_entry *ptr;
+ LIBALIAS_LOCK_ASSERT(la);
err = -1;
ptr = la->proxyList;
while (ptr != NULL) {
@@ -433,7 +431,7 @@
{
int dlen;
int hlen;
- u_char *p;
+ char *p;
hlen = (pip->ip_hl + tc->th_off) << 2;
dlen = ntohs(pip->ip_len) - hlen;
@@ -572,6 +570,7 @@
struct in_addr dst_addr;
struct proxy_entry *ptr;
+ LIBALIAS_LOCK_ASSERT(la);
src_addr = pip->ip_src;
dst_addr = pip->ip_dst;
dst_port = ((struct tcphdr *)ip_next(pip))
@@ -612,6 +611,7 @@
int proxy_type)
{
+ LIBALIAS_LOCK_ASSERT(la);
(void)la;
switch (proxy_type) {
@@ -654,7 +654,7 @@
* then 0 is used, and group 0 rules are always checked before any
* others.
*/
- int i, n, len;
+ int i, n, len, ret;
int cmd_len;
int token_count;
int state;
@@ -674,11 +674,15 @@
struct in_addr dst_addr, dst_mask;
struct proxy_entry *proxy_entry;
+ LIBALIAS_LOCK(la);
+ ret = 0;
/* Copy command line into a buffer */
cmd += strspn(cmd, " \t");
cmd_len = strlen(cmd);
- if (cmd_len > (int)(sizeof(buffer) - 1))
- return (-1);
+ if (cmd_len > (int)(sizeof(buffer) - 1)) {
+ ret = -1;
+ goto getout;
+ }
strcpy(buffer, cmd);
/* Convert to lower case */
@@ -736,8 +740,10 @@
state = STATE_READ_SRC;
else if (strcmp(token, "dst") == 0)
state = STATE_READ_DST;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
break;
case STATE_READ_TYPE:
@@ -747,8 +753,10 @@
proxy_type = PROXY_TYPE_ENCODE_TCPSTREAM;
else if (strcmp(token, "no_encode") == 0)
proxy_type = PROXY_TYPE_ENCODE_NONE;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -769,18 +777,24 @@
if (*p != ':') {
err = IpAddr(token, &server_addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
*p = ' ';
n = sscanf(token, "%s %s", s, str_server_port);
- if (n != 2)
- return (-1);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
err = IpAddr(s, &server_addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
}
}
state = STATE_READ_KEYWORD;
@@ -788,8 +802,10 @@
case STATE_READ_RULE:
n = sscanf(token, "%d", &rule_index);
- if (n != 1 || rule_index < 0)
- return (-1);
+ if (n != 1 || rule_index < 0) {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -798,16 +814,21 @@
int err;
int rule_to_delete;
- if (token_count != 2)
- return (-1);
+ if (token_count != 2) {
+ ret = -1;
+ goto getout;
+ }
n = sscanf(token, "%d", &rule_to_delete);
- if (n != 1)
- return (-1);
+ if (n != 1) {
+ ret = -1;
+ goto getout;
+ }
err = RuleNumberDelete(la, rule_to_delete);
if (err)
- return (-1);
- return (0);
+ ret = -1;
+ ret = 0;
+ goto getout;
}
case STATE_READ_PROTO:
@@ -815,8 +836,10 @@
proto = IPPROTO_TCP;
else if (strcmp(token, "udp") == 0)
proto = IPPROTO_UDP;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -835,24 +858,32 @@
if (*p != '/') {
IpMask(32, &mask);
err = IpAddr(token, &addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
int nbits;
char s[sizeof(buffer)];
*p = ' ';
n = sscanf(token, "%s %d", s, &nbits);
- if (n != 2)
- return (-1);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
err = IpAddr(s, &addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
err = IpMask(nbits, &mask);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
}
if (state == STATE_READ_SRC) {
@@ -867,7 +898,8 @@
break;
default:
- return (-1);
+ ret = -1;
+ goto getout;
break;
}
@@ -893,8 +925,10 @@
int err;
err = IpPort(str_port, proto, &proxy_port);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
proxy_port = 0;
}
@@ -903,20 +937,26 @@
int err;
err = IpPort(str_server_port, proto, &server_port);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
server_port = 0;
}
/* Check that at least the server address has been defined */
- if (server_addr.s_addr == 0)
- return (-1);
+ if (server_addr.s_addr == 0) {
+ ret = -1;
+ goto getout;
+ }
/* Add to linked list */
proxy_entry = malloc(sizeof(struct proxy_entry));
- if (proxy_entry == NULL)
- return (-1);
+ if (proxy_entry == NULL) {
+ ret = -1;
+ goto getout;
+ }
proxy_entry->proxy_type = proxy_type;
proxy_entry->rule_index = rule_index;
@@ -931,5 +971,7 @@
RuleAdd(la, proxy_entry);
- return (0);
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (ret);
}
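
The LibAliasProxyRule() changes above follow the pattern this commit applies to the libalias entry points generally: take the new per-instance lock on entry and funnel every early return through a single unlock point. Condensed, with purely illustrative names:

    #include "alias_local.h"        /* LIBALIAS_LOCK()/LIBALIAS_UNLOCK() */

    /*
     * Single-exit locking pattern: every former "return (err)" becomes
     * "ret = err; goto getout;" so the lock is always dropped exactly
     * once.  The function name and argument are placeholders.
     */
    int
    LibAliasSomething(struct libalias *la, int arg)
    {
            int ret = 0;

            LIBALIAS_LOCK(la);
            if (arg < 0) {
                    ret = -1;
                    goto getout;
            }
            /* ... real work, protected by la's mutex ... */
    getout:
            LIBALIAS_UNLOCK(la);
            return (ret);
    }
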
--- /dev/null
+++ sys/netinet/libalias/alias_mod.h
@@ -0,0 +1,157 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/libalias/alias_mod.h,v 1.1 2006/09/26 23:26:53 piso Exp $
+ */
+
+/*
+ * Alias_mod.h defines the outside world interfaces for the packet aliasing
+ * modular framework
+ */
+
+#ifndef _ALIAS_MOD_H_
+#define _ALIAS_MOD_H_
+
+#ifdef _KERNEL
+MALLOC_DECLARE(M_ALIAS);
+
+/* Use kernel allocator. */
+#if defined(_SYS_MALLOC_H_)
+#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(x, n) malloc(x*n)
+#define free(x) free(x, M_ALIAS)
+#endif
+#endif
+
+/* Protocol handlers struct & function. */
+
+/* Packet flow direction. */
+#define IN 1
+#define OUT 2
+
+/* Working protocol. */
+#define IP 1
+#define TCP 2
+#define UDP 4
+
+/*
+ * Data passed to a protocol handler module; it must be filled
+ * right before calling find_handler() to determine which
+ * module is eligible to be called.
+ */
+
+struct alias_data {
+ struct alias_link *lnk;
+ struct in_addr *oaddr; /* Original address. */
+ struct in_addr *aaddr; /* Alias address. */
+ uint16_t *aport; /* Alias port. */
+ uint16_t *sport, *dport; /* Source & destination port */
+ uint16_t maxpktsize; /* Max packet size. */
+};
+
+/*
+ * This structure contains all the information necessary to make
+ * a protocol handler correctly work.
+ */
+
+struct proto_handler {
+ u_int pri; /* Handler priority. */
+ int16_t dir; /* Flow direction. */
+ uint8_t proto; /* Working protocol. */
+ int (*fingerprint)(struct libalias *la, /* Fingerprint * function. */
+ struct ip *pip, struct alias_data *ah);
+ int (*protohandler)(struct libalias *la, /* Aliasing * function. */
+ struct ip *pip, struct alias_data *ah);
+ LIST_ENTRY(proto_handler) entries;
+};
+
+
+/*
+ * Used only in userland when libalias needs to keep track of all
+ * modules loaded. In kernel land (kld mode) we don't need to care
+ * about libalias modules because kld does it for us.
+ */
+
+#define DLL_LEN 32
+struct dll {
+ char name[DLL_LEN]; /* Name of module. */
+ void *handle; /*
+ * Ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym().
+ */
+ SLIST_ENTRY(dll) next;
+};
+
+/* Functions used with protocol handlers. */
+
+void handler_chain_init(void);
+void handler_chain_destroy(void);
+int LibAliasAttachHandlers(struct proto_handler *);
+int LibAliasDetachHandlers(struct proto_handler *);
+int detach_handler(struct proto_handler *);
+int find_handler(int8_t, int8_t, struct libalias *,
+ struct ip *, struct alias_data *);
+struct proto_handler *first_handler(void);
+
+/* Functions used with dll module. */
+
+void dll_chain_init(void);
+void dll_chain_destroy(void);
+int attach_dll(struct dll *);
+void *detach_dll(char *);
+struct dll *walk_dll_chain(void);
+
+/* End of handlers. */
+#define EOH -1
+
+/*
+ * Some defines borrowed from sys/module.h used to compile a kld
+ * in userland as a shared lib.
+ */
+
+#ifndef _KERNEL
+typedef enum modeventtype {
+ MOD_LOAD,
+ MOD_UNLOAD,
+ MOD_SHUTDOWN,
+ MOD_QUIESCE
+} modeventtype_t;
+
+typedef struct module *module_t;
+typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);
+
+/*
+ * Struct for registering modules statically via SYSINIT.
+ */
+typedef struct moduledata {
+ const char *name; /* module name */
+ modeventhand_t evhand; /* event handler */
+ void *priv; /* extra data */
+} moduledata_t;
+#endif
+
+#endif /* !_ALIAS_MOD_H_ */
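
To make the struct alias_data comment concrete: the library core fills one of these per packet and lets find_handler() walk the chain. The UDP output path in alias.c does essentially the following; the wrapper function and its parameter list are illustrative only:

    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/udp.h>
    #include "alias_local.h"
    #include "alias_mod.h"

    /*
     * Illustrative wrapper: build the per-packet view the modules
     * expect and walk the handler chain.  find_handler() returns
     * ENOENT when no module claimed the packet, in which case the
     * caller falls back to plain NAT.
     */
    static int
    dispatch_udp_out(struct libalias *la, struct ip *pip,
        struct alias_link *lnk, struct udphdr *ud,
        struct in_addr *alias_addr, u_short *alias_port)
    {
            struct alias_data ad = {
                    .lnk = lnk,
                    .oaddr = NULL,          /* not needed on output */
                    .aaddr = alias_addr,
                    .aport = alias_port,
                    .sport = &ud->uh_sport,
                    .dport = &ud->uh_dport,
                    .maxpktsize = 0
            };

            return (find_handler(OUT, UDP, la, pip, &ad));
    }
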
Index: alias_nbt.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_nbt.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_nbt.c -L sys/netinet/libalias/alias_nbt.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_nbt.c
+++ sys/netinet/libalias/alias_nbt.c
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_nbt.c,v 1.19 2005/05/06 11:07:49 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_nbt.c,v 1.20 2006/09/26 23:26:53 piso Exp $");
/*
alias_nbt.c performs special processing for NetBios over TCP/IP
@@ -43,27 +43,147 @@
/* Includes */
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/ctype.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
-#include <string.h>
-#include <arpa/inet.h>
#endif
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
-#include <netinet/tcp.h>
#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define NETBIOS_NS_PORT_NUMBER 137
+#define NETBIOS_DGM_PORT_NUMBER 138
+
+static int
+AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short);
+
+static int
+AliasHandleUdpNbtNS(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short *, struct in_addr *, u_short *);
+static int
+fingerprint1(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_DGM_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_DGM_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler1(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbt(la, pip, ah->lnk, ah->aaddr, *ah->aport);
+ return (0);
+}
+
+static int
+fingerprint2(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_NS_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_NS_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbtNS(la, pip, ah->lnk, ah->aaddr, ah->aport,
+ ah->oaddr, ah->dport);
+ return (0);
+}
+
+static int
+protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbtNS(la, pip, ah->lnk, &pip->ip_src, ah->sport,
+ ah->aaddr, ah->aport);
+ return (0);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 130,
+ .dir = IN|OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint1,
+ .protohandler = &protohandler1
+ },
+ {
+ .pri = 140,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2in
+ },
+ {
+ .pri = 140,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2out
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_nbt", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_nbt, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_nbt, 1);
+MODULE_DEPEND(alias_nbt, libalias, 1, 1, 1);
#endif
typedef struct {
@@ -212,7 +332,7 @@
#define DGM_POSITIVE_RES 0x15
#define DGM_NEGATIVE_RES 0x16
-int
+static int
AliasHandleUdpNbt(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
@@ -640,7 +760,7 @@
return ((u_char *) q);
}
-int
+static int
AliasHandleUdpNbtNS(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
Index: alias_util.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_util.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_util.c -L sys/netinet/libalias/alias_util.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_util.c
+++ sys/netinet/libalias/alias_util.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_util.c,v 1.18 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_util.c,v 1.20 2006/12/15 12:50:06 piso Exp $");
/*
@@ -45,6 +45,7 @@
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/proc.h>
#else
#include <sys/types.h>
#include <stdio.h>
@@ -75,6 +76,7 @@
{
int sum, oddbyte;
+ LIBALIAS_LOCK(la);
sum = 0;
while (nbytes > 1) {
sum += *ptr++;
@@ -88,6 +90,7 @@
}
sum = (sum >> 16) + (sum & 0xffff);
sum += (sum >> 16);
+ LIBALIAS_UNLOCK(la);
return (~sum);
}
Index: alias_cuseeme.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_cuseeme.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_cuseeme.c -L sys/netinet/libalias/alias_cuseeme.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_cuseeme.c
+++ sys/netinet/libalias/alias_cuseeme.c
@@ -27,11 +27,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_cuseeme.c,v 1.12 2005/05/05 21:55:17 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_cuseeme.c,v 1.13 2006/09/26 23:26:53 piso Exp $");
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
#include <stdio.h>
#endif
@@ -44,8 +47,100 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define CUSEEME_PORT_NUMBER 7648
+
+static void
+AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk);
+
+static void
+AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip,
+ struct in_addr original_addr);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->oaddr == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == CUSEEME_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeIn(la, pip, *ah->oaddr);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeOut(la, pip, ah->lnk);
+ return (0);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 120,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+ {
+ .pri = 120,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t
+alias_mod = {
+ "alias_cuseeme", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_cuseeme, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_cuseeme, 1);
+MODULE_DEPEND(alias_cuseeme, libalias, 1, 1, 1);
#endif
/* CU-SeeMe Data Header */
@@ -77,7 +172,7 @@
* counts etc */
};
-void
+static void
AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip, struct alias_link *lnk)
{
struct udphdr *ud = ip_next(pip);
@@ -100,7 +195,7 @@
}
}
-void
+static void
AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip, struct in_addr original_addr)
{
struct in_addr alias_addr;
Index: libalias.3
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/libalias.3,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/libalias.3 -L sys/netinet/libalias/libalias.3 -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/libalias.3
+++ sys/netinet/libalias/libalias.3
@@ -23,9 +23,9 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $FreeBSD: src/sys/netinet/libalias/libalias.3,v 1.53 2005/01/20 09:17:02 ru Exp $
+.\" $FreeBSD: src/sys/netinet/libalias/libalias.3,v 1.58 2006/10/11 07:11:56 ru Exp $
.\"
-.Dd January 17, 2004
+.Dd October 1, 2006
.Dt LIBALIAS 3
.Os
.Sh NAME
@@ -836,7 +836,7 @@
.Ed
.Pp
.Ft int
-.Fn LibAliasCheckNewLink void
+.Fn LibAliasCheckNewLink "struct libalias *"
.Bd -ragged -offset indent
This function returns a non-zero value when a new aliasing link is created.
In circumstances where incoming traffic is being sequentially sent to
@@ -893,6 +893,9 @@
added support for RTSP/PNA.
.An Ruslan Ermilov Aq ru at FreeBSD.org
added support for PPTP and LSNAT as well as general hacking.
+.An Paolo Pisati Aq piso at FreeBSD.org
+made the library modular, moving support for all
+protocols (except for IP, TCP and UDP) to external modules.
.Sh ACKNOWLEDGMENTS
Listed below, in approximate chronological order, are individuals who
have provided valuable comments and/or debugging assistance.
@@ -1011,8 +1014,445 @@
a unique aliasing link can be established.
In an alternate operating mode, the first choice of an aliasing port is also
random and unrelated to the local port number.
-.Sh BUGS
-PPTP aliasing does not work when more than one internal client
-connects to the same external server at the same time, because
-PPTP requires a single TCP control connection to be established
-between any two IP addresses.
+.Sh MODULAR ARCHITECTURE (AND Xr ipfw 4 Sh SUPPORT)
+One of the latest improvements to
+.Nm
+was to make its support
+for new protocols independent from the rest of the library, giving it
+the ability to load/unload support for new protocols at run-time.
+To achieve this feature, all the code for protocol handling was moved
+to a series of modules outside of the main library.
+These modules are compiled from the same sources but work in
+different ways, depending on whether they are compiled to work inside a kernel
+or as part of the userland library.
+.Ss LIBALIAS MODULES IN KERNEL LAND
+When compiled for the kernel,
+.Nm
+modules are plain KLDs recognizable with the
+.Pa alias_
+prefix.
+.Pp
+To add support for a new protocol, load the corresponding module.
+For example:
+.Pp
+.Dl "kldload alias_ftp"
+.Pp
+When support for a protocol is no longer needed, its module can be unloaded:
+.Pp
+.Dl "kldunload alias_ftp"
+.Ss LIBALIAS MODULES IN USERLAND
+Due to the differences between kernel and userland (no KLD mechanism,
+many different address spaces, etc.), module loading/tracking/unloading has to be
+handled a bit differently in userland.
+.Pp
+When compiled for a userland
+.Nm ,
+all the modules are plain libraries, residing in
+.Pa /usr/lib ,
+and recognizable with the
+.Pa libalias_
+prefix.
+.Pp
+There is a configuration file,
+.Pa /etc/libalias.conf ,
+with the following contents (by default):
+.Bd -literal -offset indent
+/usr/lib/libalias_cuseeme.so
+/usr/lib/libalias_ftp.so
+/usr/lib/libalias_irc.so
+/usr/lib/libalias_nbt.so
+/usr/lib/libalias_pptp.so
+/usr/lib/libalias_skinny.so
+/usr/lib/libalias_smedia.so
+.Ed
+.Pp
+This file contains the paths to the modules that
+.Nm
+will load.
+To load/unload a new module, just add its path to
+.Pa libalias.conf
+and call
+.Fn LibAliasRefreshModules
+from the program.
+In case the application provides a
+.Dv SIGHUP
+signal handler, add a call to
+.Fn LibAliasRefreshModules
+inside the handler, and every time you want to refresh the loaded modules,
+send it the
+.Dv SIGHUP
+signal:
+.Pp
+.Dl "kill -HUP <process_pid>"
+.Ss MODULAR ARCHITECTURE: HOW IT WORKS
+The modular architecture of
+.Nm
+works similarly whether it is running inside the
+kernel or in userland.
+From
+.Pa alias_mod.c :
+.Bd -literal
+/* Protocol and userland module handlers chains. */
+LIST_HEAD(handler_chain, proto_handler) handler_chain ...
+\&...
+SLIST_HEAD(dll_chain, dll) dll_chain ...
+.Ed
+.Pp
+.Va handler_chain
+keeps track of all the loaded protocol handlers, while
+.Va dll_chain
+takes care of the loaded userland modules.
+.Pp
+.Va handler_chain
+is composed of
+.Vt "struct proto_handler"
+entries:
+.Bd -literal
+struct proto_handler {
+ u_int pri;
+ int16_t dir;
+ uint8_t proto;
+ int (*fingerprint)(struct libalias *la,
+ struct ip *pip, struct alias_data *ah);
+ int (*protohandler)(struct libalias *la,
+ struct ip *pip, struct alias_data *ah);
+ LIST_ENTRY(proto_handler) entries;
+};
+.Ed
+.Pp
+where:
+.Bl -inset
+.It Va pri
+is the priority assigned to a protocol handler, lower
+is better.
+.It Va dir
+is the direction of packets: ingoing or outgoing.
+.It Va proto
+says to which protocol this packet belongs: IP, TCP or UDP.
+.It Va fingerprint
+points to the fingerprint function while protohandler points
+to the protocol handler function.
+.El
+.Pp
+The
+.Va fingerprint
+function has the dual role of checking whether the
+incoming packet is recognized and whether it belongs to any category that this
+module can handle.
+.Pp
+The
+.Va protohandler
+function actually manipulates
+the packet to make
+.Nm
+correctly NAT it.
+.Pp
+When a packet enters
+.Nm ,
+if it meets a module hook,
+.Va handler_chain
+is searched to see if there is a handler that matches
+this type of packet (it checks the protocol and direction of the packet); if
+more than one handler is found, it starts with the module with
+the lowest priority number: it calls the
+.Va fingerprint
+function and interprets the result.
+.Pp
+If the result value is equal to 0 then it calls the protocol handler
+of this handler and returns.
+Otherwise, it proceeds to the next eligible module until the
+.Va handler_chain
+is exhausted.
+.Pp
+Inside
+.Nm ,
+the module hook looks like this:
+.Bd -literal -offset indent
+struct alias_data ad = {
+ lnk,
+ &original_address,
+ &alias_address,
+ &alias_port,
+ &ud->uh_sport, /* original source port */
+ &ud->uh_dport, /* original dest port */
+ 256 /* maxpacketsize */
+};
+
+\&...
+
+/* walk out chain */
+err = find_handler(IN, UDP, la, pip, &ad);
+.Ed
+.Pp
+All data useful to a module are gathered together in an
+.Vt alias_data
+structure, then
+.Fn find_handler
+is called.
+The
+.Fn find_handler
+function is responsible for walking the handler
+chain; it receives the following input parameters:
+.Bl -tag -width indent
+.It Fa IN
+direction
+.It Fa UDP
+working protocol
+.It Fa la
+pointer to this instance of libalias
+.It Fa pip
+pointer to a
+.Vt "struct ip"
+.It Fa ad
+pointer to
+.Vt "struct alias_data"
+(see above)
+.El
+.Pp
+In this case,
+.Fn find_handler
+will search only for modules registered for
+supporting INcoming UDP packets.
+.Pp
+As was mentioned earlier,
+.Nm
+in userland is a bit different, because
+module handling also has to be taken care of (avoiding duplicate loading of a
+module, avoiding modules with the same name, etc.), so
+.Va dll_chain
+was introduced.
+.Pp
+.Va dll_chain
+contains a list of all userland
+.Nm
+modules loaded.
+.Pp
+When an application calls
+.Fn LibAliasRefreshModules ,
+.Nm
+first unloads all the loaded modules, then reloads all the modules listed in
+.Pa /etc/libalias.conf :
+for every module loaded, a new entry to
+.Va dll_chain
+is added.
+.Pp
+.Va dll_chain
+is composed of
+.Vt "struct dll"
+entries:
+.Bd -literal
+struct dll {
+ /* name of module */
+ char name[DLL_LEN];
+ /*
+ * ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym()
+ */
+ void *handle;
+ struct dll *next;
+};
+.Ed
+.Bl -inset
+.It Va name
+is the name of the module
+.It Va handle
+is a pointer to the module obtained through
+.Xr dlopen 3
+.El
+Whenever a module is loaded in userland, an entry is added to
+.Va dll_chain ,
+then every protocol handler present in that module
+is resolved and registered in
+.Va handler_chain .
+.Ss HOW TO WRITE A MODULE FOR LIBALIAS
+There is a module (called
+.Pa alias_dummy.[ch] )
+in
+.Nm
+that can be used as a skeleton for future work; here we analyse some parts of that
+module.
+From
+.Pa alias_dummy.c :
+.Bd -literal
+struct proto_handler handlers [] = {{666, IN|OUT, UDP|TCP,
+ &fingerprint, &protohandler}};
+.Ed
+.Pp
+The variable
+.Va handlers
+is the
+.Dq "most important thing"
+in a module
+because it describes the handlers present and lets the outside world use
+it in an opaque way.
+.Pp
+It must ALWAYS be present in every module, and it MUST retain
+the name
+.Va handlers ,
+otherwise attempting to load a module in userland will fail and
+complain about missing symbols: for more information about module
+load/unload, please refer to
+.Fn LibAliasRefreshModules ,
+.Fn LibAliasLoadModule
+and
+.Fn LibAliasUnloadModule
+in
+.Pa alias.c .
+.Pp
+.Va handlers
+contains all the
+.Vt proto_handler
+structures present in a module.
+.Bd -literal
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ attach_handlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ detach_handlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+.Ed
+When running as KLD,
+.Fn mod_handler
+registers/deregisters the module using
+.Fn attach_handlers
+and
+.Fn detach_handlers ,
+respectively.
+.Pp
+Every module must contain at least 2 functions: one fingerprint
+function and a protocol handler function.
+.Bd -literal
+#ifdef _KERNEL
+static
+#endif
+int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+\&...
+}
+
+#ifdef _KERNEL
+static
+#endif
+int
+protohandler(struct libalias *la, struct ip *pip,
+ struct alias_data *ah)
+{
+
+\&...
+}
+.Ed
+and they must accept exactly these input parameters.
+.Ss PATCHING AN APPLICATION FOR USERLAND LIBALIAS MODULES
+To add module support into an application that uses
+.Nm ,
+the following simple steps can be followed.
+.Bl -enum
+.It
+Find the main file of an application
+(let us call it
+.Pa main.c ) .
+.It
+Add this to the header section of
+.Pa main.c ,
+if not already present:
+.Pp
+.Dl "#include <signal.h>"
+.Pp
+and this just after the header section:
+.Pp
+.Dl "static void signal_handler(int);"
+.It
+Add the following line to the init function of an application or,
+if it does not have any init function, put it in
+.Fn main :
+.Pp
+.Dl "signal(SIGHUP, signal_handler);"
+.Pp
+and place the
+.Fn signal_handler
+function somewhere in
+.Pa main.c :
+.Bd -literal -offset indent
+static void
+signal_handler(int sig)
+{
+
+ LibAliasRefreshModules();
+}
+.Ed
+.Pp
+Otherwise, if an application already traps the
+.Dv SIGHUP
+signal, just add a call to
+.Fn LibAliasRefreshModules
+in the signal handler function.
+.El
+For example, to patch
+.Xr natd 8
+to use
+.Nm
+modules, just add the following line to
+.Fn RefreshAddr "int sig __unused" :
+.Pp
+.Dl "LibAliasRefreshModules()"
+.Pp
+recompile and you are done.
+.Ss LOGGING SUPPORT IN KERNEL LAND
+When working as KLD,
+.Nm
+now has logging support that
+writes to a buffer allocated inside
+.Vt "struct libalias"
+(from
+.Pa alias_local.h ) :
+.Bd -literal
+struct libalias {
+ ...
+
+ /* log descriptor */
+#ifdef KERNEL_LOG
+ char *logDesc; /*
+ * ptr to an auto-malloced
+ * memory buffer when libalias
+ * works as kld
+ */
+#else
+ FILE *logDesc; /*
+ * ptr to /var/log/alias.log
+ * when libalias runs as a
+ * userland lib
+ */
+#endif
+
+ ...
+}
+.Ed
+so all applications using
+.Nm
+will be able to handle their
+own logs, if they want, accessing
+.Va logDesc .
+Moreover, every change to a log buffer is automatically added to
+.Xr syslog 3
+with the
+.Dv LOG_SECURITY
+facility and the
+.Dv LOG_INFO
+level.
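
The logging described in that last subsection is only active once the PKT_ALIAS_LOG mode bit is set on the instance, so an application has to opt in first. A minimal sketch using the existing LibAliasSetMode() interface from alias.h:

    #include "alias.h"

    /*
     * Enable per-instance logging: in userland libalias then writes to
     * /var/log/alias.log, in kernel builds to the instance's internal
     * buffer (and to syslog, as described above).
     */
    static void
    enable_alias_log(struct libalias *la)
    {
            LibAliasSetMode(la, PKT_ALIAS_LOG, PKT_ALIAS_LOG);
    }
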
Index: alias_old.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_old.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_old.c -L sys/netinet/libalias/alias_old.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_old.c
+++ sys/netinet/libalias/alias_old.c
@@ -25,10 +25,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_old.c,v 1.7 2005/05/05 19:27:32 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_old.c,v 1.8 2006/09/26 23:26:53 piso Exp $");
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/proc.h>
#else
#include <sys/types.h>
#include <stdlib.h>
--- /dev/null
+++ sys/netinet/libalias/alias_dummy.c
@@ -0,0 +1,153 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_dummy.c,v 1.1 2006/09/26 23:26:53 piso Exp $");
+
+/*
+ * Alias_dummy is just an empty skeleton used to demonstrate how to write
+ * a module for libalias that will run unaltered in userland or in
+ * kernel land.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#else
+#include <errno.h>
+#include <sys/types.h>
+#include <stdio.h>
+#endif
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ /*
+ * Check here all the data that will be used later; if any field
+ * is empty/NULL, return a -1 value.
+ */
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ /*
+ * Fingerprint the incoming packet, if it matches any conditions
+ * return an OK value.
+ */
+ if (ntohs(*ah->dport) == 123
+ || ntohs(*ah->sport) == 456)
+ return (0); /* I know how to handle it. */
+ return (-1); /* I don't recognize this packet. */
+}
+
+/*
+ * Wrap in this general purpose function, the real function used to alias the
+ * packets.
+ */
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleDummy(la, pip, ah);
+ return (0);
+}
+
+/*
+ * NOTA BENE: the next variable MUST NOT be renamed in any case if you want
+ * your module to work in userland, because it's used to find and use all
+ * the protocol handlers present in every module.
+ * So WATCH OUT, your module needs this variable and it needs it with
+ * ITS EXACT NAME: handlers.
+ */
+
+struct proto_handler handlers [] = {
+ {
+ .pri = 666,
+ .dir = IN|OUT,
+ .proto = UDP|TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_dummy", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_dummy, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_dummy, 1);
+MODULE_DEPEND(alias_dummy, libalias, 1, 1, 1);
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah)
+{
+ ; /* Dummy. */
+}
+
Index: alias_skinny.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_skinny.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_skinny.c -L sys/netinet/libalias/alias_skinny.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_skinny.c
+++ sys/netinet/libalias/alias_skinny.c
@@ -27,31 +27,97 @@
*
* Author: Joe Marcus Clarke <marcus at FreeBSD.org>
*
- * $FreeBSD: src/sys/netinet/libalias/alias_skinny.c,v 1.12 2005/06/27 07:36:02 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias_skinny.c,v 1.14 2007/04/07 09:52:36 piso Exp $
*/
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
-#include <sys/types.h>
-#include <sys/socket.h>
+#include <errno.h>
#include <stdio.h>
-#include <string.h>
#include <unistd.h>
-#include <arpa/inet.h>
#endif
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
-#include <netinet/udp.h>
#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+static void
+AliasHandleSkinny(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (la->skinnyPort != 0 && (ntohs(*ah->sport) == la->skinnyPort ||
+ ntohs(*ah->dport) == la->skinnyPort))
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleSkinny(la, pip, ah->lnk);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 110,
+ .dir = IN|OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_skinny", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_skinny, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_skinny, 1);
+MODULE_DEPEND(alias_skinny, libalias, 1, 1, 1);
#endif
/*
@@ -233,7 +299,7 @@
return (0);
}
-void
+static void
AliasHandleSkinny(struct libalias *la, struct ip *pip, struct alias_link *lnk)
{
size_t hlen, tlen, dlen;
@@ -243,6 +309,7 @@
size_t orig_len, skinny_hdr_len = sizeof(struct skinny_header);
ConvDirection direction;
+ lip = -1;
tc = (struct tcphdr *)ip_next(pip);
hlen = (pip->ip_hl + tc->th_off) << 2;
tlen = ntohs(pip->ip_len);
@@ -352,6 +419,16 @@
#endif
return;
}
+ if (lip == -1) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: received a"
+ " packet,StartMediaTx Message before"
+ " packet,OpnRcvChnAckMsg\n"
+#endif
+ return;
+ }
+
#ifdef LIBALIAS_DEBUG
fprintf(stderr,
"PacketAlias/Skinny: Received start media trans msg\n");
Index: alias_irc.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_irc.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_irc.c -L sys/netinet/libalias/alias_irc.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_irc.c
+++ sys/netinet/libalias/alias_irc.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_irc.c,v 1.21 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_irc.c,v 1.23 2007/04/04 03:16:59 kan Exp $");
/* Alias_irc.c intercepts packages contain IRC CTCP commands, and
changes DCC commands to export a port on the aliasing host instead
@@ -50,12 +50,14 @@
/* Includes */
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/libkern.h>
#include <sys/ctype.h>
#include <sys/limits.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
@@ -69,15 +71,89 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
#endif
+#define IRC_CONTROL_PORT_NUMBER_1 6667
+#define IRC_CONTROL_PORT_NUMBER_2 6668
+
/* Local defines */
#define DBprintf(a)
+static void
+AliasHandleIrcOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleIrcOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 90,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_irc", mod_handler, NULL
+};
+
+/* Kernel module definition. */
+#ifdef _KERNEL
+DECLARE_MODULE(alias_irc, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_irc, 1);
+MODULE_DEPEND(alias_irc, libalias, 1, 1, 1);
+#endif
-void
+static void
AliasHandleIrcOut(struct libalias *la,
struct ip *pip, /* IP packet to examine */
struct alias_link *lnk, /* Which link are we on? */
--- /dev/null
+++ sys/netinet/libalias/alias_mod.c
@@ -0,0 +1,284 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_mod.c,v 1.3 2006/12/15 12:50:06 piso Exp $");
+
+#ifdef _KERNEL
+#include <sys/libkern.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#else
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <errno.h>
+#endif
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+/* Protocol and userland module handlers chains. */
+LIST_HEAD(handler_chain, proto_handler) handler_chain = LIST_HEAD_INITIALIZER(foo);
+#ifdef _KERNEL
+struct rwlock handler_rw;
+#endif
+SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(foo);
+
+#ifdef _KERNEL
+
+#define LIBALIAS_RWLOCK_INIT() \
+ rw_init(&handler_rw, "Libalias_modules_rwlock")
+#define LIBALIAS_RWLOCK_DESTROY() rw_destroy(&handler_rw)
+#define LIBALIAS_WLOCK_ASSERT() \
+ rw_assert(&handler_rw, RA_WLOCKED)
+
+static __inline void
+LIBALIAS_RLOCK(void)
+{
+ rw_rlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_RUNLOCK(void)
+{
+ rw_runlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WLOCK(void)
+{
+ rw_wlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WUNLOCK(void)
+{
+ rw_wunlock(&handler_rw);
+}
+
+static void
+_handler_chain_init(void)
+{
+
+ if (!rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_INIT();
+}
+
+static void
+_handler_chain_destroy(void)
+{
+
+ if (rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_DESTROY();
+}
+
+#else
+#define LIBALIAS_RWLOCK_INIT() ;
+#define LIBALIAS_RWLOCK_DESTROY() ;
+#define LIBALIAS_WLOCK_ASSERT() ;
+#define LIBALIAS_RLOCK() ;
+#define LIBALIAS_RUNLOCK() ;
+#define LIBALIAS_WLOCK() ;
+#define LIBALIAS_WUNLOCK() ;
+#define _handler_chain_init() ;
+#define _handler_chain_destroy() ;
+#endif
+
+void
+handler_chain_init(void)
+{
+ _handler_chain_init();
+}
+
+void
+handler_chain_destroy(void)
+{
+ _handler_chain_destroy();
+}
+
+static int
+_attach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b = NULL;
+
+ LIBALIAS_WLOCK_ASSERT();
+ LIST_FOREACH(b, &handler_chain, entries) {
+ if ((b->pri == p->pri) &&
+ (b->dir == p->dir) &&
+ (b->proto == p->proto))
+ return (EEXIST); /* Priority conflict. */
+ if (b->pri > p->pri) {
+ LIST_INSERT_BEFORE(b, p, entries);
+ return (0);
+ }
+ }
+ /* End of list or found right position, inserts here. */
+ if (b)
+ LIST_INSERT_AFTER(b, p, entries);
+ else
+ LIST_INSERT_HEAD(&handler_chain, p, entries);
+ return (0);
+}
+
+static int
+_detach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b, *b_tmp;
+
+ LIBALIAS_WLOCK_ASSERT();
+ LIST_FOREACH_SAFE(b, &handler_chain, entries, b_tmp) {
+ if (b == p) {
+ LIST_REMOVE(b, entries);
+ return (0);
+ }
+ }
+ return (ENOENT); /* Handler not found. */
+}
+
+int
+LibAliasAttachHandlers(struct proto_handler *_p)
+{
+ int i, error = -1;
+
+ LIBALIAS_WLOCK();
+ for (i=0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _attach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+LibAliasDetachHandlers(struct proto_handler *_p)
+{
+ int i, error = -1;
+
+ LIBALIAS_WLOCK();
+ for (i=0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _detach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+detach_handler(struct proto_handler *_p)
+{
+ int error = -1;
+
+ LIBALIAS_WLOCK();
+ error = _detach_handler(_p);
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+find_handler(int8_t dir, int8_t proto, struct libalias *la, struct ip *pip,
+ struct alias_data *ad)
+{
+ struct proto_handler *p;
+ int error = ENOENT;
+
+ LIBALIAS_RLOCK();
+
+ LIST_FOREACH(p, &handler_chain, entries) {
+ if ((p->dir & dir) && (p->proto & proto))
+ if (p->fingerprint(la, pip, ad) == 0) {
+ error = p->protohandler(la, pip, ad);
+ break;
+ }
+ }
+ LIBALIAS_RUNLOCK();
+ return (error);
+}
+
+struct proto_handler *
+first_handler(void)
+{
+
+ return (LIST_FIRST(&handler_chain));
+}
+
+/* Dll manipulation code - this code is not thread safe... */
+
+int
+attach_dll(struct dll *p)
+{
+ struct dll *b;
+
+ SLIST_FOREACH(b, &dll_chain, next) {
+ if (!strncmp(b->name, p->name, DLL_LEN))
+ return (EEXIST); /* Dll name conflict. */
+ }
+ SLIST_INSERT_HEAD(&dll_chain, p, next);
+ return (0);
+}
+
+void *
+detach_dll(char *p)
+{
+ struct dll *b = NULL, *b_tmp;
+ void *error = NULL;
+
+ SLIST_FOREACH_SAFE(b, &dll_chain, next, b_tmp)
+ if (!strncmp(b->name, p, DLL_LEN)) {
+ SLIST_REMOVE(&dll_chain, b, dll, next);
+ error = b;
+ break;
+ }
+ return (error);
+}
+
+struct dll *
+walk_dll_chain(void)
+{
+ struct dll *t;
+
+ t = SLIST_FIRST(&dll_chain);
+ if (t == NULL)
+ return (NULL);
+ SLIST_REMOVE_HEAD(&dll_chain, next);
+ return (t);
+}
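
From the consumer side, the chain code above is fed with arrays like the following: LibAliasAttachHandlers() scans entries until the { EOH } sentinel, _attach_handler() keeps the chain sorted by ascending .pri, and find_handler() calls the first fingerprint that accepts a packet. The fingerprint/handler functions and the array name below are placeholders; note that a loadable userland module must name its array exactly "handlers" so dlsym() can find it:

    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include "alias_local.h"
    #include "alias_mod.h"

    /* Placeholder fingerprint/handler functions, defined elsewhere. */
    static int my_fingerprint(struct libalias *, struct ip *,
        struct alias_data *);
    static int my_protohandler(struct libalias *, struct ip *,
        struct alias_data *);

    static struct proto_handler my_handlers[] = {
            {
                    .pri = 100,             /* tried before higher numbers */
                    .dir = OUT,
                    .proto = TCP,
                    .fingerprint = &my_fingerprint,
                    .protohandler = &my_protohandler
            },
            { EOH }                         /* sentinel ends the scan */
    };

    static int
    my_register(void)
    {
            /* EEXIST if pri/dir/proto clash with an existing entry. */
            return (LibAliasAttachHandlers(my_handlers));
    }

    static int
    my_unregister(void)
    {
            return (LibAliasDetachHandlers(my_handlers));
    }
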
Index: alias_smedia.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_smedia.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_smedia.c -L sys/netinet/libalias/alias_smedia.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_smedia.c
+++ sys/netinet/libalias/alias_smedia.c
@@ -64,7 +64,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_smedia.c,v 1.15 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_smedia.c,v 1.17 2006/11/07 21:06:48 marcus Exp $");
/*
Alias_smedia.c is meant to contain the aliasing code for streaming media
@@ -100,8 +100,11 @@
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
@@ -111,13 +114,94 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
-#include <netinet/udp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define RTSP_CONTROL_PORT_NUMBER_1 554
+#define RTSP_CONTROL_PORT_NUMBER_2 7070
+#define TFTP_PORT_NUMBER 69
+
+static void
+AliasHandleRtspOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport != NULL && ah->aport != NULL && ah->sport != NULL &&
+ ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ return (0);
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_2
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ FindRtspOut(la, pip->ip_src, pip->ip_dst,
+ *ah->sport, *ah->aport, IPPROTO_UDP);
+ else AliasHandleRtspOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 100,
+ .dir = OUT,
+ .proto = TCP|UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_smedia", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_smedia, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_smedia, 1);
+MODULE_DEPEND(alias_smedia, libalias, 1, 1, 1);
#endif
#define RTSP_CONTROL_PORT_NUMBER_1 554
@@ -392,7 +476,7 @@
return (0);
}
-void
+static void
AliasHandleRtspOut(struct libalias *la, struct ip *pip, struct alias_link *lnk, int maxpacketsize)
{
int hlen, tlen, dlen;
Index: alias_local.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_local.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_local.h -L sys/netinet/libalias/alias_local.h -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_local.h
+++ sys/netinet/libalias/alias_local.h
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/libalias/alias_local.h,v 1.32 2005/06/27 07:36:02 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias_local.h,v 1.34 2006/12/15 12:50:06 piso Exp $
*/
/*
@@ -46,18 +46,16 @@
#ifndef _ALIAS_LOCAL_H_
#define _ALIAS_LOCAL_H_
-#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
-/* Use kernel allocator. */
-#if defined(_KERNEL) && defined(_SYS_MALLOC_H_)
-MALLOC_DECLARE(M_ALIAS);
-#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
-#define calloc(x, n) malloc(x*n)
-#define free(x) free(x, M_ALIAS)
-#endif
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
/* XXX: LibAliasSetTarget() uses this constant. */
-#ifdef _KERNEL
#define INADDR_NONE 0xffffffff
#endif
@@ -116,10 +114,14 @@
int deleteAllLinks; /* If equal to zero, DeleteLink() */
/* will not remove permanent links */
-#ifndef NO_LOGGING
- FILE *monitorFile; /* File descriptor for link */
+
+ /* log descriptor */
+#ifdef _KERNEL
+ char *logDesc;
+#else
+ FILE *logDesc;
#endif
- /* statistics monitoring file */
+ /* statistics monitoring */
int newDefaultLink; /* Indicates if a new aliasing */
/* link has been created after a */
@@ -147,11 +149,31 @@
struct in_addr true_addr; /* in network byte order. */
u_short true_port; /* in host byte order. */
-
+#ifdef _KERNEL
+ /*
+ * avoid races in libalias: every public function has to use it.
+ */
+ struct mtx mutex;
+#endif
};
/* Macros */
+#ifdef _KERNEL
+#define LIBALIAS_LOCK_INIT(l) \
+ mtx_init(&l->mutex, "per-instance libalias mutex", NULL, MTX_DEF)
+#define LIBALIAS_LOCK_ASSERT(l) mtx_assert(&l->mutex, MA_OWNED)
+#define LIBALIAS_LOCK(l) mtx_lock(&l->mutex)
+#define LIBALIAS_UNLOCK(l) mtx_unlock(&l->mutex)
+#define LIBALIAS_LOCK_DESTROY(l) mtx_destroy(&l->mutex)
+#else
+#define LIBALIAS_LOCK_INIT(l)
+#define LIBALIAS_LOCK_ASSERT(l)
+#define LIBALIAS_LOCK(l)
+#define LIBALIAS_UNLOCK(l)
+#define LIBALIAS_LOCK_DESTROY(l)
+#endif
+
/*
* The following macro is used to update an
* internet checksum. "delta" is a 32-bit
@@ -296,43 +318,6 @@
/* Tcp specfic routines */
/* lint -save -library Suppress flexelint warnings */
-/* FTP routines */
-void
-AliasHandleFtpOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxpacketsize);
-
-/* IRC routines */
-void
-AliasHandleIrcOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxsize);
-
-/* RTSP routines */
-void
-AliasHandleRtspOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxpacketsize);
-
-/* PPTP routines */
-void AliasHandlePptpOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-void AliasHandlePptpIn(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-int AliasHandlePptpGreOut(struct libalias *la, struct ip *_pip);
-int AliasHandlePptpGreIn(struct libalias *la, struct ip *_pip);
-
-/* NetBIOS routines */
-int
-AliasHandleUdpNbt(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- struct in_addr *_alias_address, u_short _alias_port);
-int
-AliasHandleUdpNbtNS(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- struct in_addr *_alias_address, u_short * _alias_port,
- struct in_addr *_original_address, u_short * _original_port);
-
-/* CUSeeMe routines */
-void AliasHandleCUSeeMeOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-void AliasHandleCUSeeMeIn(struct libalias *la, struct ip *_pip, struct in_addr _original_addr);
-
-/* Skinny routines */
-void AliasHandleSkinny(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-
/* Transparent proxy routines */
int
ProxyCheck(struct libalias *la, struct ip *_pip, struct in_addr *_proxy_server_addr,
@@ -373,6 +358,4 @@
}
#endif
-/*lint -restore */
-
#endif /* !_ALIAS_LOCAL_H_ */
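
The new locking macros above are meant to be paired with the instance lifecycle; the constructor/destructor side lives in alias_db.c and is not shown in this excerpt, so the pairing below is an assumption of the sketch. In a userland build all five macros expand to nothing, which is what lets the same sources compile in both environments:

    #include "alias.h"              /* PKT_ALIAS_LOG */
    #include "alias_local.h"        /* struct libalias, LIBALIAS_* macros */

    /* Assumed pairing of lock setup/teardown with the instance lifetime. */
    static void
    lock_lifecycle(struct libalias *la)
    {
            LIBALIAS_LOCK_INIT(la);         /* once, when the instance is created */

            LIBALIAS_LOCK(la);              /* bracket each public operation */
            la->packetAliasMode |= PKT_ALIAS_LOG;   /* example state change */
            LIBALIAS_UNLOCK(la);

            LIBALIAS_LOCK_DESTROY(la);      /* when the instance is torn down */
    }
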
Index: alias_pptp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_pptp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_pptp.c -L sys/netinet/libalias/alias_pptp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_pptp.c
+++ sys/netinet/libalias/alias_pptp.c
@@ -37,7 +37,170 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_pptp.c,v 1.14 2005/05/05 21:55:17 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_pptp.c,v 1.15 2006/09/26 23:26:53 piso Exp $");
+
+/* Includes */
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/limits.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#else
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <stdio.h>
+#endif
+
+#include <netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias.h"
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define PPTP_CONTROL_PORT_NUMBER 1723
+
+static void
+AliasHandlePptpOut(struct libalias *, struct ip *, struct alias_link *);
+
+static void
+AliasHandlePptpIn(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+AliasHandlePptpGreOut(struct libalias *, struct ip *);
+
+static int
+AliasHandlePptpGreIn(struct libalias *, struct ip *);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == PPTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == PPTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+fingerprintgre(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ return (0);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpIn(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpOut(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY ||
+ AliasHandlePptpGreIn(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (AliasHandlePptpGreOut(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 200,
+ .dir = IN,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ {
+ .pri = 210,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+/*
+ * WATCH OUT!!! These two handlers NEED a priority of INT_MAX (the highest
+ * possible) because they will ALWAYS process packets, so they must be the
+ * last ones in the chain: see fingerprintgre() above.
+ */
+ {
+ .pri = INT_MAX,
+ .dir = IN,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergrein
+ },
+ {
+ .pri = INT_MAX,
+ .dir = OUT,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergreout
+ },
+ { EOH }
+};
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_pptp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_pptp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_pptp, 1);
+MODULE_DEPEND(alias_pptp, libalias, 1, 1, 1);
+#endif
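Note: this is the new alias_mod plug-in shape every libalias helper adopts in
this commit: an array of proto_handler entries ordered by .pri, each with a
fingerprint() that decides whether the packet belongs to the helper and a
protohandler() that performs the rewrite, plus a mod_handler that attaches and
detaches the array on module load/unload.  A conceptual sketch of how such a
chain is consumed (not code from this commit; it assumes the EOH sentinel
terminates the array and ignores the .dir/.proto matching the real dispatcher
also performs):

    static int
    walk_chain(struct libalias *la, struct ip *pip, struct alias_data *ah,
        struct proto_handler *chain)
    {
            struct proto_handler *p;

            /* First matching fingerprint wins; the INT_MAX GRE entries above
             * match everything, so they only run when nothing more specific
             * claimed the packet. */
            for (p = chain; p->pri != EOH; p++)
                    if (p->fingerprint(la, pip, ah) == 0)
                            return (p->protohandler(la, pip, ah));
            return (ENOENT);        /* no helper wanted this packet */
    }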
/*
Alias_pptp.c performs special processing for PPTP sessions under TCP.
@@ -65,26 +228,6 @@
*/
-/* Includes */
-#ifdef _KERNEL
-#include <sys/param.h>
-#else
-#include <sys/types.h>
-#include <stdio.h>
-#endif
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/tcp.h>
-
-#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
-#include <netinet/libalias/alias_local.h>
-#else
-#include "alias_local.h"
-#endif
-
/*
* PPTP definitions
*/
@@ -153,7 +296,7 @@
static PptpCallId AliasVerifyPptp(struct ip *, u_int16_t *);
-void
+static void
AliasHandlePptpOut(struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
struct alias_link *lnk)
@@ -225,7 +368,7 @@
}
}
-void
+static void
AliasHandlePptpIn(struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
struct alias_link *lnk)
@@ -328,8 +471,7 @@
return (PptpCallId) (hptr + 1);
}
-
-int
+static int
AliasHandlePptpGreOut(struct libalias *la, struct ip *pip)
{
GreHdr *gr;
@@ -353,8 +495,7 @@
return (0);
}
-
-int
+static int
AliasHandlePptpGreIn(struct libalias *la, struct ip *pip)
{
GreHdr *gr;
Index: alias_db.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_db.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_db.c -L sys/netinet/libalias/alias_db.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_db.c
+++ sys/netinet/libalias/alias_db.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_db.c,v 1.67 2005/05/06 11:07:49 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_db.c,v 1.71 2007/04/07 09:47:39 piso Exp $");
/*
Alias_db.c encapsulates all data structures used for storing
@@ -143,40 +143,32 @@
*/
#ifdef _KERNEL
+#include <machine/stdarg.h>
#include <sys/param.h>
-#else
-#include <sys/types.h>
-#endif
-
-#include <sys/errno.h>
-#include <sys/queue.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/module.h>
-#else
+#include <sys/syslog.h>
+#else
+#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
+#include <sys/errno.h>
+#include <sys/time.h>
#include <unistd.h>
-#include <arpa/inet.h>
#endif
-/* BSD network include files */
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
+#include <sys/socket.h>
#include <netinet/tcp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#include <net/if.h>
#else
#include "alias.h"
#include "alias_local.h"
+#include "alias_mod.h"
#endif
static LIST_HEAD(, libalias) instancehead = LIST_HEAD_INITIALIZER(instancehead);
@@ -358,10 +350,12 @@
switch (type) {
case MOD_LOAD:
error = 0;
+ handler_chain_init();
break;
case MOD_QUIESCE:
case MOD_UNLOAD:
- finishoff();
+ handler_chain_destroy();
+ finishoff();
error = 0;
break;
default:
@@ -409,12 +403,10 @@
#endif
-#ifndef NO_LOGGING
/* Log file control */
static void ShowAliasStats(struct libalias *);
-static void InitPacketAliasLog(struct libalias *);
+static int InitPacketAliasLog(struct libalias *);
static void UninitPacketAliasLog(struct libalias *);
-#endif
static u_int
StartPointIn(struct in_addr alias_addr,
@@ -462,37 +454,56 @@
return (ntohl(y) - ntohl(x));
}
+#ifdef _KERNEL
-#ifndef NO_LOGGING
static void
-ShowAliasStats(struct libalias *la)
+AliasLog(char *str, const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vsnprintf(str, LIBALIAS_BUF_SIZE, format, ap);
+ va_end(ap);
+}
+#else
+static void
+AliasLog(FILE *stream, const char *format, ...)
{
-/* Used for debugging */
+ va_list ap;
+
+ va_start(ap, format);
+ vfprintf(stream, format, ap);
+ va_end(ap);
+ fflush(stream);
+}
+#endif
- if (la->monitorFile) {
- fprintf(la->monitorFile,
- "icmp=%d, udp=%d, tcp=%d, pptp=%d, proto=%d, frag_id=%d frag_ptr=%d",
- la->icmpLinkCount,
- la->udpLinkCount,
- la->tcpLinkCount,
- la->pptpLinkCount,
- la->protoLinkCount,
- la->fragmentIdLinkCount,
- la->fragmentPtrLinkCount);
-
- fprintf(la->monitorFile, " / tot=%d (sock=%d)\n",
- la->icmpLinkCount + la->udpLinkCount
- + la->tcpLinkCount
- + la->pptpLinkCount
- + la->protoLinkCount
- + la->fragmentIdLinkCount
- + la->fragmentPtrLinkCount,
- la->sockCount);
+static void
+ShowAliasStats(struct libalias *la)
+{
- fflush(la->monitorFile);
+ LIBALIAS_LOCK_ASSERT(la);
+/* Used for debugging */
+ if (la->logDesc) {
+ int tot = la->icmpLinkCount + la->udpLinkCount +
+ la->tcpLinkCount + la->pptpLinkCount +
+ la->protoLinkCount + la->fragmentIdLinkCount +
+ la->fragmentPtrLinkCount;
+
+ AliasLog(la->logDesc,
+ "icmp=%u, udp=%u, tcp=%u, pptp=%u, proto=%u, frag_id=%u frag_ptr=%u / tot=%u",
+ la->icmpLinkCount,
+ la->udpLinkCount,
+ la->tcpLinkCount,
+ la->pptpLinkCount,
+ la->protoLinkCount,
+ la->fragmentIdLinkCount,
+ la->fragmentPtrLinkCount, tot);
+#ifndef _KERNEL
+ AliasLog(la->logDesc, " (sock=%u)\n", la->sockCount);
+#endif
}
}
-#endif
/* Internal routines for finding, deleting and adding links
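Note: logging now goes through the single la->logDesc field: a malloc()ed
LIBALIAS_BUF_SIZE character buffer filled via vsnprintf() in the kernel, or a
FILE * on /var/log/alias.log written via vfprintf() in userland, with the two
AliasLog() wrappers above hiding the difference.  An illustrative caller (the
helper itself is hypothetical; AliasLog() and the fields are from the diff):

    static void
    LogLinkCounts(struct libalias *la)
    {

            if (la->logDesc == NULL)        /* logging never enabled */
                    return;
            AliasLog(la->logDesc, "tcp=%u, udp=%u",
                la->tcpLinkCount, la->udpLinkCount);
    }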
@@ -565,6 +576,7 @@
u_short port_sys;
u_short port_net;
+ LIBALIAS_LOCK_ASSERT(la);
/*
Description of alias_port_param for GetNewPort(). When
this parameter is zero or positive, it precisely specifies
@@ -665,6 +677,7 @@
int sock;
struct sockaddr_in sock_addr;
+ LIBALIAS_LOCK_ASSERT(la);
if (link_type == LINK_TCP)
sock = socket(AF_INET, SOCK_STREAM, 0);
else if (link_type == LINK_UDP)
@@ -723,6 +736,7 @@
u_short port_sys;
int link_type;
+ LIBALIAS_LOCK_ASSERT(la);
/*
* Get link_type from protocol
*/
@@ -802,6 +816,7 @@
struct alias_link *lnk;
int i, icount;
+ LIBALIAS_LOCK_ASSERT(la);
icount = 0;
for (i = 0; i < LINK_TABLE_OUT_SIZE; i++) {
lnk = LIST_FIRST(&la->linkTableOut[i]);
@@ -825,6 +840,7 @@
int icount;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
icount = 0;
lnk = LIST_FIRST(&la->linkTableOut[la->cleanupIndex++]);
while (lnk != NULL) {
@@ -865,6 +881,7 @@
{
struct libalias *la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
/* Don't do anything if the link is marked permanent */
if (la->deleteAllLinks == 0 && lnk->flags & LINK_PERMANENT)
return;
@@ -929,12 +946,10 @@
/* Free memory */
free(lnk);
-#ifndef NO_LOGGING
/* Write statistics, if logging enabled */
if (la->packetAliasMode & PKT_ALIAS_LOG) {
ShowAliasStats(la);
}
-#endif
}
@@ -951,6 +966,7 @@
u_int start_point; /* zero, equal to alias port */
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = malloc(sizeof(struct alias_link));
if (lnk != NULL) {
/* Basic initialization */
@@ -1072,11 +1088,9 @@
fprintf(stderr, "malloc() call failed.\n");
#endif
}
-#ifndef NO_LOGGING
if (la->packetAliasMode & PKT_ALIAS_LOG) {
ShowAliasStats(la);
}
-#endif
return (lnk);
}
@@ -1094,6 +1108,7 @@
struct alias_link *new_lnk; /* zero, equal to alias port */
struct libalias *la = old_lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
new_lnk = AddLink(la, src_addr, dst_addr, alias_addr,
src_port, dst_port, alias_port_param,
link_type);
@@ -1119,6 +1134,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, src_port, dst_port, link_type);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) {
if (lnk->src_addr.s_addr == src_addr.s_addr
@@ -1166,6 +1182,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, dst_port,
link_type, replace_partial_links);
@@ -1202,6 +1219,7 @@
struct alias_link *lnk_unknown_dst_addr;
struct alias_link *lnk_unknown_dst_port;
+ LIBALIAS_LOCK_ASSERT(la);
/* Initialize pointers */
lnk_fully_specified = NULL;
lnk_unknown_all = NULL;
@@ -1305,6 +1323,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = _FindLinkIn(la, dst_addr, alias_addr, dst_port, alias_port,
link_type, replace_partial_links);
@@ -1352,6 +1371,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, id_alias,
LINK_ICMP, 0);
@@ -1375,6 +1395,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, src_addr, dst_addr,
id, NO_DEST_PORT,
LINK_ICMP, 0);
@@ -1397,6 +1418,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_ID, 0);
@@ -1416,6 +1438,8 @@
struct in_addr alias_addr, /* is not found. */
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_ID, 0);
@@ -1426,6 +1450,8 @@
AddFragmentPtrLink(struct libalias *la, struct in_addr dst_addr,
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return AddLink(la, la->nullAddress, dst_addr, la->nullAddress,
NO_SRC_PORT, NO_DEST_PORT, ip_id,
LINK_FRAGMENT_PTR);
@@ -1436,6 +1462,8 @@
FindFragmentPtr(struct libalias *la, struct in_addr dst_addr,
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return FindLinkIn(la, dst_addr, la->nullAddress,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_PTR, 0);
@@ -1449,6 +1477,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, 0,
proto, 1);
@@ -1472,6 +1501,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, src_addr, dst_addr,
NO_SRC_PORT, NO_DEST_PORT,
proto, 1);
@@ -1499,6 +1529,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1538,6 +1569,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1572,6 +1604,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
src_call_id, 0, GET_ALIAS_PORT,
LINK_PPTP);
@@ -1588,6 +1621,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
if (lnk->link_type == LINK_PPTP &&
@@ -1608,6 +1642,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
if (lnk->link_type == LINK_PPTP &&
@@ -1628,6 +1663,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointIn(alias_addr, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableIn[i], list_in)
if (lnk->link_type == LINK_PPTP &&
@@ -1647,6 +1683,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
0 /* any */ , alias_call_id,
LINK_PPTP, 0);
@@ -1666,6 +1703,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1697,6 +1735,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, la->nullAddress, alias_addr,
0, 0, LINK_ADDR, 0);
if (lnk == NULL) {
@@ -1729,6 +1768,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, original_addr, la->nullAddress,
0, 0, LINK_ADDR, 0);
if (lnk == NULL) {
@@ -1885,6 +1925,8 @@
struct in_addr
GetDefaultAliasAddress(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return (la->aliasAddress);
}
@@ -1892,6 +1934,8 @@
void
SetDefaultAliasAddress(struct libalias *la, struct in_addr alias_addr)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
la->aliasAddress = alias_addr;
}
@@ -2106,6 +2150,8 @@
void
ClearCheckNewLink(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
la->newDefaultLink = 0;
}
@@ -2128,8 +2174,9 @@
{
struct libalias *la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
la->deleteAllLinks = 1;
- lnk = ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr,
+ ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr,
lnk->src_port, cid, lnk->alias_port, lnk->link_type);
la->deleteAllLinks = 0;
}
@@ -2160,13 +2207,14 @@
struct timezone tz;
#endif
+ LIBALIAS_LOCK_ASSERT(la);
/*
* Save system time (seconds) in global variable timeStamp for use
* by other functions. This is done so as not to unnecessarily
* waste timeline by making system calls.
*/
#ifdef _KERNEL
- la->timeStamp = time_second;
+ la->timeStamp = time_uptime;
#else
gettimeofday(&tv, &tz);
la->timeStamp = tv.tv_sec;
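Note: in kernel builds the timestamps kept in struct libalias are now taken
from time_uptime (seconds since boot, monotonic) rather than time_second (wall
clock), so expiry and housekeeping intervals derived from them can no longer
be skewed by settimeofday(2) or an NTP step.  An illustrative check under that
assumption (the helper is hypothetical; only la->timeStamp comes from the diff):

    static int
    StaleEntry(struct libalias *la, int last_access, int timeout)
    {
            /* Monotonic stamps keep this difference meaningful even across
             * wall-clock adjustments. */
            return (la->timeStamp - last_access > timeout);
    }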
@@ -2203,30 +2251,44 @@
}
}
-#ifndef NO_LOGGING
/* Init the log file and enable logging */
-static void
+static int
InitPacketAliasLog(struct libalias *la)
{
- if ((~la->packetAliasMode & PKT_ALIAS_LOG)
- && (la->monitorFile = fopen("/var/log/alias.log", "w"))) {
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (~la->packetAliasMode & PKT_ALIAS_LOG) {
+#ifdef _KERNEL
+ if ((la->logDesc = malloc(LIBALIAS_BUF_SIZE)))
+ ;
+#else
+ if ((la->logDesc = fopen("/var/log/alias.log", "w")))
+ fprintf(la->logDesc, "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
+#endif
+ else
+ return (ENOMEM); /* log initialization failed */
la->packetAliasMode |= PKT_ALIAS_LOG;
- fprintf(la->monitorFile,
- "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
}
+
+ return (1);
}
/* Close the log-file and disable logging. */
static void
UninitPacketAliasLog(struct libalias *la)
{
- if (la->monitorFile) {
- fclose(la->monitorFile);
- la->monitorFile = NULL;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (la->logDesc) {
+#ifdef _KERNEL
+ free(la->logDesc);
+#else
+ fclose(la->logDesc);
+#endif
+ la->logDesc = NULL;
}
la->packetAliasMode &= ~PKT_ALIAS_LOG;
}
-#endif
/* Outside world interfaces
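Note: InitPacketAliasLog() now reports failure (ENOMEM) instead of returning
void, and under _KERNEL it allocates a LIBALIAS_BUF_SIZE buffer instead of
opening a file; LibAliasSetMode() below propagates the failure as -1.  A
userland usage sketch (a hypothetical natd(8)-style consumer, not part of this
commit):

    struct libalias *la;

    la = LibAliasInit(NULL);
    if (LibAliasSetMode(la, PKT_ALIAS_LOG, PKT_ALIAS_LOG) < 0)
            fprintf(stderr, "could not enable libalias logging\n");
    /* ... aliasing runs; stats lines land in /var/log/alias.log ... */
    LibAliasSetMode(la, 0, PKT_ALIAS_LOG);  /* disable and close the log */
    LibAliasUninit(la);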
@@ -2257,6 +2319,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -2269,7 +2332,8 @@
fprintf(stderr, "PacketAliasRedirectPort(): ");
fprintf(stderr, "only TCP and UDP protocols allowed\n");
#endif
- return (NULL);
+ lnk = NULL;
+ goto getout;
}
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
@@ -2286,6 +2350,8 @@
}
#endif
+getout:
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2294,7 +2360,9 @@
LibAliasAddServer(struct libalias *la, struct alias_link *lnk, struct in_addr addr, u_short port)
{
struct server *server;
+ int res;
+ LIBALIAS_LOCK(la);
(void)la;
server = malloc(sizeof(struct server));
@@ -2316,9 +2384,12 @@
server->next = head;
}
lnk->server = server;
- return (0);
+ res = 0;
} else
- return (-1);
+ res = -1;
+
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
/* Redirect packets of a given IP protocol from a specific
@@ -2331,6 +2402,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
NO_SRC_PORT, NO_DEST_PORT, 0,
proto);
@@ -2345,6 +2417,7 @@
}
#endif
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2355,6 +2428,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
lnk = AddLink(la, src_addr, la->nullAddress, alias_addr,
0, 0, 0,
LINK_ADDR);
@@ -2369,6 +2443,7 @@
}
#endif
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2377,15 +2452,19 @@
int
LibAliasRedirectDynamic(struct libalias *la, struct alias_link *lnk)
{
+ int res;
+ LIBALIAS_LOCK(la);
(void)la;
if (lnk->flags & LINK_PARTIALLY_SPECIFIED)
- return (-1);
+ res = -1;
else {
lnk->flags &= ~LINK_PERMANENT;
- return (0);
+ res = 0;
}
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
@@ -2395,27 +2474,35 @@
/* This is a dangerous function to put in the API,
because an invalid pointer can crash the program. */
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
DeleteLink(lnk);
la->deleteAllLinks = 0;
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetAddress(struct libalias *la, struct in_addr addr)
{
+
+ LIBALIAS_LOCK(la);
if (la->packetAliasMode & PKT_ALIAS_RESET_ON_ADDR_CHANGE
&& la->aliasAddress.s_addr != addr.s_addr)
CleanupAliasData(la);
la->aliasAddress = addr;
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetTarget(struct libalias *la, struct in_addr target_addr)
{
+
+ LIBALIAS_LOCK(la);
la->targetAddress = target_addr;
+ LIBALIAS_UNLOCK(la);
}
static void
@@ -2447,8 +2534,8 @@
LIST_INSERT_HEAD(&instancehead, la, instancelist);
#ifdef _KERNEL
- la->timeStamp = time_second;
- la->lastCleanupTime = time_second;
+ la->timeStamp = time_uptime;
+ la->lastCleanupTime = time_uptime;
#else
gettimeofday(&tv, &tz);
la->timeStamp = tv.tv_sec;
@@ -2460,8 +2547,10 @@
LIST_INIT(&la->linkTableOut[i]);
for (i = 0; i < LINK_TABLE_IN_SIZE; i++)
LIST_INIT(&la->linkTableIn[i]);
-
+ LIBALIAS_LOCK_INIT(la);
+ LIBALIAS_LOCK(la);
} else {
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
CleanupAliasData(la);
la->deleteAllLinks = 0;
@@ -2489,22 +2578,28 @@
#ifndef NO_FW_PUNCH
la->fireWallFD = -1;
#endif
+#ifndef _KERNEL
+ LibAliasRefreshModules();
+#endif
+ LIBALIAS_UNLOCK(la);
return (la);
}
void
LibAliasUninit(struct libalias *la)
{
+
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
CleanupAliasData(la);
la->deleteAllLinks = 0;
-#ifndef NO_LOGGING
UninitPacketAliasLog(la);
-#endif
#ifndef NO_FW_PUNCH
UninitPunchFW(la);
#endif
LIST_REMOVE(la, instancelist);
+ LIBALIAS_UNLOCK(la);
+ LIBALIAS_LOCK_DESTROY(la);
free(la);
}
@@ -2517,16 +2612,19 @@
* do a probe for flag values) */
)
{
-#ifndef NO_LOGGING
+ int res = -1;
+
+ LIBALIAS_LOCK(la);
/* Enable logging? */
if (flags & mask & PKT_ALIAS_LOG) {
- InitPacketAliasLog(la); /* Do the enable */
+ /* Do the enable */
+ if (InitPacketAliasLog(la) == ENOMEM)
+ goto getout;
} else
/* _Disable_ logging? */
if (~flags & mask & PKT_ALIAS_LOG) {
UninitPacketAliasLog(la);
}
-#endif
#ifndef NO_FW_PUNCH
/* Start punching holes in the firewall? */
if (flags & mask & PKT_ALIAS_PUNCH_FW) {
@@ -2540,14 +2638,22 @@
/* Other flags can be set/cleared without special action */
la->packetAliasMode = (flags & mask) | (la->packetAliasMode & ~mask);
- return (la->packetAliasMode);
+ res = la->packetAliasMode;
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
int
LibAliasCheckNewLink(struct libalias *la)
{
- return (la->newDefaultLink);
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = la->newDefaultLink;
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
@@ -2653,6 +2759,7 @@
InitPunchFW(struct libalias *la)
{
+ LIBALIAS_LOCK_ASSERT(la);
la->fireWallField = malloc(la->fireWallNumNums);
if (la->fireWallField) {
memset(la->fireWallField, 0, la->fireWallNumNums);
@@ -2667,6 +2774,8 @@
static void
UninitPunchFW(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
ClearAllFWHoles(la);
if (la->fireWallFD >= 0)
close(la->fireWallFD);
@@ -2686,6 +2795,7 @@
struct ip_fw rule; /* On-the-fly built rule */
int fwhole; /* Where to punch hole */
+ LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
/* Don't do anything unless we are asked to */
@@ -2757,9 +2867,9 @@
static void
ClearFWHole(struct alias_link *lnk)
{
-
struct libalias *la;
+ LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
if (lnk->link_type == LINK_TCP) {
int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall
@@ -2784,6 +2894,7 @@
struct ip_fw rule; /* On-the-fly built rule */
int i;
+ LIBALIAS_LOCK_ASSERT(la);
if (la->fireWallFD < 0)
return;
@@ -2802,14 +2913,20 @@
void
LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)
{
+
+ LIBALIAS_LOCK(la);
#ifndef NO_FW_PUNCH
la->fireWallBaseNum = base;
la->fireWallNumNums = num;
#endif
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetSkinnyPort(struct libalias *la, unsigned int port)
{
+
+ LIBALIAS_LOCK(la);
la->skinnyPort = port;
+ LIBALIAS_UNLOCK(la);
}
Index: alias_ftp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_ftp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_ftp.c -L sys/netinet/libalias/alias_ftp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_ftp.c
+++ sys/netinet/libalias/alias_ftp.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_ftp.c,v 1.27 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_ftp.c,v 1.29 2007/04/04 03:14:15 kan Exp $");
/*
Alias_ftp.c performs special processing for FTP sessions under
@@ -72,10 +72,12 @@
#ifdef _KERNEL
#include <sys/param.h>
#include <sys/ctype.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
#include <string.h>
#endif
@@ -88,8 +90,81 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define FTP_CONTROL_PORT_NUMBER 21
+
+static void
+AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleFtpOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 80,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_ftp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_ftp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_ftp, 1);
+MODULE_DEPEND(alias_ftp, libalias, 1, 1, 1);
#endif
#define FTP_CONTROL_PORT_NUMBER 21
@@ -112,7 +187,7 @@
static int ParseFtp229Reply(struct libalias *la, char *, int);
static void NewFtpMessage(struct libalias *la, struct ip *, struct alias_link *, int, int);
-void
+static void
AliasHandleFtpOut(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
Index: mld6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/mld6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/mld6_var.h -L sys/netinet6/mld6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/mld6_var.h
+++ sys/netinet6/mld6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/mld6_var.h,v 1.6.2.1 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/mld6_var.h,v 1.7 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: mld6_var.h,v 1.4 2000/03/25 07:23:54 sumikawa Exp $ */
/*-
Index: ip6_mroute.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_mroute.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_mroute.h -L sys/netinet6/ip6_mroute.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_mroute.h
+++ sys/netinet6/ip6_mroute.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.6.2.1 2005/10/09 05:21:18 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.12 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $ */
/*-
@@ -100,12 +100,9 @@
* Argument structure for MRT6_ADD_IF.
*/
struct mif6ctl {
- mifi_t mif6c_mifi; /* the index of the mif to be added */
- u_char mif6c_flags; /* MIFF_ flags defined below */
+ mifi_t mif6c_mifi; /* the index of the mif to be added */
+ u_char mif6c_flags; /* MIFF_ flags defined below */
u_short mif6c_pifi; /* the index of the physical IF */
-#ifdef notyet
- u_int mif6c_rate_limit; /* max rate */
-#endif
};
#define MIFF_REGISTER 0x1 /* mif represents a register end-point */
@@ -126,16 +123,16 @@
struct mrt6stat {
u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
- u_quad_t mrt6s_upcalls; /* # calls to mrouted */
+ u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */
u_quad_t mrt6s_no_route; /* no route for packet's origin */
u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */
u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */
u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */
u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */
- u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
- u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
- u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
- u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
+ u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
+ u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
+ u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
+ u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
};
@@ -207,13 +204,10 @@
* The kernel's multicast-interface structure.
*/
struct mif6 {
- u_char m6_flags; /* MIFF_ flags defined above */
- u_int m6_rate_limit; /* max rate */
-#ifdef notyet
- struct tbf *m6_tbf; /* token bucket structure at intf. */
-#endif
- struct in6_addr m6_lcl_addr; /* local interface address */
- struct ifnet *m6_ifp; /* pointer to interface */
+ u_char m6_flags; /* MIFF_ flags defined above */
+ u_int m6_rate_limit; /* max rate */
+ struct in6_addr m6_lcl_addr; /* local interface address */
+ struct ifnet *m6_ifp; /* pointer to interface */
u_quad_t m6_pkt_in; /* # pkts in on interface */
u_quad_t m6_pkt_out; /* # pkts out on interface */
u_quad_t m6_bytes_in; /* # bytes in on interface */
@@ -231,13 +225,13 @@
struct mf6c {
struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */
struct sockaddr_in6 mf6c_mcastgrp; /* multicast group associated*/
- mifi_t mf6c_parent; /* incoming IF */
+ mifi_t mf6c_parent; /* incoming IF */
struct if_set mf6c_ifset; /* set of outgoing IFs */
- u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */
- u_quad_t mf6c_byte_cnt; /* byte count for src-grp */
- u_quad_t mf6c_wrong_if; /* wrong if for src-grp */
- int mf6c_expire; /* time to clean entry up */
+ u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */
+ u_quad_t mf6c_byte_cnt; /* byte count for src-grp */
+ u_quad_t mf6c_wrong_if; /* wrong if for src-grp */
+ int mf6c_expire; /* time to clean entry up */
struct timeval mf6c_last_assert; /* last time I sent an assert*/
struct rtdetq *mf6c_stall; /* pkts waiting for route */
struct mf6c *mf6c_next; /* hash table linkage */
@@ -250,8 +244,8 @@
*/
#ifndef _NETINET_IP_MROUTE_H_
struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */
- struct mbuf *m; /* A copy of the packet */
- struct ifnet *ifp; /* Interface pkt came in on */
+ struct mbuf *m; /* A copy of the packet */
+ struct ifnet *ifp; /* Interface pkt came in on */
#ifdef UPCALL_TIMING
struct timeval t; /* Timestamp */
#endif /* UPCALL_TIMING */
@@ -268,10 +262,10 @@
#define MAX_UPQ6 4 /* max. no of pkts in upcall Q */
-int ip6_mrouter_set __P((struct socket *so, struct sockopt *sopt));
-int ip6_mrouter_get __P((struct socket *so, struct sockopt *sopt));
-int ip6_mrouter_done __P((void));
-int mrt6_ioctl __P((int, caddr_t));
+extern int (*ip6_mrouter_set)(struct socket *so, struct sockopt *sopt);
+extern int (*ip6_mrouter_get)(struct socket *so, struct sockopt *sopt);
+extern int (*ip6_mrouter_done)(void);
+extern int (*mrt6_ioctl)(int, caddr_t);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_MROUTE_H_ */
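Note: the IPv6 multicast-routing entry points change from plain functions to
function pointers that are only filled in when the multicast routing code is
loaded, so callers have to test the hook before calling through it.  A small
fragment of the resulting convention (it mirrors the mrt6_ioctl check added to
in6_control() in the in6.c diff below):

    int error;

    if (ip6_mrouter_set == NULL)
            error = EOPNOTSUPP;     /* multicast routing module not loaded */
    else
            error = ip6_mrouter_set(so, sopt);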
Index: in6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/in6.c -L sys/netinet6/in6.c -u -r1.3 -r1.4
--- sys/netinet6/in6.c
+++ sys/netinet6/in6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netinet6/in6.c,v 1.51.2.9 2006/06/17 17:58:33 gnn Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6.c,v 1.73 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */
/*-
@@ -71,6 +71,7 @@
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/systm.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
@@ -97,8 +98,6 @@
#include <netinet6/scope6_var.h>
#include <netinet6/in6_pcb.h>
-#include <net/net_osdep.h>
-
MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");
/*
@@ -141,6 +140,7 @@
struct sockaddr_in6 all1_sa;
struct rtentry *nrt = NULL;
int e;
+ char ip6buf[INET6_ADDRSTRLEN];
bzero(&all1_sa, sizeof(all1_sa));
all1_sa.sin6_family = AF_INET6;
@@ -159,11 +159,12 @@
(struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt);
if (e != 0) {
/* XXX need more descriptive message */
+
log(LOG_ERR, "in6_ifloop_request: "
"%s operation failed for %s (errno=%d)\n",
cmd == RTM_ADD ? "ADD" : "DELETE",
- ip6_sprintf(&((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr),
- e);
+ ip6_sprintf(ip6buf,
+ &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e);
}
/*
@@ -187,9 +188,9 @@
}
rt_newaddrmsg(cmd, ifa, e, nrt);
- if (cmd == RTM_DELETE) {
- rtfree(nrt);
- } else {
+ if (cmd == RTM_DELETE)
+ RTFREE_LOCKED(nrt);
+ else {
/* the cmd must be RTM_ADD here */
RT_REMREF(nrt);
RT_UNLOCK(nrt);
@@ -215,7 +216,7 @@
need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
(rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0);
if (rt)
- rtfree(rt);
+ RTFREE_LOCKED(rt);
if (need_loop)
in6_ifloop_request(RTM_ADD, ifa);
}
@@ -267,7 +268,7 @@
if (rt != NULL) {
if ((rt->rt_flags & RTF_HOST) != 0 &&
(rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
- rtfree(rt);
+ RTFREE_LOCKED(rt);
in6_ifloop_request(RTM_DELETE, ifa);
} else
RT_UNLOCK(rt);
@@ -276,9 +277,7 @@
}
int
-in6_mask2len(mask, lim0)
- struct in6_addr *mask;
- u_char *lim0;
+in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
int x = 0, y;
u_char *lim = lim0, *p;
@@ -317,34 +316,29 @@
#define ia62ifa(ia6) (&((ia6)->ia_ifa))
int
-in6_control(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in6_control(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct in6_ifreq *ifr = (struct in6_ifreq *)data;
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
- int error, privileged;
struct sockaddr_in6 *sa6;
-
- privileged = 0;
- if (td == NULL || !suser(td))
- privileged++;
+ int error;
switch (cmd) {
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
- return (mrt6_ioctl(cmd, data));
+ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
}
switch(cmd) {
case SIOCAADDRCTL_POLICY:
case SIOCDADDRCTL_POLICY:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
+ if (error)
+ return (error);
+ }
return (in6_src_ioctl(cmd, data));
}
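Note: the single "privileged" flag computed from suser() is replaced by
per-operation priv_check() calls against specific PRIV_NETINET_* privileges,
and a NULL thread pointer (a request originating inside the kernel) skips the
check entirely.  The surrounding hunks repeat this shape; condensed:

    if (td != NULL) {       /* NULL td == trusted, kernel-internal request */
            error = priv_check(td, PRIV_NETINET_ND6);
            if (error)
                    return (error);
    }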
@@ -357,8 +351,11 @@
case SIOCSRTRFLUSH_IN6:
case SIOCSDEFIFACE_IN6:
case SIOCSIFINFO_FLAGS:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ND6);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
@@ -385,8 +382,11 @@
switch (cmd) {
case SIOCSSCOPE6:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_SCOPE6);
+ if (error)
+ return (error);
+ }
return (scope6_set(ifp,
(struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
case SIOCGSCOPE6:
@@ -400,8 +400,15 @@
switch (cmd) {
case SIOCALIFADDR:
case SIOCDLIFADDR:
- if (!privileged)
- return (EPERM);
+ /*
+ * XXXRW: Is this checked at another layer? What priv to use
+ * here?
+ */
+ if (td != NULL) {
+ error = suser(td);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case SIOCGLIFADDR:
return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
@@ -490,8 +497,16 @@
if (ifra->ifra_addr.sin6_family != AF_INET6 ||
ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
return (EAFNOSUPPORT);
- if (!privileged)
- return (EPERM);
+
+ /*
+ * XXXRW: Is this checked at another layer? What priv to use
+ * here?
+ */
+ if (td != NULL) {
+ error = suser(td);
+ if (error)
+ return (error);
+ }
break;
@@ -510,8 +525,11 @@
{
struct in6_addrlifetime *lt;
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ALIFETIME6);
+ if (error)
+ return (error);
+ }
if (ia == NULL)
return (EADDRNOTAVAIL);
/* sanity for overflow - beware unsigned */
@@ -643,7 +661,7 @@
return (error);
if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
== NULL) {
- /*
+ /*
* this can happen when the user specify the 0 valid
* lifetime.
*/
@@ -771,11 +789,8 @@
* XXX: should this be performed under splnet()?
*/
int
-in6_update_ifa(ifp, ifra, ia, flags)
- struct ifnet *ifp;
- struct in6_aliasreq *ifra;
- struct in6_ifaddr *ia;
- int flags;
+in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
{
int error = 0, hostIsNew = 0, plen = -1;
struct in6_ifaddr *oia;
@@ -785,6 +800,7 @@
struct in6_multi *in6m_sol;
struct rtentry *rt;
int delay;
+ char ip6buf[INET6_ADDRSTRLEN];
/* Validate parameters */
if (ifp == NULL || ifra == NULL) /* this maybe redundant */
@@ -879,7 +895,7 @@
*/
nd6log((LOG_INFO,
"in6_update_ifa: valid lifetime is 0 for %s\n",
- ip6_sprintf(&ifra->ifra_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
if (ia == NULL)
return (0); /* there's nothing to do */
@@ -901,6 +917,7 @@
if (ia == NULL)
return (ENOBUFS);
bzero((caddr_t)ia, sizeof(*ia));
+ LIST_INIT(&ia->ia6_memberships);
/* Initialize the address and masks, and put time stamp */
IFA_LOCK_INIT(&ia->ia_ifa);
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
@@ -946,7 +963,7 @@
in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
" existing (%s) address should not be changed\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
error = EINVAL;
goto unlink;
}
@@ -966,7 +983,7 @@
(e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
"a route to the old destination: %s\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
/* proceed anyway... */
} else
ia->ia_flags &= ~IFA_ROUTE;
@@ -1058,15 +1075,17 @@
(MAX_RTR_SOLICITATION_DELAY * hz);
}
imm = in6_joingroup(ifp, &llsol, &error, delay);
- if (error != 0) {
+ if (imm == NULL) {
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
- ip6_sprintf(&llsol), if_name(ifp),
+ ip6_sprintf(ip6buf, &llsol), if_name(ifp),
error));
in6_purgeaddr((struct ifaddr *)ia);
return (error);
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships,
+ imm, i6mm_chain);
in6m_sol = imm->i6mm_maddr;
bzero(&mltmask, sizeof(mltmask));
@@ -1144,10 +1163,11 @@
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
/*
* join node information group address
@@ -1170,9 +1190,12 @@
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
/* XXX not very fatal, go on... */
+ } else {
+ LIST_INSERT_HEAD(&ia->ia6_memberships,
+ imm, i6mm_chain);
}
}
#undef hostnamelen
@@ -1231,10 +1254,11 @@
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
#undef MLTMASK_LEN
}
@@ -1293,11 +1317,12 @@
}
void
-in6_purgeaddr(ifa)
- struct ifaddr *ifa;
+in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+ char ip6buf[INET6_ADDRSTRLEN];
+ struct in6_multi_mship *imm;
/* stop DAD processing */
nd6_dad_stop(ifa);
@@ -1314,8 +1339,8 @@
log(LOG_ERR, "in6_purgeaddr: failed to remove "
"a route to the p2p destination: %s on %s, "
"errno=%d\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp),
- e);
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
+ if_name(ifp), e);
/* proceed anyway... */
} else
ia->ia_flags &= ~IFA_ROUTE;
@@ -1324,33 +1349,19 @@
/* Remove ownaddr's loopback rtentry, if it exists. */
in6_ifremloop(&(ia->ia_ifa));
- if (ifp->if_flags & IFF_MULTICAST) {
- /*
- * delete solicited multicast addr for deleting host id
- */
- struct in6_multi *in6m;
- struct in6_addr llsol;
- bzero(&llsol, sizeof(struct in6_addr));
- llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
- llsol.s6_addr32[1] = 0;
- llsol.s6_addr32[2] = htonl(1);
- llsol.s6_addr32[3] =
- ia->ia_addr.sin6_addr.s6_addr32[3];
- llsol.s6_addr8[12] = 0xff;
- (void)in6_setscope(&llsol, ifp, NULL); /* XXX proceed anyway */
-
- IN6_LOOKUP_MULTI(llsol, ifp, in6m);
- if (in6m)
- in6_delmulti(in6m);
+ /*
+ * leave from multicast groups we have joined for the interface
+ */
+ while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
}
in6_unlink_ifa(ia, ifp);
}
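Note: each in6_ifaddr now remembers its multicast memberships on the new
ia6_memberships list, so tearing an address down is a simple walk calling
in6_leavegroup(), replacing the old hand-rolled solicited-node lookup and
in6_delmulti() call.  The join sites in the earlier in6_update_ifa() hunks all
follow this fragment (error handling trimmed):

    imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
    if (imm != NULL)
            LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
    /* ... and in6_purgeaddr() later undoes every recorded membership. */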
static void
-in6_unlink_ifa(ia, ifp)
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
+in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
struct in6_ifaddr *oia;
int s = splnet();
@@ -1387,7 +1398,7 @@
/*
* Also, if the address being removed is autoconf'ed, call
* pfxlist_onlink_check() since the release might affect the status of
- * other (detached) addresses.
+ * other (detached) addresses.
*/
if ((oia->ia6_flags & IN6_IFF_AUTOCONF)) {
pfxlist_onlink_check();
@@ -1403,8 +1414,7 @@
}
void
-in6_purgeif(ifp)
- struct ifnet *ifp;
+in6_purgeif(struct ifnet *ifp)
{
struct ifaddr *ifa, *nifa;
@@ -1442,12 +1452,8 @@
* address encoding scheme. (see figure on page 8)
*/
static int
-in6_lifaddr_ioctl(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct if_laddrreq *iflr = (struct if_laddrreq *)data;
struct ifaddr *ifa;
@@ -1511,7 +1517,7 @@
return EADDRNOTAVAIL;
hostid = IFA_IN6(ifa);
- /* prefixlen must be <= 64. */
+ /* prefixlen must be <= 64. */
if (64 < iflr->prefixlen)
return EINVAL;
prefixlen = iflr->prefixlen;
@@ -1680,11 +1686,8 @@
* and routing table entry.
*/
static int
-in6_ifinit(ifp, ia, sin6, newhost)
- struct ifnet *ifp;
- struct in6_ifaddr *ia;
- struct sockaddr_in6 *sin6;
- int newhost;
+in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
+ struct sockaddr_in6 *sin6, int newhost)
{
int error = 0, plen, ifacount = 0;
int s = splimp();
@@ -1696,8 +1699,6 @@
* and to validate the address if necessary.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifacount++;
@@ -1720,8 +1721,12 @@
/* we could do in(6)_socktrim here, but just omit it at this moment. */
- if (newhost && nd6_need_cache(ifp) != 0) {
- /* set the rtrequest function to create llinfo */
+ if (newhost) {
+ /*
+ * set the rtrequest function to create llinfo. It also
+ * adjust outgoing interface of the route for the local
+ * address when called via in6_ifaddloop() below.
+ */
ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
}
@@ -1786,11 +1791,8 @@
}
struct in6_multi_mship *
-in6_joingroup(ifp, addr, errorp, delay)
- struct ifnet *ifp;
- struct in6_addr *addr;
- int *errorp;
- int delay;
+in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
+ int *errorp, int delay)
{
struct in6_multi_mship *imm;
@@ -1809,8 +1811,7 @@
}
int
-in6_leavegroup(imm)
- struct in6_multi_mship *imm;
+in6_leavegroup(struct in6_multi_mship *imm)
{
if (imm->i6mm_maddr)
@@ -1823,15 +1824,11 @@
* Find an IPv6 interface link-local address specific to an interface.
*/
struct in6_ifaddr *
-in6ifa_ifpforlinklocal(ifp, ignoreflags)
- struct ifnet *ifp;
- int ignoreflags;
+in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
{
struct ifaddr *ifa;
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
@@ -1850,15 +1847,11 @@
* find the internet address corresponding to a given interface and address.
*/
struct in6_ifaddr *
-in6ifa_ifpwithaddr(ifp, addr)
- struct ifnet *ifp;
- struct in6_addr *addr;
+in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
{
struct ifaddr *ifa;
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
@@ -1869,23 +1862,20 @@
}
/*
- * Convert IP6 address to printable (loggable) representation.
+ * Convert IP6 address to printable (loggable) representation. Caller
+ * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
*/
static char digits[] = "0123456789abcdef";
-static int ip6round = 0;
char *
-ip6_sprintf(addr)
- const struct in6_addr *addr;
+ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
{
- static char ip6buf[8][48];
int i;
char *cp;
const u_int16_t *a = (const u_int16_t *)addr;
const u_int8_t *d;
- int dcolon = 0;
+ int dcolon = 0, zero = 0;
- ip6round = (ip6round + 1) & 7;
- cp = ip6buf[ip6round];
+ cp = ip6buf;
for (i = 0; i < 8; i++) {
if (dcolon == 1) {
@@ -1911,20 +1901,33 @@
continue;
}
d = (const u_char *)a;
- *cp++ = digits[*d >> 4];
- *cp++ = digits[*d++ & 0xf];
- *cp++ = digits[*d >> 4];
+ /* Try to eliminate leading zeros in printout like in :0001. */
+ zero = 1;
+ *cp = digits[*d >> 4];
+ if (*cp != '0') {
+ zero = 0;
+ cp++;
+ }
+ *cp = digits[*d++ & 0xf];
+ if (zero == 0 || (*cp != '0')) {
+ zero = 0;
+ cp++;
+ }
+ *cp = digits[*d >> 4];
+ if (zero == 0 || (*cp != '0')) {
+ zero = 0;
+ cp++;
+ }
*cp++ = digits[*d & 0xf];
*cp++ = ':';
a++;
}
- *--cp = 0;
- return (ip6buf[ip6round]);
+ *--cp = '\0';
+ return (ip6buf);
}
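Note: ip6_sprintf() no longer cycles through eight static buffers (a scheme
that was both racy and limited to eight live strings); the caller must now
supply storage of at least INET6_ADDRSTRLEN, which is why every logging site
in this commit gains a local ip6buf, and sites that print two addresses in one
line (see the nd6_rtr.c hunks below) need two separate buffers.  Caller-side
sketch (the log message itself is illustrative):

    char ip6buf[INET6_ADDRSTRLEN];

    log(LOG_INFO, "peer %s unreachable\n",
        ip6_sprintf(ip6buf, &ip6->ip6_src));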
int
-in6_localaddr(in6)
- struct in6_addr *in6;
+in6_localaddr(struct in6_addr *in6)
{
struct in6_ifaddr *ia;
@@ -1942,8 +1945,7 @@
}
int
-in6_is_addr_deprecated(sa6)
- struct sockaddr_in6 *sa6;
+in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
struct in6_ifaddr *ia;
@@ -1964,8 +1966,7 @@
* hard coding...
*/
int
-in6_matchlen(src, dst)
-struct in6_addr *src, *dst;
+in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
{
int match = 0;
u_char *s = (u_char *)src, *d = (u_char *)dst;
@@ -1985,9 +1986,7 @@
/* XXX: to be scope conscious */
int
-in6_are_prefix_equal(p1, p2, len)
- struct in6_addr *p1, *p2;
- int len;
+in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
{
int bytelen, bitlen;
@@ -2012,9 +2011,7 @@
}
void
-in6_prefixlen2mask(maskp, len)
- struct in6_addr *maskp;
- int len;
+in6_prefixlen2mask(struct in6_addr *maskp, int len)
{
u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
int bytelen, bitlen, i;
@@ -2040,9 +2037,7 @@
* found, return the first valid address from designated IF.
*/
struct in6_ifaddr *
-in6_ifawithifp(ifp, dst)
- struct ifnet *ifp;
- struct in6_addr *dst;
+in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
@@ -2122,8 +2117,7 @@
* perform DAD when interface becomes IFF_UP.
*/
void
-in6_if_up(ifp)
- struct ifnet *ifp;
+in6_if_up(struct ifnet *ifp)
{
struct ifaddr *ifa;
struct in6_ifaddr *ia;
@@ -2151,8 +2145,7 @@
}
int
-in6if_do_dad(ifp)
- struct ifnet *ifp;
+in6if_do_dad(struct ifnet *ifp)
{
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
@@ -2192,7 +2185,7 @@
* to in6_maxmtu.
*/
void
-in6_setmaxmtu()
+in6_setmaxmtu(void)
{
unsigned long maxmtu = 0;
struct ifnet *ifp;
@@ -2220,8 +2213,7 @@
* consistent, and those really are as of August 2004.
*/
int
-in6_if2idlen(ifp)
- struct ifnet *ifp;
+in6_if2idlen(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ETHER: /* RFC2464 */
@@ -2272,8 +2264,7 @@
}
void *
-in6_domifattach(ifp)
- struct ifnet *ifp;
+in6_domifattach(struct ifnet *ifp)
{
struct in6_ifextra *ext;
@@ -2295,9 +2286,7 @@
}
void
-in6_domifdetach(ifp, aux)
- struct ifnet *ifp;
- void *aux;
+in6_domifdetach(struct ifnet *ifp, void *aux)
{
struct in6_ifextra *ext = (struct in6_ifextra *)aux;
@@ -2315,6 +2304,7 @@
void
in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
{
+
bzero(sin, sizeof(*sin));
sin->sin_len = sizeof(struct sockaddr_in);
sin->sin_family = AF_INET;
Index: nd6_rtr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6_rtr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/nd6_rtr.c -L sys/netinet6/nd6_rtr.c -u -r1.2 -r1.3
--- sys/netinet6/nd6_rtr.c
+++ sys/netinet6/nd6_rtr.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6_rtr.c,v 1.26.2.5 2006/03/20 16:23:08 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6_rtr.c,v 1.36 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */
/*-
@@ -60,8 +60,6 @@
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
#define SDL(s) ((struct sockaddr_dl *)s)
static int rtpref __P((struct nd_defrouter *));
@@ -116,9 +114,7 @@
* Based on RFC 2461
*/
void
-nd6_rs_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_rs_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -127,6 +123,7 @@
char *lladdr = NULL;
int lladdrlen = 0;
union nd_opts ndopts;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/* If I'm not a router, ignore it. */
if (ip6_accept_rtadv != 0 || ip6_forwarding != 1)
@@ -136,8 +133,8 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -177,7 +174,7 @@
nd6log((LOG_INFO,
"nd6_rs_input: lladdrlen mismatch for %s "
"(if %d, RS packet %d)\n",
- ip6_sprintf(&saddr6),
+ ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -201,9 +198,7 @@
* TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
*/
void
-nd6_ra_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_ra_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
@@ -213,6 +208,7 @@
int mcast = 0;
union nd_opts ndopts;
struct nd_defrouter *dr;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/*
* We only accept RAs only when
@@ -227,15 +223,15 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
nd6log((LOG_ERR,
"nd6_ra_input: src %s is not link-local\n",
- ip6_sprintf(&saddr6)));
+ ip6_sprintf(ip6bufs, &saddr6)));
goto bad;
}
@@ -327,7 +323,8 @@
nd6log((LOG_INFO,
"nd6_ra_input: invalid prefix "
"%s, ignored\n",
- ip6_sprintf(&pi->nd_opt_pi_prefix)));
+ ip6_sprintf(ip6bufs,
+ &pi->nd_opt_pi_prefix)));
continue;
}
@@ -361,7 +358,7 @@
if (mtu < IPV6_MMTU) {
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
"mtu=%lu sent from %s, ignoring\n",
- mtu, ip6_sprintf(&ip6->ip6_src)));
+ mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
goto skip;
}
@@ -378,7 +375,7 @@
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
"mtu=%lu sent from %s; "
"exceeds maxmtu %lu, ignoring\n",
- mtu, ip6_sprintf(&ip6->ip6_src), maxmtu));
+ mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
}
}
@@ -399,7 +396,7 @@
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
"nd6_ra_input: lladdrlen mismatch for %s "
- "(if %d, RA packet %d)\n", ip6_sprintf(&saddr6),
+ "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -430,9 +427,7 @@
/* tell the change to user processes watching the routing socket. */
static void
-nd6_rtmsg(cmd, rt)
- int cmd;
- struct rtentry *rt;
+nd6_rtmsg(int cmd, struct rtentry *rt)
{
struct rt_addrinfo info;
@@ -450,8 +445,7 @@
}
void
-defrouter_addreq(new)
- struct nd_defrouter *new;
+defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
@@ -484,9 +478,7 @@
}
struct nd_defrouter *
-defrouter_lookup(addr, ifp)
- struct in6_addr *addr;
- struct ifnet *ifp;
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
@@ -505,8 +497,7 @@
* not be called from anywhere else.
*/
static void
-defrouter_delreq(dr)
- struct nd_defrouter *dr;
+defrouter_delreq(struct nd_defrouter *dr)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *oldrt = NULL;
@@ -535,7 +526,7 @@
* remove all default routes from default router list
*/
void
-defrouter_reset()
+defrouter_reset(void)
{
struct nd_defrouter *dr;
@@ -550,8 +541,7 @@
}
void
-defrtrlist_del(dr)
- struct nd_defrouter *dr;
+defrtrlist_del(struct nd_defrouter *dr)
{
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
@@ -612,7 +602,7 @@
* complicated and the possibility of introducing bugs.
*/
void
-defrouter_select()
+defrouter_select(void)
{
int s = splnet();
struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
@@ -728,8 +718,7 @@
}
static struct nd_defrouter *
-defrtrlist_update(new)
- struct nd_defrouter *new;
+defrtrlist_update(struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
int s = splnet();
@@ -814,9 +803,7 @@
}
static struct nd_pfxrouter *
-pfxrtr_lookup(pr, dr)
- struct nd_prefix *pr;
- struct nd_defrouter *dr;
+pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *search;
@@ -829,9 +816,7 @@
}
static void
-pfxrtr_add(pr, dr)
- struct nd_prefix *pr;
- struct nd_defrouter *dr;
+pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *new;
@@ -847,16 +832,14 @@
}
static void
-pfxrtr_del(pfr)
- struct nd_pfxrouter *pfr;
+pfxrtr_del(struct nd_pfxrouter *pfr)
{
LIST_REMOVE(pfr, pfr_entry);
free(pfr, M_IP6NDP);
}
struct nd_prefix *
-nd6_prefix_lookup(key)
- struct nd_prefixctl *key;
+nd6_prefix_lookup(struct nd_prefixctl *key)
{
struct nd_prefix *search;
@@ -873,14 +856,13 @@
}
int
-nd6_prelist_add(pr, dr, newp)
- struct nd_prefixctl *pr;
- struct nd_prefix **newp;
- struct nd_defrouter *dr;
+nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
+ struct nd_prefix **newp)
{
struct nd_prefix *new = NULL;
int error = 0;
int i, s;
+ char ip6buf[INET6_ADDRSTRLEN];
new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
if (new == NULL)
@@ -920,7 +902,7 @@
if ((e = nd6_prefix_onlink(new)) != 0) {
nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
"the prefix %s/%d on-link on %s (errno=%d)\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
@@ -933,11 +915,11 @@
}
void
-prelist_remove(pr)
- struct nd_prefix *pr;
+prelist_remove(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfr, *next;
int e, s;
+ char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
pr->ndpr_vltime = 0;
@@ -953,7 +935,7 @@
(e = nd6_prefix_offlink(pr)) != 0) {
nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
"on %s, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* what should we do? */
}
@@ -979,12 +961,13 @@
pfxlist_onlink_check();
}
+/*
+ * dr - may be NULL
+ */
+
static int
-prelist_update(new, dr, m, mcast)
- struct nd_prefixctl *new;
- struct nd_defrouter *dr; /* may be NULL */
- struct mbuf *m;
- int mcast;
+prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
+ struct mbuf *m, int mcast)
{
struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
struct ifaddr *ifa;
@@ -995,6 +978,7 @@
int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
+ char ip6buf[INET6_ADDRSTRLEN];
auth = 0;
if (m) {
@@ -1039,7 +1023,8 @@
"prelist_update: failed to make "
"the prefix %s/%d on-link on %s "
"(errno=%d)\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
@@ -1062,7 +1047,7 @@
nd6log((LOG_NOTICE, "prelist_update: "
"nd6_prelist_add failed for %s/%d on %s "
"errno=%d, returnpr=%p\n",
- ip6_sprintf(&new->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
new->ndpr_plen, if_name(new->ndpr_ifp),
error, newpr));
goto end; /* we should just give up in this case. */
@@ -1104,7 +1089,7 @@
goto end;
}
- /*
+ /*
* 5.5.3 (d). If the prefix advertised is not equal to the prefix of
* an address configured by stateless autoconfiguration already in the
* list of addresses associated with the interface, and the Valid
@@ -1197,14 +1182,14 @@
in6_init_address_ltimes(pr, <6_tmp);
- /*
+ /*
* We need to treat lifetimes for temporary addresses
* differently, according to
* draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
* we only update the lifetimes when they are in the maximum
* intervals.
- */
- if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
+ */
+ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
u_int32_t maxvltime, maxpltime;
if (ip6_temp_valid_lifetime >
@@ -1320,8 +1305,7 @@
* XXX: lengthy function name...
*/
static struct nd_pfxrouter *
-find_pfxlist_reachable_router(pr)
- struct nd_prefix *pr;
+find_pfxlist_reachable_router(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfxrtr;
struct rtentry *rt;
@@ -1388,15 +1372,15 @@
}
}
if (pr != NULL || (TAILQ_FIRST(&nd_defrouter) && pfxrtr == NULL)) {
- /*
+ /*
* There is at least one prefix that has a reachable router,
* or at least a router which probably does not advertise
* any prefixes. The latter would be the case when we move
* to a new link where we have a router that does not provide
* prefixes and we configure an address by hand.
- * Detach prefixes which have no reachable advertising
- * router, and attach other prefixes.
- */
+ * Detach prefixes which have no reachable advertising
+ * router, and attach other prefixes.
+ */
for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
/* XXX: a link-local prefix should never be detached */
if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
@@ -1440,6 +1424,7 @@
*/
for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
int e;
+ char ip6buf[INET6_ADDRSTRLEN];
if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
continue;
@@ -1453,8 +1438,9 @@
nd6log((LOG_ERR,
"pfxlist_onlink_check: failed to "
"make %s/%d offlink, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
- pr->ndpr_plen, e));
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen, e));
}
}
if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
@@ -1464,8 +1450,9 @@
nd6log((LOG_ERR,
"pfxlist_onlink_check: failed to "
"make %s/%d onlink, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
- pr->ndpr_plen, e));
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen, e));
}
}
}
@@ -1529,8 +1516,7 @@
}
int
-nd6_prefix_onlink(pr)
- struct nd_prefix *pr;
+nd6_prefix_onlink(struct nd_prefix *pr)
{
struct ifaddr *ifa;
struct ifnet *ifp = pr->ndpr_ifp;
@@ -1539,12 +1525,14 @@
u_long rtflags;
int error = 0;
struct rtentry *rt = NULL;
+ char ip6buf[INET6_ADDRSTRLEN];
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
nd6log((LOG_ERR,
"nd6_prefix_onlink: %s/%d is already on-link\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen));
return (EEXIST);
}
@@ -1592,7 +1580,7 @@
nd6log((LOG_NOTICE,
"nd6_prefix_onlink: failed to find any ifaddr"
" to add route for a prefix(%s/%d) on %s\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp)));
return (0);
}
@@ -1621,13 +1609,14 @@
nd6_rtmsg(RTM_ADD, rt);
pr->ndpr_stateflags |= NDPRF_ONLINK;
} else {
+ char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
" prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
"errno = %d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp),
- ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
- ip6_sprintf(&mask6.sin6_addr), rtflags, error));
+ ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
+ ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
}
if (rt != NULL) {
@@ -1640,20 +1629,21 @@
}
int
-nd6_prefix_offlink(pr)
- struct nd_prefix *pr;
+nd6_prefix_offlink(struct nd_prefix *pr)
{
int error = 0;
struct ifnet *ifp = pr->ndpr_ifp;
struct nd_prefix *opr;
struct sockaddr_in6 sa6, mask6;
struct rtentry *rt = NULL;
+ char ip6buf[INET6_ADDRSTRLEN];
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
nd6log((LOG_ERR,
"nd6_prefix_offlink: %s/%d is already off-link\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen));
return (EEXIST);
}
@@ -1706,7 +1696,8 @@
"nd6_prefix_offlink: failed to "
"recover a prefix %s/%d from %s "
"to %s (errno = %d)\n",
- ip6_sprintf(&opr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf,
+ &opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
}
@@ -1717,8 +1708,8 @@
nd6log((LOG_ERR,
"nd6_prefix_offlink: failed to delete route: "
"%s/%d on %s (errno = %d)\n",
- ip6_sprintf(&sa6.sin6_addr), pr->ndpr_plen, if_name(ifp),
- error));
+ ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
+ if_name(ifp), error));
}
if (rt != NULL) {
@@ -1729,9 +1720,7 @@
}
static struct in6_ifaddr *
-in6_ifadd(pr, mcast)
- struct nd_prefixctl *pr;
- int mcast;
+in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
struct ifnet *ifp = pr->ndpr_ifp;
struct ifaddr *ifa;
@@ -1741,6 +1730,7 @@
struct in6_addr mask;
int prefixlen = pr->ndpr_plen;
int updateflags;
+ char ip6buf[INET6_ADDRSTRLEN];
in6_prefixlen2mask(&mask, prefixlen);
@@ -1820,7 +1810,7 @@
ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
- /*
+ /*
* Make sure that we do not have this address already. This should
* usually not happen, but we can still see this case, e.g., if we
* have manually configured the exact address to be configured.
@@ -1828,7 +1818,7 @@
if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
/* this should be rare enough to make an explicit log */
log(LOG_INFO, "in6_ifadd: %s is already configured\n",
- ip6_sprintf(&ifra.ifra_addr.sin6_addr));
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
return (NULL);
}
@@ -1844,8 +1834,8 @@
if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
nd6log((LOG_ERR,
"in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
- ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp),
- error));
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
+ if_name(ifp), error));
return (NULL); /* ifaddr must not have been allocated. */
}
@@ -1854,10 +1844,11 @@
return (ia); /* this is always non-NULL */
}
+/*
+ * ia0 - corresponding public address
+ */
int
-in6_tmpifadd(ia0, forcegen, delay)
- const struct in6_ifaddr *ia0; /* corresponding public address */
- int forcegen, delay;
+in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
struct in6_ifaddr *newia, *ia;
@@ -1891,7 +1882,7 @@
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
(randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
- /*
+ /*
* in6_get_tmpifid() quite likely provided a unique interface ID.
* However, we may still have a chance to see collision, because
* there may be a time lag between generation of the ID and generation
@@ -1900,7 +1891,7 @@
for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
&ifra.ifra_addr.sin6_addr)) {
- if (trylimit-- == 0) {
+ if (trylimit-- == 0) {
/*
* Give up. Something strange should have
* happened.
@@ -2024,9 +2015,7 @@
* it shouldn't be called when acting as a router.
*/
void
-rt6_flush(gateway, ifp)
- struct in6_addr *gateway;
- struct ifnet *ifp;
+rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
struct radix_node_head *rnh = rt_tables[AF_INET6];
int s = splnet();
@@ -2044,9 +2033,7 @@
}
static int
-rt6_deleteroute(rn, arg)
- struct radix_node *rn;
- void *arg;
+rt6_deleteroute(struct radix_node *rn, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
struct rtentry *rt = (struct rtentry *)rn;
@@ -2080,8 +2067,7 @@
}
int
-nd6_setdefaultiface(ifindex)
- int ifindex;
+nd6_setdefaultiface(int ifindex)
{
int error = 0;
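
Aside from the address-formatting change, nearly every hunk in this file converts old-style K&R function definitions to ANSI prototypes; the bodies are untouched, only the parameter declarations move inside the parentheses, which lets the compiler type-check arguments at every call site. A trivial illustration (the function itself is invented for the example):

    /* Old K&R definition, the style being removed: */
    static int
    sum_kr(a, b)
        int a;
        int b;
    {
        return (a + b);
    }

    /* Equivalent ANSI prototype-style definition, the style being added: */
    static int
    sum_ansi(int a, int b)
    {
        return (a + b);
    }
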
Index: ip6_forward.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_forward.c -L sys/netinet6/ip6_forward.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_forward.c
+++ sys/netinet6/ip6_forward.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_forward.c,v 1.28.2.2 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_forward.c,v 1.40 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */
/*-
@@ -30,7 +30,6 @@
* SUCH DAMAGE.
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -67,23 +66,10 @@
#include <netinet/in_pcb.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#define IPSEC
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
-
-#include <net/net_osdep.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
@@ -101,11 +87,8 @@
* protocol deal with that.
*
*/
-
void
-ip6_forward(m, srcrt)
- struct mbuf *m;
- int srcrt;
+ip6_forward(struct mbuf *m, int srcrt)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct sockaddr_in6 *dst = NULL;
@@ -119,6 +102,9 @@
struct secpolicy *sp = NULL;
int ipsecrt = 0;
#endif
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+
+ GIANT_REQUIRED; /* XXX bz: ip6_forward_rt */
#ifdef IPSEC
/*
@@ -129,9 +115,7 @@
* before forwarding packet actually.
*/
if (ipsec6_in_reject(m, NULL)) {
-#if !defined(FAST_IPSEC)
ipsec6stat.in_polvio++;
-#endif
m_freem(m);
return;
}
@@ -153,8 +137,8 @@
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif));
}
@@ -190,7 +174,7 @@
#ifdef IPSEC
/* get a security policy for this packet */
- sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &error);
if (sp == NULL) {
ipsec6stat.out_inval++;
@@ -216,7 +200,7 @@
*/
ipsec6stat.out_polvio++;
ip6stat.ip6s_cantforward++;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -230,7 +214,7 @@
case IPSEC_POLICY_BYPASS:
case IPSEC_POLICY_NONE:
/* no need to do IPsec. */
- key_freesp(sp);
+ KEY_FREESP(&sp);
goto skip_ipsec;
case IPSEC_POLICY_IPSEC:
@@ -238,7 +222,7 @@
/* XXX should be panic ? */
printf("ip6_forward: No IPsec request specified.\n");
ip6stat.ip6s_cantforward++;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -256,7 +240,7 @@
default:
/* should be panic ?? */
printf("ip6_forward: Invalid policy found. %d\n", sp->policy);
- key_freesp(sp);
+ KEY_FREESP(&sp);
goto skip_ipsec;
}
@@ -303,7 +287,7 @@
error = ipsec6_output_tunnel(&state, sp, 0);
m = state.m;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (error) {
/* mbuf is already reclaimed in ipsec6_output_tunnel. */
@@ -331,9 +315,18 @@
}
m_freem(m);
return;
+ } else {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto freecopy;
}
- if (ip6 != mtod(m, struct ip6_hdr *)) {
+ if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
/*
* now tunnel mode headers are added. we are originating
* packet instead of forwarding the packet.
@@ -392,7 +385,7 @@
dst->sin6_family = AF_INET6;
dst->sin6_addr = ip6->ip6_dst;
- rtalloc((struct route *)&ip6_forward_rt);
+ rtalloc((struct route *)&ip6_forward_rt);
if (ip6_forward_rt.ro_rt == 0) {
ip6stat.ip6s_noroute++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
@@ -446,8 +439,8 @@
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp));
}
@@ -483,7 +476,7 @@
struct secpolicy *sp;
int ipsecerror;
size_t ipsechdrsiz;
-#endif
+#endif /* IPSEC */
mtu = IN6_LINKMTU(rt->rt_ifp);
#ifdef IPSEC
@@ -494,7 +487,7 @@
* case, as we have the outgoing interface for
* encapsulated packet as "rt->rt_ifp".
*/
- sp = ipsec6_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
+ sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &ipsecerror);
if (sp) {
ipsechdrsiz = ipsec6_hdrsiz(mcopy,
@@ -509,7 +502,7 @@
*/
if (mtu < IPV6_MMTU)
mtu = IPV6_MMTU;
-#endif
+#endif /* IPSEC */
icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
}
m_freem(m);
@@ -528,10 +521,10 @@
* Also, don't send redirect if forwarding using a route
* modified by a redirect.
*/
- if (rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
+ if (ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
#ifdef IPSEC
!ipsecrt &&
-#endif
+#endif /* IPSEC */
(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
/*
@@ -553,20 +546,6 @@
}
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- /* If ipfw says divert, we have to just drop packet */
- if ((*ip6_fw_chk_ptr)(&ip6, rt->rt_ifp, &port, &m)) {
- m_freem(m);
- goto freecopy;
- }
- if (!m)
- goto freecopy;
- }
-
- /*
* Fake scoped addresses. Note that even link-local source or
* destinaion can appear, if the originating node just sends the
* packet to us (without address resolution for the destination).
@@ -592,8 +571,8 @@
{
printf("ip6_forward: outgoing interface is loopback. "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif),
if_name(rt->rt_ifp));
}
@@ -611,7 +590,7 @@
in6_clearscope(&ip6->ip6_dst);
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto pass;
/* Run through list of hooks for output packets. */
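
Both ip6_forward() above and ip6_input() below now ask whether any packet-filter hooks are registered via the PFIL_HOOKED() macro instead of inspecting the pfil_head's ph_busy_count field directly. The surrounding call pattern is unchanged; a condensed sketch of it as it reads after the change, based on the PFIL_IN path shown in the ip6_input.c hunks further down (kernel code, shown out of context):

        /* Skip filtering entirely when no pfil(9) hooks are registered. */
        if (!PFIL_HOOKED(&inet6_pfil_hook))
            goto passin;

        /* Registered hooks may modify, drop, or consume the mbuf. */
        if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif,
            PFIL_IN, NULL))
            return;            /* a hook rejected the packet */
        if (m == NULL)
            return;            /* a hook consumed the packet */
    passin:
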
Index: in6_cksum.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_cksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_cksum.c -L sys/netinet6/in6_cksum.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_cksum.c
+++ sys/netinet6/in6_cksum.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_cksum.c,v 1.10.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_cksum.c,v 1.16 2007/07/05 16:23:47 delphij Exp $ */
/* $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <netinet/ip6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
/*
* Checksum routine for Internet Protocol family headers (Portable Version).
*
@@ -86,12 +84,8 @@
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
*/
-
int
-in6_cksum(m, nxt, off, len)
- struct mbuf *m;
- u_int8_t nxt;
- u_int32_t off, len;
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
{
u_int16_t *w;
int sum = 0;
@@ -105,7 +99,7 @@
u_int32_t ph_len;
u_int8_t ph_zero[3];
u_int8_t ph_nxt;
- } ph __packed;
+ } __packed ph;
} uph;
union {
u_int8_t c[2];
@@ -156,7 +150,7 @@
/*
* Secondly calculate a summary of the first mbuf excluding offset.
*/
- while (m != NULL && off > 0) {
+ while (off > 0) {
if (m->m_len <= off)
off -= m->m_len;
else
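
in6_cksum() computes the RFC 2460 upper-layer checksum: a 16-bit one's-complement sum over a pseudo-header (source address, destination address, the 32-bit upper-layer payload length, three zero bytes and the next-header value) followed by the transport header and payload, finally complemented. The kernel routine walks an mbuf chain and the union shown above is that pseudo-header; the following is a simplified, self-contained sketch over a contiguous buffer that performs the same arithmetic:

    #include <netinet/in.h>
    #include <stdint.h>
    #include <string.h>

    /* Fold a 32-bit one's-complement accumulator back to 16 bits. */
    static uint16_t
    fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return ((uint16_t)sum);
    }

    /* Add "len" bytes to the running sum, big-endian 16 bits at a time. */
    static uint32_t
    sum16(const void *p, size_t len, uint32_t sum)
    {
        const uint8_t *b = p;

        while (len > 1) {
            sum += (uint16_t)((b[0] << 8) | b[1]);
            b += 2;
            len -= 2;
        }
        if (len)                            /* odd trailing byte */
            sum += (uint16_t)(b[0] << 8);
        return (sum);
    }

    /* Simplified analogue of in6_cksum() for a contiguous payload. */
    uint16_t
    in6_cksum_flat(const struct in6_addr *src, const struct in6_addr *dst,
        uint8_t nxt, const void *payload, uint32_t len)
    {
        uint8_t ph[40];
        uint32_t sum;

        memcpy(&ph[0], src, 16);            /* pseudo-header: source */
        memcpy(&ph[16], dst, 16);           /* pseudo-header: destination */
        ph[32] = len >> 24; ph[33] = len >> 16;     /* upper-layer length */
        ph[34] = len >> 8;  ph[35] = len;
        ph[36] = ph[37] = ph[38] = 0;       /* three zero bytes */
        ph[39] = nxt;                       /* next header */

        sum = sum16(ph, sizeof(ph), 0);
        sum = sum16(payload, len, sum);
        return (~fold(sum) & 0xffff);       /* value in host byte order */
    }
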
Index: ip6_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_input.c -L sys/netinet6/ip6_input.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_input.c
+++ sys/netinet6/ip6_input.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.81.2.4 2006/01/31 16:36:11 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.95 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */
/*-
@@ -61,7 +61,6 @@
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -103,24 +102,13 @@
#include <netinet6/nd6.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#endif
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
+#include <netinet6/ip6_ipsec.h>
#include <netipsec/ipsec6.h>
-#define IPSEC
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
-#include <net/net_osdep.h>
-
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
@@ -138,11 +126,6 @@
struct pfil_head inet6_pfil_hook;
-/* firewall hooks */
-ip6_fw_chk_t *ip6_fw_chk_ptr;
-ip6_fw_ctl_t *ip6_fw_ctl_ptr;
-int ip6_fw_enable = 1;
-
struct ip6stat ip6stat;
static void ip6_init2 __P((void *));
@@ -157,7 +140,7 @@
* All protocols not implemented in kernel go to raw IP6 protocol handler.
*/
void
-ip6_init()
+ip6_init(void)
{
struct ip6protosw *pr;
int i;
@@ -204,8 +187,7 @@
}
static void
-ip6_init2(dummy)
- void *dummy;
+ip6_init2(void *dummy)
{
/* nd6_timer_init */
@@ -227,8 +209,7 @@
extern struct route_in6 ip6_forward_rt;
void
-ip6_input(m)
- struct mbuf *m;
+ip6_input(struct mbuf *m)
{
struct ip6_hdr *ip6;
int off = sizeof(struct ip6_hdr), nest;
@@ -240,16 +221,18 @@
int srcrt = 0;
GIANT_REQUIRED; /* XXX for now */
+
#ifdef IPSEC
/*
* should the inner packet be considered authentic?
* see comment in ah4_input().
+ * NB: m cannot be NULL when passed to the input routine
*/
- if (m) {
- m->m_flags &= ~M_AUTHIPHDR;
- m->m_flags &= ~M_AUTHIPDGM;
- }
-#endif
+
+ m->m_flags &= ~M_AUTHIPHDR;
+ m->m_flags &= ~M_AUTHIPDGM;
+
+#endif /* IPSEC */
/*
* make sure we don't have onion peering information into m_tag.
@@ -414,7 +397,7 @@
odst = ip6->ip6_dst;
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passin;
if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
@@ -426,21 +409,6 @@
passin:
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- /* If ipfw says divert, we have to just drop packet */
- /* use port as a dummy argument */
- if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
- m_freem(m);
- m = NULL;
- }
- if (!m)
- return;
- }
-
- /*
* Disambiguate address scope zones (if there is ambiguity).
* We first make sure that the original source or destination address
* is not in our internal form for scoped addresses. Such addresses
@@ -464,7 +432,7 @@
* Multicast check
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
- struct in6_multi *in6m = 0;
+ struct in6_multi *in6m = 0;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
/*
@@ -573,11 +541,13 @@
ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
goto hbhcheck;
} else {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
/* address is not ready, so discard the packet. */
nd6log((LOG_INFO,
"ip6_input: packet to an unready address %s->%s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst)));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
goto bad;
}
@@ -680,11 +650,25 @@
nxt = hbh->ip6h_nxt;
/*
- * accept the packet if a router alert option is included
- * and we act as an IPv6 router.
+ * If we are acting as a router and the packet contains a
+ * router alert option, see if we know the option value.
+ * Currently, we only support the option value for MLD, in which
+ * case we should pass the packet to the multicast routing
+ * daemon.
*/
- if (rtalert != ~0 && ip6_forwarding)
- ours = 1;
+ if (rtalert != ~0 && ip6_forwarding) {
+ switch (rtalert) {
+ case IP6OPT_RTALERT_MLD:
+ ours = 1;
+ break;
+ default:
+ /*
+ * RFC2711 requires unrecognized values must be
+ * silently ignored.
+ */
+ break;
+ }
+ }
} else
nxt = ip6->ip6_nxt;
@@ -719,7 +703,8 @@
* ip6_mforward() returns a non-zero value, the packet
* must be discarded, else it may be accepted below.
*/
- if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
+ if (ip6_mrouter && ip6_mforward &&
+ ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
ip6stat.ip6s_cantforward++;
m_freem(m);
return;
@@ -780,12 +765,9 @@
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
- if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
- ipsec6_in_reject(m, NULL)) {
- ipsec6stat.in_polvio++;
+ if (ip6_ipsec_input(m, nxt))
goto bad;
- }
-#endif
+#endif /* IPSEC */
nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
}
return;
@@ -798,9 +780,7 @@
* XXX backward compatibility wrapper
*/
static struct ip6aux *
-ip6_setdstifaddr(m, ia6)
- struct mbuf *m;
- struct in6_ifaddr *ia6;
+ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
{
struct ip6aux *ip6a;
@@ -811,8 +791,7 @@
}
struct in6_ifaddr *
-ip6_getdstifaddr(m)
- struct mbuf *m;
+ip6_getdstifaddr(struct mbuf *m)
{
struct ip6aux *ip6a;
@@ -826,13 +805,12 @@
/*
* Hop-by-Hop options header processing. If a valid jumbo payload option is
* included, the real payload length will be stored in plenp.
+ *
+ * rtalertp - XXX: should be stored more smart way
*/
static int
-ip6_hopopts_input(plenp, rtalertp, mp, offp)
- u_int32_t *plenp;
- u_int32_t *rtalertp; /* XXX: should be stored more smart way */
- struct mbuf **mp;
- int *offp;
+ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
+ struct mbuf **mp, int *offp)
{
struct mbuf *m = *mp;
int off = *offp, hbhlen;
@@ -886,12 +864,8 @@
* opthead + hbhlen is located in continuous memory region.
*/
int
-ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
- struct mbuf *m;
- u_int8_t *opthead;
- int hbhlen;
- u_int32_t *rtalertp;
- u_int32_t *plenp;
+ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
+ u_int32_t *rtalertp, u_int32_t *plenp)
{
struct ip6_hdr *ip6;
int optlen = 0;
@@ -1023,10 +997,7 @@
* is not continuous in order to return an ICMPv6 error.
*/
int
-ip6_unknown_opt(optp, m, off)
- u_int8_t *optp;
- struct mbuf *m;
- int off;
+ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
{
struct ip6_hdr *ip6;
@@ -1067,9 +1038,7 @@
* very first mbuf on the mbuf chain.
*/
void
-ip6_savecontrol(in6p, m, mp)
- struct inpcb *in6p;
- struct mbuf *m, **mp;
+ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
{
#define IS2292(x, y) ((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1273,7 +1242,7 @@
default:
/*
- * other cases have been filtered in the above.
+ * other cases have been filtered in the above.
* none will visit this case. here we supply
* the code just in case (nxt overwritten or
* other cases).
@@ -1302,10 +1271,7 @@
}
void
-ip6_notify_pmtu(in6p, dst, mtu)
- struct inpcb *in6p;
- struct sockaddr_in6 *dst;
- u_int32_t *mtu;
+ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
{
struct socket *so;
struct mbuf *m_mtu;
@@ -1347,10 +1313,7 @@
* contains the result, or NULL on error.
*/
static struct mbuf *
-ip6_pullexthdr(m, off, nxt)
- struct mbuf *m;
- size_t off;
- int nxt;
+ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
struct ip6_ext ip6e;
size_t elen;
@@ -1410,9 +1373,7 @@
* we develop `neater' mechanism to process extension headers.
*/
char *
-ip6_get_prevhdr(m, off)
- struct mbuf *m;
- int off;
+ip6_get_prevhdr(struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1451,11 +1412,7 @@
* get next header offset. m will be retained.
*/
int
-ip6_nexthdr(m, off, proto, nxtp)
- struct mbuf *m;
- int off;
- int proto;
- int *nxtp;
+ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
struct ip6_hdr ip6;
struct ip6_ext ip6e;
@@ -1530,11 +1487,7 @@
* get offset for the last header in the chain. m will be kept untainted.
*/
int
-ip6_lasthdr(m, off, proto, nxtp)
- struct mbuf *m;
- int off;
- int proto;
- int *nxtp;
+ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
int newoff;
int nxt;
@@ -1558,8 +1511,7 @@
}
struct ip6aux *
-ip6_addaux(m)
- struct mbuf *m;
+ip6_addaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1576,8 +1528,7 @@
}
struct ip6aux *
-ip6_findaux(m)
- struct mbuf *m;
+ip6_findaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1586,8 +1537,7 @@
}
void
-ip6_delaux(m)
- struct mbuf *m;
+ip6_delaux(struct mbuf *m)
{
struct m_tag *mtag;
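
The open-coded IPsec policy check that ip6_input() used to carry (reject the packet and bump ipsec6stat.in_polvio when the last header's protocol requires it) is now delegated to ip6_ipsec_input() in the newly added netinet6/ip6_ipsec.c. Judging purely from the removed lines, a helper of roughly this shape would preserve the old behaviour; this is a reconstruction for illustration, not the actual ip6_ipsec.c code:

    /*
     * Hypothetical reconstruction of the check formerly inlined in
     * ip6_input(); returns non-zero when the packet must be dropped.
     */
    static int
    ip6_ipsec_input_sketch(struct mbuf *m, int nxt)
    {

        if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
            ipsec6_in_reject(m, NULL)) {
            ipsec6stat.in_polvio++;
            return (1);
        }
        return (0);
    }
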
Index: ip6_id.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_id.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_id.c -L sys/netinet6/ip6_id.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_id.c
+++ sys/netinet6/ip6_id.c
@@ -1,6 +1,6 @@
/* $KAME: ip6_id.c,v 1.13 2003/09/16 09:11:19 itojun Exp $ */
/* $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $ */
-/* $FreeBSD: src/sys/netinet6/ip6_id.c,v 1.7 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_id.c,v 1.8 2007/07/05 16:23:47 delphij Exp $ */
/*-
* Copyright (C) 2003 WIDE Project.
@@ -152,7 +152,6 @@
* Do a fast modular exponation, returned value will be in the range
* of 0 - (mod-1)
*/
-
static u_int32_t
pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod)
{
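
The pmod() routine whose comment appears above is the usual square-and-multiply ("fast") modular exponentiation used by the randomized ID generator in this file. A self-contained rendering of that algorithm, using a 64-bit intermediate so the 32-bit multiplications cannot overflow:

    #include <stdint.h>

    /* Compute (gen ^ expo) mod "mod" in O(log expo) multiplications. */
    static uint32_t
    pmod_sketch(uint32_t gen, uint32_t expo, uint32_t mod)
    {
        uint64_t s = 1 % mod;       /* running result */
        uint64_t t = gen % mod;     /* repeatedly squared base */

        while (expo != 0) {
            if (expo & 1)           /* low bit of the exponent set */
                s = (s * t) % mod;
            t = (t * t) % mod;      /* square for the next bit */
            expo >>= 1;
        }
        return ((uint32_t)s);
    }
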
Index: in6_gif.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_gif.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_gif.c -L sys/netinet6/in6_gif.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_gif.c
+++ sys/netinet6/in6_gif.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.22.2.2 2006/01/31 15:56:47 glebius Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.29 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <net/if_gif.h>
-#include <net/net_osdep.h>
-
static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
struct ifnet *);
@@ -83,10 +81,9 @@
};
int
-in6_gif_output(ifp, family, m)
- struct ifnet *ifp;
- int family; /* family of the packet to be encapsulate. */
- struct mbuf *m;
+in6_gif_output(struct ifnet *ifp,
+ int family, /* family of the packet to be encapsulate */
+ struct mbuf *m)
{
struct gif_softc *sc = ifp->if_softc;
struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
@@ -139,16 +136,16 @@
}
#endif
case AF_LINK:
- proto = IPPROTO_ETHERIP;
- eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
- eiphdr.eip_pad = 0;
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
+ proto = IPPROTO_ETHERIP;
+ eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
+ eiphdr.eip_pad = 0;
+ /* prepend Ethernet-in-IP header */
+ M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
+ if (m && m->m_len < sizeof(struct etherip_header))
+ m = m_pullup(m, sizeof(struct etherip_header));
+ if (m == NULL)
+ return ENOBUFS;
+ bcopy(&eiphdr, mtod(m, struct etherip_header *),
sizeof(struct etherip_header));
break;
@@ -245,9 +242,7 @@
}
int
-in6_gif_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+in6_gif_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ifnet *gifp = NULL;
@@ -318,9 +313,9 @@
break;
}
#endif
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
default:
ip6stat.ip6s_nogif++;
@@ -336,10 +331,8 @@
* validate outer address.
*/
static int
-gif_validate6(ip6, sc, ifp)
- const struct ip6_hdr *ip6;
- struct gif_softc *sc;
- struct ifnet *ifp;
+gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
+ struct ifnet *ifp)
{
struct sockaddr_in6 *src, *dst;
@@ -371,9 +364,10 @@
rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
if (!rt || rt->rt_ifp != ifp) {
#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
log(LOG_WARNING, "%s: packet from %s dropped "
"due to ingress filter\n", if_name(GIF2IFP(sc)),
- ip6_sprintf(&sin6.sin6_addr));
+ ip6_sprintf(ip6buf, &sin6.sin6_addr));
#endif
if (rt)
rtfree(rt);
@@ -391,11 +385,7 @@
* sanity check for arg should have been done in the caller.
*/
int
-gif_encapcheck6(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip6_hdr ip6;
struct gif_softc *sc;
@@ -412,19 +402,17 @@
}
int
-in6_gif_attach(sc)
- struct gif_softc *sc;
+in6_gif_attach(struct gif_softc *sc)
{
sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (struct protosw *)&in6_gif_protosw, sc);
+ (void *)&in6_gif_protosw, sc);
if (sc->encap_cookie6 == NULL)
return EEXIST;
return 0;
}
int
-in6_gif_detach(sc)
- struct gif_softc *sc;
+in6_gif_detach(struct gif_softc *sc)
{
int error;
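
The AF_LINK case above is only re-indented, but it shows the standard mbuf idiom for prepending an encapsulation header (here the EtherIP header gif(4) places in front of bridged frames): M_PREPEND() grows the chain, m_pullup() makes the header contiguous when the leading mbuf is too short, and only then is the header copied in. The same lines again, condensed and commented:

        proto = IPPROTO_ETHERIP;
        eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
        eiphdr.eip_pad = 0;

        /* Grow the chain by one EtherIP header; may fail, leaving m NULL. */
        M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);

        /* Make sure the new header is contiguous before writing into it. */
        if (m && m->m_len < sizeof(struct etherip_header))
            m = m_pullup(m, sizeof(struct etherip_header));
        if (m == NULL)
            return ENOBUFS;

        bcopy(&eiphdr, mtod(m, struct etherip_header *),
            sizeof(struct etherip_header));
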
Index: in6_ifattach.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_ifattach.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_ifattach.c -L sys/netinet6/in6_ifattach.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_ifattach.c
+++ sys/netinet6/in6_ifattach.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.26.2.5 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.39 2007/07/05 16:23:47 delphij Exp $ */
/* $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ */
/*-
@@ -58,8 +58,6 @@
#include <netinet6/nd6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
unsigned long in6_maxmtu = 0;
#ifdef IP6_AUTO_LINKLOCAL
@@ -78,6 +76,7 @@
static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *));
static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *));
static int in6_ifattach_loopback __P((struct ifnet *));
+static void in6_purgemaddrs __P((struct ifnet *));
#define EUI64_GBIT 0x01
#define EUI64_UBIT 0x02
@@ -96,11 +95,11 @@
* The goal here is to get an interface identifier that is
* (1) random enough and (2) does not change across reboot.
* We currently use MD5(hostname) for it.
+ *
+ * in6 - upper 64bits are preserved
*/
static int
-get_rand_ifid(ifp, in6)
- struct ifnet *ifp;
- struct in6_addr *in6; /* upper 64bits are preserved */
+get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6)
{
MD5_CTX ctxt;
u_int8_t digest[16];
@@ -132,15 +131,13 @@
}
static int
-generate_tmp_ifid(seed0, seed1, ret)
- u_int8_t *seed0, *ret;
- const u_int8_t *seed1;
+generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret)
{
MD5_CTX ctxt;
u_int8_t seed[16], digest[16], nullbuf[8];
u_int32_t val32;
- /* If there's no hisotry, start with a random seed. */
+ /* If there's no history, start with a random seed. */
bzero(nullbuf, sizeof(nullbuf));
if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
int i;
@@ -215,11 +212,11 @@
/*
* Get interface identifier for the specified interface.
* XXX assumes single sockaddr_dl (AF_LINK address) per an interface
+ *
+ * in6 - upper 64bits are preserved
*/
int
-in6_get_hw_ifid(ifp, in6)
- struct ifnet *ifp;
- struct in6_addr *in6; /* upper 64bits are preserved */
+in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
{
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
@@ -348,12 +345,12 @@
* Get interface identifier for the specified interface. If it is not
* available on ifp0, borrow interface identifier from other information
* sources.
+ *
+ * altifp - secondary EUI64 source
*/
static int
-get_ifid(ifp0, altifp, in6)
- struct ifnet *ifp0;
- struct ifnet *altifp; /* secondary EUI64 source */
- struct in6_addr *in6;
+get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
+ struct in6_addr *in6)
{
struct ifnet *ifp;
@@ -412,10 +409,11 @@
return 0;
}
+/*
+ * altifp - secondary EUI64 source
+ */
static int
-in6_ifattach_linklocal(ifp, altifp)
- struct ifnet *ifp;
- struct ifnet *altifp; /* secondary EUI64 source */
+in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
{
struct in6_ifaddr *ia;
struct in6_aliasreq ifra;
@@ -527,9 +525,11 @@
return 0;
}
+/*
+ * ifp - must be IFT_LOOP
+ */
static int
-in6_ifattach_loopback(ifp)
- struct ifnet *ifp; /* must be IFT_LOOP */
+in6_ifattach_loopback(struct ifnet *ifp)
{
struct in6_aliasreq ifra;
int error;
@@ -589,11 +589,8 @@
* when ifp == NULL, the caller is responsible for filling scopeid.
*/
int
-in6_nigroup(ifp, name, namelen, in6)
- struct ifnet *ifp;
- const char *name;
- int namelen;
- struct in6_addr *in6;
+in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
{
const char *p;
u_char *q;
@@ -639,11 +636,11 @@
* XXX multiple loopback interface needs more care. for instance,
* nodelocal address needs to be configured onto only one of them.
* XXX multiple link-local address case
+ *
+ * altifp - secondary EUI64 source
*/
void
-in6_ifattach(ifp, altifp)
- struct ifnet *ifp;
- struct ifnet *altifp; /* secondary EUI64 source */
+in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
{
struct in6_ifaddr *ia;
struct in6_addr in6;
@@ -725,16 +722,14 @@
* from the ifnet list in bsdi.
*/
void
-in6_ifdetach(ifp)
- struct ifnet *ifp;
+in6_ifdetach(struct ifnet *ifp)
{
struct in6_ifaddr *ia, *oia;
struct ifaddr *ifa, *next;
struct rtentry *rt;
short rtflags;
struct sockaddr_in6 sin6;
- struct in6_multi *in6m;
- struct in6_multi *in6m_next;
+ struct in6_multi_mship *imm;
/* remove neighbor management table */
nd6_purge(ifp);
@@ -758,6 +753,14 @@
ia = (struct in6_ifaddr *)ifa;
+ /*
+ * leave from multicast groups we have joined for the interface
+ */
+ while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
+
/* remove from the routing table */
if ((ia->ia_flags & IFA_ROUTE) &&
(rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) {
@@ -792,20 +795,10 @@
IFAFREE(&oia->ia_ifa);
}
+ in6_pcbpurgeif0(&udbinfo, ifp);
+ in6_pcbpurgeif0(&ripcbinfo, ifp);
/* leave from all multicast groups joined */
-
- if (udbinfo.listhead != NULL)
- in6_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp);
- if (ripcbinfo.listhead != NULL)
- in6_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp);
-
- for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) {
- in6m_next = LIST_NEXT(in6m, in6m_entry);
- if (in6m->in6m_ifp != ifp)
- continue;
- in6_delmulti(in6m);
- in6m = NULL;
- }
+ in6_purgemaddrs(ifp);
/*
* remove neighbor management table. we call it twice just to make
@@ -839,11 +832,8 @@
}
int
-in6_get_tmpifid(ifp, retbuf, baseid, generate)
- struct ifnet *ifp;
- u_int8_t *retbuf;
- const u_int8_t *baseid;
- int generate;
+in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
+ const u_int8_t *baseid, int generate)
{
u_int8_t nullbuf[8];
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
@@ -867,8 +857,7 @@
}
void
-in6_tmpaddrtimer(ignored_arg)
- void *ignored_arg;
+in6_tmpaddrtimer(void *ignored_arg)
{
struct nd_ifinfo *ndi;
u_int8_t nullbuf[8];
@@ -894,3 +883,21 @@
splx(s);
}
+
+static void
+in6_purgemaddrs(struct ifnet *ifp)
+{
+ struct in6_multi *in6m;
+ struct in6_multi *oin6m;
+
+#ifdef DIAGNOSTIC
+ printf("%s: purging ifp %p\n", __func__, ifp);
+#endif
+
+ IFF_LOCKGIANT(ifp);
+ LIST_FOREACH_SAFE(in6m, &in6_multihead, in6m_entry, oin6m) {
+ if (in6m->in6m_ifp == ifp)
+ in6_delmulti(in6m);
+ }
+ IFF_UNLOCKGIANT(ifp);
+}
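
The new in6_purgemaddrs() above relies on LIST_FOREACH_SAFE, which saves the successor pointer before each iteration so in6_delmulti() may unlink and free the current entry without derailing the traversal. A small self-contained demonstration of the same idiom with the BSD <sys/queue.h> macros (the element type is invented for the example):

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
        int              value;
        LIST_ENTRY(item) link;
    };

    static LIST_HEAD(, item) head = LIST_HEAD_INITIALIZER(head);

    int
    main(void)
    {
        struct item *it, *tmp;
        int i;

        for (i = 0; i < 5; i++) {
            it = malloc(sizeof(*it));
            it->value = i;
            LIST_INSERT_HEAD(&head, it, link);
        }

        /*
         * "tmp" caches the next element before the body runs, so it is
         * safe to remove and free "it" here; a plain LIST_FOREACH would
         * read a freed entry to find its successor.
         */
        LIST_FOREACH_SAFE(it, &head, link, tmp) {
            if (it->value & 1) {
                LIST_REMOVE(it, link);
                free(it);
            }
        }

        LIST_FOREACH(it, &head, link)
            printf("%d\n", it->value);
        return (0);
    }
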
Index: in6_pcb.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_pcb.c -L sys/netinet6/in6_pcb.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -1,6 +1,6 @@
-/* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.62.2.2 2005/12/25 12:44:12 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.84.2.1 2007/12/21 14:25:43 rwatson Exp $ */
/* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */
-
+
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -65,6 +65,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -77,6 +78,7 @@
#include <sys/sockio.h>
#include <sys/errno.h>
#include <sys/time.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
@@ -99,30 +101,18 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netinet6/ah.h>
-#ifdef INET6
-#include <netinet6/ah6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
struct in6_addr zeroin6_addr;
int
-in6_pcbbind(inp, nam, cred)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct ucred *cred;
+in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred)
{
struct socket *so = inp->inp_socket;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
@@ -138,7 +128,7 @@
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = 1;
+ wild = INPLOOKUP_WILDCARD;
if (nam) {
int error;
@@ -188,11 +178,14 @@
struct inpcb *t;
/* GROSS */
- if (ntohs(lport) < IPV6PORT_RESERVED &&
- suser_cred(cred, SUSER_ALLOWJAIL))
+ if (ntohs(lport) <= ipport_reservedhigh &&
+ ntohs(lport) >= ipport_reservedlow &&
+ priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
+ 0))
return (EACCES);
- if (so->so_cred->cr_uid != 0 &&
- !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
+ priv_check_cred(so->so_cred,
+ PRIV_NETINET_REUSEPORT, 0) != 0) {
t = in6_pcblookup_local(pcbinfo,
&sin6->sin6_addr, lport,
INPLOOKUP_WILDCARD);
@@ -201,8 +194,8 @@
(so->so_type != SOCK_STREAM ||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
- !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_socket->so_options & SO_REUSEPORT)
+ !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
+ (t->inp_socket->so_options & SO_REUSEPORT)
== 0) && (so->so_cred->cr_uid !=
t->inp_socket->so_cred->cr_uid))
return (EADDRINUSE);
@@ -228,7 +221,7 @@
t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
lport, wild);
if (t && (reuseport & ((t->inp_vflag & INP_TIMEWAIT) ?
- intotw(t)->tw_so_options :
+ intotw(t)->tw_so_options :
t->inp_socket->so_options)) == 0)
return (EADDRINUSE);
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
@@ -239,17 +232,17 @@
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
lport, wild);
if (t && t->inp_vflag & INP_TIMEWAIT) {
- if ((reuseport &
+ if ((reuseport &
intotw(t)->tw_so_options) == 0 &&
(ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || ((inp->inp_vflag &
- INP_IPV6PROTO) ==
+ INADDR_ANY || ((inp->inp_vflag &
+ INP_IPV6PROTO) ==
(t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
}
- else if (t &&
- (reuseport & t->inp_socket->so_options)
- == 0 && (ntohl(t->inp_laddr.s_addr) !=
+ else if (t &&
+ (reuseport & t->inp_socket->so_options)
+ == 0 && (ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY || INP_SOCKAF(so) ==
INP_SOCKAF(t->inp_socket)))
return (EADDRINUSE);
@@ -284,18 +277,18 @@
* a bit of a kludge, but cleaning up the internal interfaces would
* have forced minor changes in every protocol).
*/
-
int
-in6_pcbladdr(inp, nam, plocal_addr6)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct in6_addr **plocal_addr6;
+in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
+ struct in6_addr **plocal_addr6)
{
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
int error = 0;
struct ifnet *ifp = NULL;
int scope_ambiguous = 0;
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
if (sin6->sin6_family != AF_INET6)
@@ -308,9 +301,6 @@
if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
return(error);
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
- INP_LOCK_ASSERT(inp);
-
if (in6_ifaddr) {
/*
* If the destination address is UNSPECIFIED addr,
@@ -355,10 +345,8 @@
* then pick one.
*/
int
-in6_pcbconnect(inp, nam, cred)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct ucred *cred;
+in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred)
{
struct in6_addr *addr6;
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
@@ -398,16 +386,12 @@
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash(inp);
-#ifdef IPSEC
- if (inp->inp_socket->so_type == SOCK_STREAM)
- ipsec_pcbconn(inp->inp_sp);
-#endif
+
return (0);
}
void
-in6_pcbdisconnect(inp)
- struct inpcb *inp;
+in6_pcbdisconnect(struct inpcb *inp)
{
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
@@ -418,52 +402,49 @@
/* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
in_pcbrehash(inp);
-#ifdef IPSEC
- ipsec_pcbdisconn(inp->inp_sp);
-#endif
- if (inp->inp_socket->so_state & SS_NOFDREF)
- in6_pcbdetach(inp);
}
void
-in6_pcbdetach(inp)
- struct inpcb *inp;
+in6_pcbdetach(struct inpcb *inp)
+{
+
+ KASSERT(inp->inp_socket != NULL, ("in6_pcbdetach: inp_socket == NULL"));
+ inp->inp_socket->so_pcb = NULL;
+ inp->inp_socket = NULL;
+}
+
+void
+in6_pcbfree(struct inpcb *inp)
{
- struct socket *so = inp->inp_socket;
struct inpcbinfo *ipi = inp->inp_pcbinfo;
+ KASSERT(inp->inp_socket == NULL, ("in6_pcbfree: inp_socket != NULL"));
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_LOCK_ASSERT(inp);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
if (inp->in6p_sp != NULL)
ipsec6_delete_pcbpolicy(inp);
#endif /* IPSEC */
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
-
- if (so) {
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- sotryfree(so);
- }
-
- ip6_freepcbopts(inp->in6p_outputopts);
- ip6_freemoptions(inp->in6p_moptions);
+ ip6_freepcbopts(inp->in6p_outputopts);
+ ip6_freemoptions(inp->in6p_moptions);
/* Check and free IPv4 related resources in case of mapped addr */
if (inp->inp_options)
(void)m_free(inp->inp_options);
- ip_freemoptions(inp->inp_moptions);
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
- INP_LOCK_DESTROY(inp);
+#ifdef MAC
+ mac_destroy_inpcb(inp);
+#endif
+ INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}
struct sockaddr *
-in6_sockaddr(port, addr_p)
- in_port_t port;
- struct in6_addr *addr_p;
+in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
{
struct sockaddr_in6 *sin6;
@@ -479,9 +460,7 @@
}
struct sockaddr *
-in6_v4mapsin6_sockaddr(port, addr_p)
- in_port_t port;
- struct in_addr *addr_p;
+in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
{
struct sockaddr_in sin;
struct sockaddr_in6 *sin6_p;
@@ -499,59 +478,39 @@
return (struct sockaddr *)sin6_p;
}
-/*
- * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was
- * modified to match the pru_sockaddr() and pru_peeraddr() entry points
- * in struct pr_usrreqs, so that protocols can just reference then directly
- * without the need for a wrapper function. The socket must have a valid
- * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
- * except through a kernel programming error, so it is acceptable to panic
- * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
- * because there actually /is/ a programming error somewhere... XXX)
- */
int
-in6_setsockaddr(so, nam)
- struct socket *so;
- struct sockaddr **nam;
+in6_getsockaddr(struct socket *so, struct sockaddr **nam)
{
- int s;
register struct inpcb *inp;
struct in6_addr addr;
in_port_t port;
- s = splnet();
inp = sotoinpcb(so);
- if (!inp) {
- splx(s);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
+
+ INP_LOCK(inp);
port = inp->inp_lport;
addr = inp->in6p_laddr;
- splx(s);
+ INP_UNLOCK(inp);
*nam = in6_sockaddr(port, &addr);
return 0;
}
int
-in6_setpeeraddr(so, nam)
- struct socket *so;
- struct sockaddr **nam;
+in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
- int s;
struct inpcb *inp;
struct in6_addr addr;
in_port_t port;
- s = splnet();
inp = sotoinpcb(so);
- if (!inp) {
- splx(s);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
+
+ INP_LOCK(inp);
port = inp->inp_fport;
addr = inp->in6p_faddr;
- splx(s);
+ INP_UNLOCK(inp);
*nam = in6_sockaddr(port, &addr);
return 0;
@@ -560,18 +519,19 @@
int
in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
{
- struct inpcb *inp = sotoinpcb(so);
+ struct inpcb *inp;
int error;
- if (inp == NULL)
- return EINVAL;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
+
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
- error = in_setsockaddr(so, nam, &tcbinfo);
+ error = in_getsockaddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
} else {
- /* scope issues will be handled in in6_setsockaddr(). */
- error = in6_setsockaddr(so, nam);
+ /* scope issues will be handled in in6_getsockaddr(). */
+ error = in6_getsockaddr(so, nam);
}
return error;
@@ -580,18 +540,19 @@
int
in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
{
- struct inpcb *inp = sotoinpcb(so);
+ struct inpcb *inp;
int error;
- if (inp == NULL)
- return EINVAL;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
+
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
- error = in_setpeeraddr(so, nam, &tcbinfo);
+ error = in_getpeeraddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
} else
- /* scope issues will be handled in in6_setpeeraddr(). */
- error = in6_setpeeraddr(so, nam);
+ /* scope issues will be handled in in6_getpeeraddr(). */
+ error = in6_getpeeraddr(so, nam);
return error;
}
@@ -604,25 +565,19 @@
* cmds that are uninteresting (e.g., no error in the map).
* Call the protocol specific routine (if any) to report
* any errors for each matching socket.
- *
- * Must be called at splnet.
*/
void
-in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
- struct inpcbinfo *pcbinfo;
- struct sockaddr *dst;
- const struct sockaddr *src;
- u_int fport_arg, lport_arg;
- int cmd;
- void *cmdarg;
- struct inpcb *(*notify) __P((struct inpcb *, int));
+in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
+ u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
+ int cmd, void *cmdarg,
+ struct inpcb *(*notify) __P((struct inpcb *, int)))
{
struct inpcbhead *head;
struct inpcb *inp, *ninp;
struct sockaddr_in6 sa6_src, *sa6_dst;
u_short fport = fport_arg, lport = lport_arg;
u_int32_t flowinfo;
- int errno, s;
+ int errno;
if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
return;
@@ -654,14 +609,13 @@
notify = in6_rtchange;
}
errno = inet6ctlerrmap[cmd];
- s = splnet();
- head = pcbinfo->listhead;
+ head = pcbinfo->ipi_listhead;
INP_INFO_WLOCK(pcbinfo);
- for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
+ for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
INP_LOCK(inp);
- ninp = LIST_NEXT(inp, inp_list);
+ ninp = LIST_NEXT(inp, inp_list);
- if ((inp->inp_vflag & INP_IPV6) == 0) {
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
INP_UNLOCK(inp);
continue;
}
@@ -715,31 +669,29 @@
INP_UNLOCK(inp);
}
INP_INFO_WUNLOCK(pcbinfo);
- splx(s);
}
/*
* Lookup a PCB based on the local address and port.
*/
struct inpcb *
-in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
- struct inpcbinfo *pcbinfo;
- struct in6_addr *laddr;
- u_int lport_arg;
- int wild_okay;
+in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
+ u_int lport_arg, int wild_okay)
{
register struct inpcb *inp;
int matchwild = 3, wildcard;
u_short lport = lport_arg;
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+
if (!wild_okay) {
struct inpcbhead *head;
/*
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -766,8 +718,8 @@
* First see if this local port is in use by looking on the
* port hash list.
*/
- porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
- pcbinfo->porthashmask)];
+ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+ pcbinfo->ipi_porthashmask)];
LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
@@ -808,15 +760,15 @@
}
void
-in6_pcbpurgeif0(head, ifp)
- struct in6pcb *head;
- struct ifnet *ifp;
+in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
struct in6pcb *in6p;
struct ip6_moptions *im6o;
struct in6_multi_mship *imm, *nimm;
- for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
+ INP_INFO_RLOCK(pcbinfo);
+ LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
+ INP_LOCK(in6p);
im6o = in6p->in6p_moptions;
if ((in6p->inp_vflag & INP_IPV6) &&
im6o) {
@@ -843,7 +795,9 @@
}
}
}
+ INP_UNLOCK(in6p);
}
+ INP_INFO_RUNLOCK(pcbinfo);
}
/*
@@ -853,9 +807,9 @@
* (by a redirect), time to try a default gateway again.
*/
void
-in6_losing(in6p)
- struct inpcb *in6p;
+in6_losing(struct inpcb *in6p)
{
+
/*
* We don't store route pointers in the routing table anymore
*/
@@ -867,9 +821,7 @@
* and allocate a (hopefully) better one.
*/
struct inpcb *
-in6_rtchange(inp, errno)
- struct inpcb *inp;
- int errno;
+in6_rtchange(struct inpcb *inp, int errno)
{
/*
* We don't store route pointers in the routing table anymore
@@ -881,18 +833,17 @@
* Lookup PCB in hash list.
*/
struct inpcb *
-in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
- struct inpcbinfo *pcbinfo;
- struct in6_addr *faddr, *laddr;
- u_int fport_arg, lport_arg;
- int wildcard;
- struct ifnet *ifp;
+in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
+ int wildcard, struct ifnet *ifp)
{
struct inpcbhead *head;
register struct inpcb *inp;
u_short fport = fport_arg, lport = lport_arg;
int faith;
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+
if (faithprefix_p != NULL)
faith = (*faithprefix_p)(laddr);
else
@@ -901,9 +852,9 @@
/*
* First look for an exact match.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
- lport, fport,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[
+ INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -920,8 +871,8 @@
if (wildcard) {
struct inpcb *local_wild = NULL;
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
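
The in6_setsockaddr()/in6_setpeeraddr() pair becomes in6_getsockaddr()/in6_getpeeraddr() and, like the rest of this file, trades splnet() for inpcb locking: a missing PCB is now treated as a programming error (KASSERT) rather than an EINVAL return, and the address and port are snapshotted under INP_LOCK(). Condensed from the hunk above, the converted routine reads:

    int
    in6_getsockaddr(struct socket *so, struct sockaddr **nam)
    {
        struct inpcb *inp;
        struct in6_addr addr;
        in_port_t port;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));

        INP_LOCK(inp);              /* copy out under the PCB mutex */
        port = inp->inp_lport;
        addr = inp->in6p_laddr;
        INP_UNLOCK(inp);

        *nam = in6_sockaddr(port, &addr);
        return 0;
    }
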
Index: udp6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/udp6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/udp6_var.h -L sys/netinet6/udp6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/udp6_var.h
+++ sys/netinet6/udp6_var.h
@@ -26,12 +26,13 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet6/udp6_var.h,v 1.7 2005/01/07 02:30:35 imp Exp $
+ * $FreeBSD: src/sys/netinet6/udp6_var.h,v 1.9 2007/07/23 07:58:58 rwatson Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -66,13 +67,10 @@
#ifdef _KERNEL
SYSCTL_DECL(_net_inet6_udp6);
-extern struct pr_usrreqs udp6_usrreqs;
+extern struct pr_usrreqs udp6_usrreqs;
-void udp6_ctlinput __P((int, struct sockaddr *, void *));
-int udp6_input __P((struct mbuf **, int *, int));
-int udp6_output __P((struct inpcb *inp, struct mbuf *m,
- struct sockaddr *addr, struct mbuf *control,
- struct thread *td));
+void udp6_ctlinput(int, struct sockaddr *, void *);
+int udp6_input(struct mbuf **, int *, int);
#endif
#endif /*_NETINET6_UDP6_VAR_H_*/
Index: scope6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/scope6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/scope6_var.h -L sys/netinet6/scope6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/scope6_var.h
+++ sys/netinet6/scope6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/scope6_var.h,v 1.4.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/scope6_var.h,v 1.5 2005/07/25 12:31:42 ume Exp $ */
/* $KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $ */
/*-
Index: raw_ip6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/raw_ip6.c -L sys/netinet6/raw_ip6.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/raw_ip6.c
+++ sys/netinet6/raw_ip6.c
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet6/raw_ip6.c,v 1.50.2.7 2005/12/26 00:59:12 suz Exp $
+ * $FreeBSD: src/sys/netinet6/raw_ip6.c,v 1.73 2007/07/05 16:29:40 delphij Exp $
*/
/*-
@@ -68,13 +68,13 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
-#include <sys/systm.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -96,14 +96,9 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
#include <machine/stdarg.h>
@@ -122,14 +117,22 @@
struct rip6stat rip6stat;
/*
+ * Hooks for multicast forwarding.
+ */
+struct socket *ip6_mrouter = NULL;
+int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
+int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
+int (*ip6_mrouter_done)(void);
+int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
+int (*mrt6_ioctl)(int, caddr_t);
+
+/*
* Setup generic address and protocol structures
* for raw_input routine, then pass them along with
* mbuf chain.
*/
int
-rip6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+rip6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
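
raw_ip6.c now owns the multicast-forwarding entry points as nullable function pointers that the IPv6 multicast routing code fills in when it attaches, and every caller tests the pointer before dispatching, as the MRT6 setsockopt/getsockopt cases later in this file show (ip6_mrouter_set ? ... : EOPNOTSUPP). A minimal self-contained illustration of that optional-hook pattern; the names are invented for the example:

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Left NULL until an optional module registers its implementation. */
    static int (*mrouter_set_hook)(int optname) = NULL;

    static int
    module_mrouter_set(int optname)
    {
        printf("mrouter set, option %d\n", optname);
        return (0);
    }

    static int
    ctloutput(int optname)
    {
        /* Dispatch through the hook only when something registered it. */
        return (mrouter_set_hook ? mrouter_set_hook(optname) : EOPNOTSUPP);
    }

    int
    main(void)
    {
        printf("before attach: %d\n", ctloutput(100));  /* EOPNOTSUPP */
        mrouter_set_hook = module_mrouter_set;          /* "module" loads */
        printf("after attach:  %d\n", ctloutput(100));  /* 0 */
        return (0);
    }
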
@@ -176,18 +179,16 @@
if (last) {
struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/*
* Check AH/ESP integrity.
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
-#ifdef IPSEC
ipsec6stat.in_polvio++;
-#endif /*IPSEC*/
/* do not inject data into pcb */
} else
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
if (n) {
if (last->in6p_flags & IN6P_CONTROLOPTS ||
last->in6p_socket->so_options & SO_TIMESTAMP)
@@ -209,20 +210,18 @@
}
last = in6p;
}
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/*
* Check AH/ESP integrity.
*/
if (last && ipsec6_in_reject(m, last)) {
m_freem(m);
-#ifdef IPSEC
ipsec6stat.in_polvio++;
-#endif /*IPSEC*/
ip6stat.ip6s_delivered--;
/* do not inject data into pcb */
INP_UNLOCK(last);
} else
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
if (last) {
if (last->in6p_flags & IN6P_CONTROLOPTS ||
last->in6p_socket->so_options & SO_TIMESTAMP)
@@ -257,10 +256,7 @@
}
void
-rip6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct ip6_hdr *ip6;
struct mbuf *m;
@@ -342,7 +338,7 @@
INP_LOCK(in6p);
priv = 0;
- if (so->so_cred->cr_uid == 0)
+ if (suser_cred(so->so_cred, 0) == 0)
priv = 1;
dst = &dstsock->sin6_addr;
if (control) {
@@ -480,9 +476,7 @@
* Raw IPv6 socket option processing.
*/
int
-rip6_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+rip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error;
@@ -507,7 +501,8 @@
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
case MRT6_PIM:
- error = ip6_mrouter_get(so, sopt);
+ error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) :
+ EOPNOTSUPP;
break;
case IPV6_CHECKSUM:
error = ip6_raw_ctloutput(so, sopt);
@@ -527,7 +522,8 @@
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
case MRT6_PIM:
- error = ip6_mrouter_set(so, sopt);
+ error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) :
+ EOPNOTSUPP;
break;
case IPV6_CHECKSUM:
error = ip6_raw_ctloutput(so, sopt);
@@ -547,39 +543,27 @@
{
struct inpcb *inp;
struct icmp6_filter *filter;
- int error, s;
+ int error;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- panic("rip6_attach");
- }
- if (td && (error = suser(td)) != 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
+ if (td && (error = suser(td)) != 0)
return error;
- }
error = soreserve(so, rip_sendspace, rip_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (error)
return error;
- }
MALLOC(filter, struct icmp6_filter *,
sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
- if (filter == NULL) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (filter == NULL)
return ENOMEM;
- }
- s = splnet();
- error = in_pcballoc(so, &ripcbinfo, "raw6inp");
- splx(s);
+ INP_INFO_WLOCK(&ripcbinfo);
+ error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
FREE(filter, M_PCB);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV6;
inp->in6p_ip6_nxt = (long)proto;
@@ -591,35 +575,49 @@
return 0;
}
-static int
+static void
rip6_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- panic("rip6_detach");
- }
- /* xxx: RSVP */
- if (so == ip6_mrouter)
+ KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
+
+ if (so == ip6_mrouter && ip6_mrouter_done)
ip6_mrouter_done();
+ /* xxx: RSVP */
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
if (inp->in6p_icmp6filt) {
FREE(inp->in6p_icmp6filt, M_PCB);
inp->in6p_icmp6filt = NULL;
}
- INP_LOCK(inp);
in6_pcbdetach(inp);
+ in6_pcbfree(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
-static int
+/* XXXRW: This can't ever be called. */
+static void
rip6_abort(struct socket *so)
{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_abort: inp == NULL"));
+
+ soisdisconnected(so);
+}
+
+static void
+rip6_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_close: inp == NULL"));
+
soisdisconnected(so);
- return rip6_detach(so);
}
static int
@@ -630,7 +628,8 @@
if ((so->so_state & SS_ISCONNECTED) == 0)
return ENOTCONN;
inp->in6p_faddr = in6addr_any;
- return rip6_abort(so);
+ rip6_abort(so);
+ return (0);
}
static int
@@ -641,6 +640,7 @@
struct ifaddr *ia = NULL;
int error = 0;
+ KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
if (nam->sa_len != sizeof(*addr))
return EINVAL;
if (TAILQ_EMPTY(&ifnet) || addr->sin6_family != AF_INET6)
@@ -674,6 +674,7 @@
struct ifnet *ifp = NULL;
int error = 0, scope_ambiguous = 0;
+ KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
if (nam->sa_len != sizeof(*addr))
return EINVAL;
if (TAILQ_EMPTY(&ifnet))
@@ -726,10 +727,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&ripcbinfo);
inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&ripcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -737,13 +737,14 @@
static int
rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp = sotoinpcb(so);
struct sockaddr_in6 tmp;
struct sockaddr_in6 *dst;
int ret;
+ KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
INP_INFO_WLOCK(&ripcbinfo);
/* always copy sockaddr to avoid overwrites */
/* Unlocked read. */
@@ -802,8 +803,9 @@
.pru_control = in6_control,
.pru_detach = rip6_detach,
.pru_disconnect = rip6_disconnect,
- .pru_peeraddr = in6_setpeeraddr,
+ .pru_peeraddr = in6_getpeeraddr,
.pru_send = rip6_send,
.pru_shutdown = rip6_shutdown,
- .pru_sockaddr = in6_setsockaddr,
+ .pru_sockaddr = in6_getsockaddr,
+ .pru_close = rip6_close,
};
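
The raw_ip6.c hunks above replace direct calls into the IPv6 multicast routing code with nullable hook pointers (ip6_mrouter_set, ip6_mrouter_get, ip6_mrouter_done, ip6_mforward, mrt6_ioctl), so the MRT6_* socket options fail cleanly with EOPNOTSUPP whenever no multicast routing code has registered itself. The sketch below is a minimal user-space illustration of that pattern only; the names (mrouter_set_hook, module_mrouter_set, mrt6_setsockopt) and the void * arguments are placeholders, not the kernel's types.

    #include <errno.h>
    #include <stdio.h>

    /* Hook left NULL until the optional multicast-routing code loads. */
    static int (*mrouter_set_hook)(void *so, void *sopt);

    /* What the optional code would install at load time. */
    static int
    module_mrouter_set(void *so, void *sopt)
    {
        (void)so; (void)sopt;
        return (0);
    }

    /* Call site: guard the pointer and fail with EOPNOTSUPP when the
     * hook is absent, as the MRT6_* cases in rip6_ctloutput() now do. */
    static int
    mrt6_setsockopt(void *so, void *sopt)
    {
        return (mrouter_set_hook != NULL ?
            mrouter_set_hook(so, sopt) : EOPNOTSUPP);
    }

    int
    main(void)
    {
        printf("before load: %d\n", mrt6_setsockopt(NULL, NULL));
        mrouter_set_hook = module_mrouter_set;   /* module registers itself */
        printf("after load:  %d\n", mrt6_setsockopt(NULL, NULL));
        return (0);
    }

The point of the indirection is that the multicast routing code can later be built as an optional component and simply assign the pointers when it initializes.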
Index: in6_proto.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/in6_proto.c -L sys/netinet6/in6_proto.c -u -r1.2 -r1.3
--- sys/netinet6/in6_proto.c
+++ sys/netinet6/in6_proto.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.32.2.4 2005/11/16 10:31:23 ru Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.46 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */
/*-
@@ -66,6 +66,7 @@
#include "opt_ipsec.h"
#include "opt_ipstealth.h"
#include "opt_carp.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/socket.h>
@@ -102,44 +103,25 @@
#include <netinet6/pim6_var.h>
#include <netinet6/nd6.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netinet6/ah.h>
-#ifdef INET6
-#include <netinet6/ah6.h>
-#endif
-#ifdef IPSEC_ESP
-#include <netinet6/esp.h>
-#ifdef INET6
-#include <netinet6/esp6.h>
-#endif
-#endif
-#include <netinet6/ipcomp.h>
-#ifdef INET6
-#include <netinet6/ipcomp6.h>
-#endif
-#endif /* IPSEC */
-
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
-#ifdef FAST_IPSEC
+#ifdef SCTP
+#include <netinet/in_pcb.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_var.h>
+#include <netinet6/sctp6_var.h>
+#endif /* SCTP */
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#define IPSEC
-#define IPSEC_ESP
-#define ah6_input ipsec6_common_input
-#define esp6_input ipsec6_common_input
-#define ipcomp6_input ipsec6_common_input
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
-#include <net/net_osdep.h>
-
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -186,6 +168,42 @@
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp6_usrreqs,
},
+#ifdef SCTP
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+{
+ .pr_type = SOCK_SEQPACKET,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+#endif /* SCTP */
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
@@ -240,26 +258,24 @@
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ah6_input,
+ .pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
-#ifdef IPSEC_ESP
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ESP,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = esp6_input,
+ .pr_input = ipsec6_common_input,
.pr_ctlinput = esp6_ctlinput,
.pr_usrreqs = &nousrreqs,
},
-#endif
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPCOMP,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ipcomp6_input,
+ .pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
#endif /* IPSEC */
@@ -292,7 +308,7 @@
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim6_input,
+ .pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_usrreqs = &rip6_usrreqs
@@ -361,7 +377,7 @@
int ip6_maxfragpackets; /* initialized in frag6.c:frag6_init() */
int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
int ip6_log_interval = 5;
-int ip6_hdrnestlimit = 50; /* appropriate? */
+int ip6_hdrnestlimit = 15; /* How many header options will we process? */
int ip6_dad_count = 1; /* DupAddrDetectionTransmits */
int ip6_auto_flowlabel = 1;
int ip6_gif_hlim = 0;
@@ -376,8 +392,6 @@
#ifdef IPSTEALTH
int ip6stealth = 0;
#endif
-int ip6_rthdr0_allowed = 0; /* Disallow use of routing header 0 */
- /* by default. */
/* icmp6 */
/*
@@ -402,7 +416,8 @@
int icmp6_rediraccept = 1; /* accept and process redirects */
int icmp6_redirtimeout = 10 * 60; /* 10 minutes */
int icmp6errppslim = 100; /* 100pps */
-int icmp6_nodeinfo = 3; /* enable/disable NI response */
+/* control how to respond to NI queries */
+int icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
/* UDP on IP6 parameters */
int udp6_sendspace = 9216; /* really max datagram size */
@@ -420,6 +435,9 @@
SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6");
SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6");
SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6");
+#ifdef SCTP
+SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6");
+#endif
#ifdef IPSEC
SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6");
#endif /* IPSEC */
@@ -463,7 +481,7 @@
}
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING,
- forwarding, CTLFLAG_RW, &ip6_forwarding, 0, "");
+ forwarding, CTLFLAG_RW, &ip6_forwarding, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS,
redirect, CTLFLAG_RW, &ip6_sendredirects, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM,
@@ -504,6 +522,7 @@
sysctl_ip6_tempvltime, "I", "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY,
v6only, CTLFLAG_RW, &ip6_v6only, 0, "");
+TUNABLE_INT("net.inet6.ip6.auto_linklocal", &ip6_auto_linklocal);
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL,
auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal, 0, "");
SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
@@ -515,14 +534,11 @@
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS,
maxfrags, CTLFLAG_RW, &ip6_maxfrags, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU,
- mcast_pmtu, CTLFLAG_RW, &ip6_mcast_pmtu, 0, "");
+ mcast_pmtu, CTLFLAG_RW, &ip6_mcast_pmtu, 0, "");
#ifdef IPSTEALTH
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
&ip6stealth, 0, "");
#endif
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTHDR0_ALLOWED,
- rthdr0_allowed, CTLFLAG_RW, &ip6_rthdr0_allowed, 0, "");
-
/* net.inet6.icmp6 */
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT,
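
The in6_proto.c hunks register SCTP three times in the IPv6 protocol switch, once per socket type it supports (datagram, seqpacket, stream), each entry pointing at the same input and ctloutput handlers. Below is a cut-down, user-space sketch of how such a table is keyed and searched; struct sw_entry and pffindtype_sketch are made-up names, and the sketch assumes IPPROTO_SCTP is provided by <netinet/in.h>.

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Just enough fields to show why one protocol appears several times:
     * the table is keyed by (socket type, protocol number). */
    struct sw_entry {
        int         pr_type;        /* SOCK_DGRAM, SOCK_STREAM, ... */
        int         pr_protocol;    /* IPPROTO_* number */
        const char *pr_name;
    };

    static const struct sw_entry inet6sw_sketch[] = {
        { SOCK_DGRAM,     IPPROTO_SCTP, "sctp6 (dgram)" },
        { SOCK_SEQPACKET, IPPROTO_SCTP, "sctp6 (seqpacket)" },
        { SOCK_STREAM,    IPPROTO_SCTP, "sctp6 (stream)" },
    };

    /* The lookup a socket(AF_INET6, type, proto) call conceptually does. */
    static const struct sw_entry *
    pffindtype_sketch(int type, int proto)
    {
        size_t i;

        for (i = 0; i < sizeof(inet6sw_sketch) / sizeof(inet6sw_sketch[0]); i++)
            if (inet6sw_sketch[i].pr_type == type &&
                inet6sw_sketch[i].pr_protocol == proto)
                return (&inet6sw_sketch[i]);
        return (NULL);
    }

    int
    main(void)
    {
        const struct sw_entry *sw = pffindtype_sketch(SOCK_STREAM, IPPROTO_SCTP);

        printf("%s\n", sw != NULL ? sw->pr_name : "unsupported");
        return (0);
    }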
Index: dest6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/dest6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/dest6.c -L sys/netinet6/dest6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/dest6.c
+++ sys/netinet6/dest6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/dest6.c,v 1.10 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/dest6.c,v 1.11 2007/07/05 16:23:46 delphij Exp $ */
/* $KAME: dest6.c,v 1.59 2003/07/11 13:21:16 t-momose Exp $ */
/*-
@@ -57,9 +57,7 @@
* Destination options header processing.
*/
int
-dest6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+dest6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
int off = *offp, dstoptlen, optlen;
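
dest6.c, like most files in this patch, only converts old-style (K&R) function definitions to ANSI C prototypes. The toy example below shows the two forms side by side; the practical difference is that with the prototype form the compiler type-checks arguments at every call site. K&R definitions still compile under C89/C99, so the change is purely a modernization.

    #include <stdio.h>

    /* Old style, as removed throughout this patch: parameter types are
     * declared after the parameter list, so callers get no checking. */
    static int
    add_old(a, b)
        int a, b;
    {
        return (a + b);
    }

    /* New style: a full prototype, checked at every call site. */
    static int
    add_new(int a, int b)
    {
        return (a + b);
    }

    int
    main(void)
    {
        printf("%d %d\n", add_old(1, 2), add_new(3, 4));
        return (0);
    }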
Index: ip6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_var.h -L sys/netinet6/ip6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_var.h
+++ sys/netinet6/ip6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.30.2.6 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.39 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */
/*-
@@ -69,16 +69,13 @@
* being reassembled is attached to one of these structures.
*/
struct ip6q {
- u_int32_t ip6q_head;
- u_int16_t ip6q_len;
- u_int8_t ip6q_nxt; /* ip6f_nxt in first fragment */
- u_int8_t ip6q_hlim;
struct ip6asfrag *ip6q_down;
struct ip6asfrag *ip6q_up;
u_int32_t ip6q_ident;
- u_int8_t ip6q_arrive;
+ u_int8_t ip6q_nxt;
+ u_int8_t ip6q_ecn;
u_int8_t ip6q_ttl;
- struct in6_addr ip6q_src, ip6q_dst;
+ struct in6_addr ip6q_src, ip6q_dst;
struct ip6q *ip6q_next;
struct ip6q *ip6q_prev;
int ip6q_unfrglen; /* len of unfragmentable part */
@@ -89,11 +86,6 @@
};
struct ip6asfrag {
- u_int32_t ip6af_head;
- u_int16_t ip6af_len;
- u_int8_t ip6af_nxt;
- u_int8_t ip6af_hlim;
- /* must not override the above members during reassembling */
struct ip6asfrag *ip6af_down;
struct ip6asfrag *ip6af_up;
struct mbuf *ip6af_m;
@@ -298,7 +290,7 @@
extern int ip6_mcast_pmtu; /* enable pMTU discovery for multicast? */
extern int ip6_v6only;
-extern struct socket *ip6_mrouter; /* multicast routing daemon */
+extern struct socket *ip6_mrouter; /* multicast routing daemon */
extern int ip6_sendredirects; /* send IP redirects when forwarding? */
extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */
extern int ip6_maxfrags; /* Maximum fragments in reassembly queue */
@@ -353,7 +345,9 @@
struct ip6aux *ip6_findaux __P((struct mbuf *));
void ip6_delaux __P((struct mbuf *));
-int ip6_mforward __P((struct ip6_hdr *, struct ifnet *, struct mbuf *));
+extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
+ struct mbuf *);
+
int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *,
u_int32_t *));
void ip6_savecontrol __P((struct inpcb *, struct mbuf *, struct mbuf **));
Index: in6_rmx.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_rmx.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_rmx.c -L sys/netinet6/in6_rmx.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_rmx.c
+++ sys/netinet6/in6_rmx.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.14 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.18 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $ */
/*-
@@ -110,7 +110,7 @@
*/
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
- struct radix_node *treenodes)
+ struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
@@ -259,7 +259,7 @@
*/
if (rtq_reallyold != 0) {
rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+ rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
} else {
rtexpunge(rt);
}
@@ -290,7 +290,7 @@
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
+ if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
@@ -305,9 +305,9 @@
}
} else {
if (ap->updating
- && (rt->rt_rmx.rmx_expire - time_second
+ && (rt->rt_rmx.rmx_expire - time_uptime
> rtq_reallyold)) {
- rt->rt_rmx.rmx_expire = time_second
+ rt->rt_rmx.rmx_expire = time_uptime
+ rtq_reallyold;
}
ap->nextstop = lmin(ap->nextstop,
@@ -332,7 +332,7 @@
arg.found = arg.killed = 0;
arg.rnh = rnh;
- arg.nextstop = time_second + rtq_timeout;
+ arg.nextstop = time_uptime + rtq_timeout;
arg.draining = arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
@@ -347,14 +347,14 @@
* hard.
*/
if ((arg.found - arg.killed > rtq_toomany)
- && (time_second - last_adjusted_timeout >= rtq_timeout)
+ && (time_uptime - last_adjusted_timeout >= rtq_timeout)
&& rtq_reallyold > rtq_minreallyold) {
rtq_reallyold = 2*rtq_reallyold / 3;
if (rtq_reallyold < rtq_minreallyold) {
rtq_reallyold = rtq_minreallyold;
}
- last_adjusted_timeout = time_second;
+ last_adjusted_timeout = time_uptime;
#ifdef DIAGNOSTIC
log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
rtq_reallyold);
@@ -367,7 +367,7 @@
}
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
callout_reset(&rtq_timer, tvtohz(&atv), in6_rtqtimo, rock);
}
@@ -391,7 +391,7 @@
panic("rt == NULL in in6_mtuexpire");
if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
- if (rt->rt_rmx.rmx_expire <= time_second) {
+ if (rt->rt_rmx.rmx_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
ap->nextstop = lmin(ap->nextstop,
@@ -412,23 +412,24 @@
struct timeval atv;
arg.rnh = rnh;
- arg.nextstop = time_second + MTUTIMO_DEFAULT;
+ arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
if (atv.tv_sec < 0) {
printf("invalid mtu expiration time on routing table\n");
- arg.nextstop = 30; /* last resort */
+ arg.nextstop = time_uptime + 30; /* last resort */
+ atv.tv_sec = 30;
}
callout_reset(&rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
}
#if 0
void
-in6_rtqdrain()
+in6_rtqdrain(void)
{
struct radix_node_head *rnh = rt_tables[AF_INET6];
struct rtqk_arg arg;
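
The in6_rmx.c hunks switch route expiry bookkeeping from time_second (wall clock) to time_uptime (monotonic since boot), so stepping the system date no longer expires cloned routes early or keeps them around too long. Below is a user-space analogue of the same idea built on CLOCK_MONOTONIC; expire_wallclock and expire_monotonic are illustrative names only.

    #include <stdio.h>
    #include <time.h>

    /* Wall clock: jumps whenever the date is stepped, so an absolute
     * expiry computed from it can fire early or never. */
    static time_t
    expire_wallclock(time_t lifetime)
    {
        return (time(NULL) + lifetime);
    }

    /* Monotonic clock: only moves forward, which is the property the
     * time_second -> time_uptime conversion above relies on. */
    static time_t
    expire_monotonic(time_t lifetime)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (ts.tv_sec + lifetime);
    }

    int
    main(void)
    {
        printf("%ld %ld\n", (long)expire_wallclock(600),
            (long)expire_monotonic(600));
        return (0);
    }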
Index: route6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/route6.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/route6.c -L sys/netinet6/route6.c -u -r1.2 -r1.3
--- sys/netinet6/route6.c
+++ sys/netinet6/route6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/route6.c,v 1.11.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/route6.c,v 1.14 2007/07/05 16:23:48 delphij Exp $ */
/* $KAME: route6.c,v 1.24 2001/03/14 03:07:05 itojun Exp $ */
/*-
@@ -49,15 +49,18 @@
#include <netinet/icmp6.h>
-extern int ip6_rthdr0_allowed;
-
+#if 0
static int ip6_rthdr0 __P((struct mbuf *, struct ip6_hdr *,
struct ip6_rthdr0 *));
+#endif /* Disable route header processing. */
+
+/*
+ * proto - is unused
+ */
+
int
-route6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto; /* proto is unused */
+route6_input(struct mbuf **mp, int *offp, int proto)
{
struct ip6_hdr *ip6;
struct mbuf *m = *mp;
@@ -89,9 +92,8 @@
#endif
switch (rh->ip6r_type) {
+#if 0
case IPV6_RTHDR_TYPE_0:
- if (!ip6_rthdr0_allowed)
- return (IPPROTO_DONE);
rhlen = (rh->ip6r_len + 1) << 3;
#ifndef PULLDOWN_TEST
/*
@@ -118,6 +120,7 @@
if (ip6_rthdr0(m, ip6, (struct ip6_rthdr0 *)rh))
return (IPPROTO_DONE);
break;
+#endif /* Disable route header 0 */
default:
/* unknown routing type */
if (rh->ip6r_segleft == 0) {
@@ -140,11 +143,9 @@
* RFC2292 backward compatibility warning: no support for strict/loose bitmap,
* as it was dropped between RFC1883 and RFC2460.
*/
+#if 0
static int
-ip6_rthdr0(m, ip6, rh0)
- struct mbuf *m;
- struct ip6_hdr *ip6;
- struct ip6_rthdr0 *rh0;
+ip6_rthdr0(struct mbuf *m, struct ip6_hdr *ip6, struct ip6_rthdr0 *rh0)
{
int addrs, index;
struct in6_addr *nextaddr, tmpaddr;
@@ -237,3 +238,4 @@
m_freem(m);
return (-1);
}
+#endif
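
route6.c compiles out Type 0 routing header processing entirely, so an RH0 now takes the same path as any unknown routing type. As far as the surrounding code (and RFC 2460's rule for unrecognized routing types) indicates, that means the header is skipped when segments-left is zero and answered with an ICMPv6 Parameter Problem otherwise. A simplified sketch of that decision, with made-up enum and function names:

    #include <stdio.h>

    enum rh_action { RH_SKIP_HEADER, RH_SEND_PARAMPROB };

    /* With the RH0 case compiled out, every routing header is treated as
     * an unknown type: ignored when no segments remain, rejected with a
     * Parameter Problem when some do. */
    static enum rh_action
    routing_header_action(int segments_left)
    {
        return (segments_left == 0 ? RH_SKIP_HEADER : RH_SEND_PARAMPROB);
    }

    int
    main(void)
    {
        printf("%d %d\n", routing_header_action(0), routing_header_action(3));
        return (0);
    }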
Index: nd6_nbr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/nd6_nbr.c -L sys/netinet6/nd6_nbr.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/nd6_nbr.c
+++ sys/netinet6/nd6_nbr.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.29.2.8 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.47 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <netinet/ip_carp.h>
#endif
-#include <net/net_osdep.h>
-
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
@@ -91,9 +89,7 @@
* Based on RFC 2462 (duplicate address detection)
*/
void
-nd6_ns_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_ns_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -109,6 +105,7 @@
int tlladdr;
union nd_opts ndopts;
struct sockaddr_dl *proxydl = NULL;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
@@ -128,8 +125,8 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -210,7 +207,7 @@
struct sockaddr_in6 tsin6;
int need_proxy;
- bzero(&tsin6, sizeof tsin6);
+ bzero(&tsin6, sizeof tsin6);
tsin6.sin6_len = sizeof(struct sockaddr_in6);
tsin6.sin6_family = AF_INET6;
tsin6.sin6_addr = taddr6;
@@ -249,14 +246,14 @@
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
"(if %d, NS packet %d)\n",
- ip6_sprintf(&taddr6),
+ ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
- ip6_sprintf(&saddr6)));
+ ip6_sprintf(ip6bufs, &saddr6)));
goto freeit;
}
@@ -319,9 +316,12 @@
return;
bad:
- nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", ip6_sprintf(&saddr6)));
- nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", ip6_sprintf(&daddr6)));
- nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(&taddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
+ ip6_sprintf(ip6bufs, &saddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
+ ip6_sprintf(ip6bufs, &daddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
+ ip6_sprintf(ip6bufs, &taddr6)));
icmp6stat.icp6s_badns++;
m_freem(m);
}
@@ -334,13 +334,13 @@
*
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
+ *
+ * ln - for source address determination
+ * dad - duplicate address detection
*/
void
-nd6_ns_output(ifp, daddr6, taddr6, ln, dad)
- struct ifnet *ifp;
- const struct in6_addr *daddr6, *taddr6;
- struct llinfo_nd6 *ln; /* for source address determination */
- int dad; /* duplicate address detection */
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
+ const struct in6_addr *taddr6, struct llinfo_nd6 *ln, int dad)
{
struct mbuf *m;
struct ip6_hdr *ip6;
@@ -456,10 +456,12 @@
src = in6_selectsrc(&dst_sa, NULL,
NULL, &ro, NULL, NULL, &error);
if (src == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG,
"nd6_ns_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(&dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
+ error));
goto bad;
}
}
@@ -543,9 +545,7 @@
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
*/
void
-nd6_na_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_na_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -563,12 +563,13 @@
struct rtentry *rt;
struct sockaddr_dl *sdl;
union nd_opts ndopts;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -595,7 +596,7 @@
if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
nd6log((LOG_ERR,
"nd6_na_input: invalid target address %s\n",
- ip6_sprintf(&taddr6)));
+ ip6_sprintf(ip6bufs, &taddr6)));
goto bad;
}
if (IN6_IS_ADDR_MULTICAST(&daddr6))
@@ -640,13 +641,13 @@
if (ifa) {
log(LOG_ERR,
"nd6_na_input: duplicate IP6 address %s\n",
- ip6_sprintf(&taddr6));
+ ip6_sprintf(ip6bufs, &taddr6));
goto freeit;
}
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
- "(if %d, NA packet %d)\n", ip6_sprintf(&taddr6),
+ "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -813,21 +814,20 @@
if (ln->ln_hold) {
struct mbuf *m_hold, *m_hold_next;
- for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) {
- struct mbuf *mpkt = NULL;
-
+ /*
+ * reset the ln_hold in advance, to explicitly
+ * prevent a ln_hold lookup in nd6_output()
+ * (wouldn't happen, though...)
+ */
+ for (m_hold = ln->ln_hold;
+ m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
- mpkt = m_copym(m_hold, 0, M_COPYALL, M_DONTWAIT);
- if (mpkt == NULL) {
- m_freem(m_hold);
- break;
- }
- mpkt->m_nextpkt = NULL;
+ m_hold->m_nextpkt = NULL;
/*
* we assume ifp is not a loopback here, so just set
* the 2nd argument as the 1st one.
*/
- nd6_output(ifp, ifp, mpkt,
+ nd6_output(ifp, ifp, m_hold,
(struct sockaddr_in6 *)rt_key(rt), rt);
}
ln->ln_hold = NULL;
@@ -850,14 +850,14 @@
* the following items are not implemented yet:
* - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
+ *
+ * tlladdr - 1 if include target link-layer address
+ * sdl0 - sockaddr_dl (= proxy NA) or NULL
*/
void
-nd6_na_output(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0)
- struct ifnet *ifp;
- const struct in6_addr *daddr6_0, *taddr6;
- u_long flags;
- int tlladdr; /* 1 if include target link-layer address */
- struct sockaddr *sdl0; /* sockaddr_dl (= proxy NA) or NULL */
+nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
+ const struct in6_addr *taddr6, u_long flags, int tlladdr,
+ struct sockaddr *sdl0)
{
struct mbuf *m;
struct ip6_hdr *ip6;
@@ -938,9 +938,10 @@
bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
src = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &error);
if (src == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(&dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
goto bad;
}
ip6->ip6_src = *src;
@@ -1020,8 +1021,7 @@
}
caddr_t
-nd6_ifptomac(ifp)
- struct ifnet *ifp;
+nd6_ifptomac(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ARCNET:
@@ -1061,8 +1061,7 @@
static int dad_init = 0;
static struct dadq *
-nd6_dad_find(ifa)
- struct ifaddr *ifa;
+nd6_dad_find(struct ifaddr *ifa)
{
struct dadq *dp;
@@ -1074,9 +1073,7 @@
}
static void
-nd6_dad_starttimer(dp, ticks)
- struct dadq *dp;
- int ticks;
+nd6_dad_starttimer(struct dadq *dp, int ticks)
{
callout_reset(&dp->dad_timer_ch, ticks,
@@ -1084,8 +1081,7 @@
}
static void
-nd6_dad_stoptimer(dp)
- struct dadq *dp;
+nd6_dad_stoptimer(struct dadq *dp)
{
callout_stop(&dp->dad_timer_ch);
@@ -1095,12 +1091,11 @@
* Start Duplicate Address Detection (DAD) for specified interface address.
*/
void
-nd6_dad_start(ifa, delay)
- struct ifaddr *ifa;
- int delay;
+nd6_dad_start(struct ifaddr *ifa, int delay)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
if (!dad_init) {
TAILQ_INIT(&dadq);
@@ -1117,7 +1112,7 @@
log(LOG_DEBUG,
"nd6_dad_start: called with non-tentative address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
@@ -1143,7 +1138,7 @@
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
@@ -1152,7 +1147,7 @@
TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
/*
* Send NS packet for DAD, ip6_dad_count times.
@@ -1178,8 +1173,7 @@
* terminate DAD unconditionally. used for address removals.
*/
void
-nd6_dad_stop(ifa)
- struct ifaddr *ifa;
+nd6_dad_stop(struct ifaddr *ifa)
{
struct dadq *dp;
@@ -1200,12 +1194,12 @@
}
static void
-nd6_dad_timer(ifa)
- struct ifaddr *ifa;
+nd6_dad_timer(struct ifaddr *ifa)
{
int s;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
s = splnet(); /* XXX */
@@ -1222,14 +1216,14 @@
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
@@ -1290,7 +1284,7 @@
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
free(dp, M_IP6NDP);
@@ -1304,12 +1298,12 @@
}
void
-nd6_dad_duplicated(ifa)
- struct ifaddr *ifa;
+nd6_dad_duplicated(struct ifaddr *ifa)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
dp = nd6_dad_find(ifa);
if (dp == NULL) {
@@ -1319,7 +1313,7 @@
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
"NS in/out=%d/%d, NA in=%d\n",
- if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr),
+ if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
@@ -1330,7 +1324,7 @@
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
- if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr));
+ if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
log(LOG_ERR, "%s: manual intervention required\n",
if_name(ifp));
@@ -1375,9 +1369,7 @@
}
static void
-nd6_dad_ns_output(dp, ifa)
- struct dadq *dp;
- struct ifaddr *ifa;
+nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp = ifa->ifa_ifp;
@@ -1395,8 +1387,7 @@
}
static void
-nd6_dad_ns_input(ifa)
- struct ifaddr *ifa;
+nd6_dad_ns_input(struct ifaddr *ifa)
{
struct in6_ifaddr *ia;
struct ifnet *ifp;
@@ -1415,9 +1406,10 @@
/* Quickhack - completely ignore DAD NS packets */
if (dad_ignore_ns) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_INFO,
"nd6_dad_ns_input: ignoring DAD NS packet for "
- "address %s(%s)\n", ip6_sprintf(taddr6),
+ "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
if_name(ifa->ifa_ifp)));
return;
}
@@ -1445,8 +1437,7 @@
}
static void
-nd6_dad_na_input(ifa)
- struct ifaddr *ifa;
+nd6_dad_na_input(struct ifaddr *ifa)
{
struct dadq *dp;
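
A large part of the nd6_nbr.c diff is mechanical fallout from ip6_sprintf() growing a caller-supplied buffer argument (char ip6buf[INET6_ADDRSTRLEN]) instead of returning a pointer to static storage, which removes a re-entrancy hazard in the logging paths. The user-space sketch below shows the same convention built on inet_ntop(); addr_to_str is a hypothetical helper, not a kernel function.

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>

    /* Caller-supplied buffer, mirroring the new ip6_sprintf(ip6buf, addr):
     * each caller owns its storage, so nested or concurrent calls can no
     * longer clobber one another's result. */
    static const char *
    addr_to_str(char *dst, const struct in6_addr *addr)
    {
        return (inet_ntop(AF_INET6, addr, dst, INET6_ADDRSTRLEN));
    }

    int
    main(void)
    {
        char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
        struct in6_addr src = in6addr_loopback, dst = in6addr_any;

        printf("%s -> %s\n", addr_to_str(a, &src), addr_to_str(b, &dst));
        return (0);
    }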
Index: mld6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/mld6.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/mld6.c -L sys/netinet6/mld6.c -u -r1.2 -r1.3
--- sys/netinet6/mld6.c
+++ sys/netinet6/mld6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/mld6.c,v 1.19.2.6 2006/03/11 10:36:23 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/mld6.c,v 1.31 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ */
/*-
@@ -89,8 +89,6 @@
#include <netinet/icmp6.h>
#include <netinet6/mld6_var.h>
-#include <net/net_osdep.h>
-
/*
* Protocol constants
*/
@@ -112,7 +110,7 @@
static u_long mld_timerresid(struct in6_multi *);
void
-mld6_init()
+mld6_init(void)
{
static u_int8_t hbh_buf[8];
struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
@@ -133,8 +131,7 @@
}
static void
-mld_starttimer(in6m)
- struct in6_multi *in6m;
+mld_starttimer(struct in6_multi *in6m)
{
struct timeval now;
@@ -153,8 +150,7 @@
}
static void
-mld_stoptimer(in6m)
- struct in6_multi *in6m;
+mld_stoptimer(struct in6_multi *in6m)
{
if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
return;
@@ -164,8 +160,7 @@
}
static void
-mld_timeo(in6m)
- struct in6_multi *in6m;
+mld_timeo(struct in6_multi *in6m)
{
int s = splnet();
@@ -186,8 +181,7 @@
}
static u_long
-mld_timerresid(in6m)
- struct in6_multi *in6m;
+mld_timerresid(struct in6_multi *in6m)
{
struct timeval now, diff;
@@ -211,8 +205,7 @@
}
void
-mld6_start_listening(in6m)
- struct in6_multi *in6m;
+mld6_start_listening(struct in6_multi *in6m)
{
struct in6_addr all_in6;
int s = splnet();
@@ -247,8 +240,7 @@
}
void
-mld6_stop_listening(in6m)
- struct in6_multi *in6m;
+mld6_stop_listening(struct in6_multi *in6m)
{
struct in6_addr allnode, allrouter;
@@ -271,9 +263,7 @@
}
void
-mld6_input(m, off)
- struct mbuf *m;
- int off;
+mld6_input(struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct mld_hdr *mldh;
@@ -298,10 +288,11 @@
/* source address validation */
ip6 = mtod(m, struct ip6_hdr *); /* in case mpullup */
if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
log(LOG_ERR,
"mld6_input: src %s is not link-local (grp=%s)\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&mldh->mld_addr));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufg, &mldh->mld_addr));
/*
* spec (RFC2710) does not explicitly
* specify to discard the packet from a non link-local
@@ -327,7 +318,7 @@
*
* In Non-Listener state, we simply don't have a membership record.
* In Delaying Listener state, our timer is running (in6m->in6m_timer)
- * In Idle Listener state, our timer is not running
+ * In Idle Listener state, our timer is not running
* (in6m->in6m_timer==IN6M_TIMER_UNDEF)
*
* The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
@@ -442,10 +433,7 @@
}
static void
-mld6_sendpkt(in6m, type, dst)
- struct in6_multi *in6m;
- int type;
- const struct in6_addr *dst;
+mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
{
struct mbuf *mh, *md;
struct mld_hdr *mldh;
@@ -544,108 +532,117 @@
* Add source addresses to the list also, if upstream router is MLDv2 capable
* and the number of source is not 0.
*/
-struct in6_multi *
-in6_addmulti(maddr6, ifp, errorp, delay)
- struct in6_addr *maddr6;
- struct ifnet *ifp;
- int *errorp, delay;
+struct in6_multi *
+in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
+ int *errorp, int delay)
{
struct in6_multi *in6m;
- struct ifmultiaddr *ifma;
- struct sockaddr_in6 sa6;
- int s = splnet();
*errorp = 0;
+ in6m = NULL;
- /*
- * Call generic routine to add membership or increment
- * refcount. It wants addresses in the form of a sockaddr,
- * so we build one here (being careful to zero the unused bytes).
- */
- bzero(&sa6, sizeof(sa6));
- sa6.sin6_family = AF_INET6;
- sa6.sin6_len = sizeof(struct sockaddr_in6);
- sa6.sin6_addr = *maddr6;
- *errorp = if_addmulti(ifp, (struct sockaddr *)&sa6, &ifma);
- if (*errorp) {
- splx(s);
- return 0;
- }
+ IFF_LOCKGIANT(ifp);
+ /*IN6_MULTI_LOCK();*/
- /*
- * If ifma->ifma_protospec is null, then if_addmulti() created
- * a new record. Otherwise, we are done.
- */
- if (ifma->ifma_protospec != NULL) {
- splx(s);
- return ifma->ifma_protospec;
- }
-
- /* XXX - if_addmulti uses M_WAITOK. Can this really be called
- at interrupt time? If so, need to fix if_addmulti. XXX */
- in6m = (struct in6_multi *)malloc(sizeof(*in6m), M_IP6MADDR, M_NOWAIT);
- if (in6m == NULL) {
- splx(s);
- return (NULL);
- }
-
- bzero(in6m, sizeof *in6m);
- in6m->in6m_addr = *maddr6;
- in6m->in6m_ifp = ifp;
- in6m->in6m_refcount = 1;
- in6m->in6m_ifma = ifma;
- ifma->ifma_protospec = in6m;
- in6m->in6m_timer_ch = malloc(sizeof(*in6m->in6m_timer_ch), M_IP6MADDR,
- M_NOWAIT);
- if (in6m->in6m_timer_ch == NULL) {
- free(in6m, M_IP6MADDR);
- splx(s);
- return (NULL);
- }
- LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
+ IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
+ if (in6m != NULL) {
+ /*
+ * If we already joined this group, just bump the
+ * refcount and return it.
+ */
+ KASSERT(in6m->in6m_refcount >= 1,
+ ("%s: bad refcount %d", __func__, in6m->in6m_refcount));
+ ++in6m->in6m_refcount;
+ } else do {
+ struct in6_multi *nin6m;
+ struct ifmultiaddr *ifma;
+ struct sockaddr_in6 sa6;
+
+ bzero(&sa6, sizeof(sa6));
+ sa6.sin6_family = AF_INET6;
+ sa6.sin6_len = sizeof(struct sockaddr_in6);
+ sa6.sin6_addr = *maddr6;
- callout_init(in6m->in6m_timer_ch, 0);
- in6m->in6m_timer = delay;
- if (in6m->in6m_timer > 0) {
- in6m->in6m_state = MLD_REPORTPENDING;
- mld_starttimer(in6m);
+ *errorp = if_addmulti(ifp, (struct sockaddr *)&sa6, &ifma);
+ if (*errorp)
+ break;
- splx(s);
- return (in6m);
- }
+ /*
+ * If ifma->ifma_protospec is null, then if_addmulti() created
+ * a new record. Otherwise, bump refcount, and we are done.
+ */
+ if (ifma->ifma_protospec != NULL) {
+ in6m = ifma->ifma_protospec;
+ ++in6m->in6m_refcount;
+ break;
+ }
+
+ nin6m = malloc(sizeof(*nin6m), M_IP6MADDR, M_NOWAIT | M_ZERO);
+ if (nin6m == NULL) {
+ if_delmulti_ifma(ifma);
+ break;
+ }
+
+ nin6m->in6m_addr = *maddr6;
+ nin6m->in6m_ifp = ifp;
+ nin6m->in6m_refcount = 1;
+ nin6m->in6m_ifma = ifma;
+ ifma->ifma_protospec = nin6m;
+
+ nin6m->in6m_timer_ch = malloc(sizeof(*nin6m->in6m_timer_ch),
+ M_IP6MADDR, M_NOWAIT);
+ if (nin6m->in6m_timer_ch == NULL) {
+ free(nin6m, M_IP6MADDR);
+ if_delmulti_ifma(ifma);
+ break;
+ }
+
+ LIST_INSERT_HEAD(&in6_multihead, nin6m, in6m_entry);
+
+ callout_init(nin6m->in6m_timer_ch, 0);
+ nin6m->in6m_timer = delay;
+ if (nin6m->in6m_timer > 0) {
+ nin6m->in6m_state = MLD_REPORTPENDING;
+ mld_starttimer(nin6m);
+ }
+
+ mld6_start_listening(nin6m);
+
+ in6m = nin6m;
+
+ } while (0);
+
+ /*IN6_MULTI_UNLOCK();*/
+ IFF_UNLOCKGIANT(ifp);
- /*
- * Let MLD6 know that we have joined a new IPv6 multicast
- * group.
- */
- mld6_start_listening(in6m);
- splx(s);
return (in6m);
}
/*
* Delete a multicast address record.
+ *
+ * TODO: Locking, as per netinet.
*/
void
-in6_delmulti(in6m)
- struct in6_multi *in6m;
+in6_delmulti(struct in6_multi *in6m)
{
- struct ifmultiaddr *ifma = in6m->in6m_ifma;
- int s = splnet();
+ struct ifmultiaddr *ifma;
- if (ifma->ifma_refcount == 1) {
- /*
- * No remaining claims to this record; let MLD6 know
- * that we are leaving the multicast group.
- */
+ KASSERT(in6m->in6m_refcount >= 1, ("%s: freeing freed in6m", __func__));
+
+ if (--in6m->in6m_refcount == 0) {
mld_stoptimer(in6m);
mld6_stop_listening(in6m);
+
+ ifma = in6m->in6m_ifma;
+ KASSERT(ifma->ifma_protospec == in6m,
+ ("%s: ifma_protospec != in6m", __func__));
ifma->ifma_protospec = NULL;
+
LIST_REMOVE(in6m, in6m_entry);
free(in6m->in6m_timer_ch, M_IP6MADDR);
free(in6m, M_IP6MADDR);
+
+ if_delmulti_ifma(ifma);
}
- /* XXX - should be separate API for when we have an ifma? */
- if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
- splx(s);
}
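
The mld6.c rewrite of in6_addmulti()/in6_delmulti() is essentially a move to explicit reference counting: an existing in6_multi just gets its in6m_refcount bumped, a new one is allocated zeroed, and teardown happens only when the last reference is dropped. Below is a stripped-down sketch of that join/leave pattern in plain C; struct membership, join_group and leave_group are invented names, and all locking and interface bookkeeping is omitted.

    #include <stdlib.h>

    struct membership {
        int refcount;
        /* ... group address, interface, timers ... */
    };

    static struct membership *
    join_group(struct membership **slot)
    {
        if (*slot != NULL) {                  /* already joined: take a ref */
            (*slot)->refcount++;
            return (*slot);
        }
        *slot = calloc(1, sizeof(**slot));    /* M_NOWAIT | M_ZERO analogue */
        if (*slot == NULL)
            return (NULL);
        (*slot)->refcount = 1;
        return (*slot);
    }

    static void
    leave_group(struct membership **slot)
    {
        if (--(*slot)->refcount == 0) {       /* last reference: tear down */
            free(*slot);
            *slot = NULL;
        }
    }

    int
    main(void)
    {
        struct membership *group = NULL;

        join_group(&group);     /* first join allocates */
        join_group(&group);     /* second join only bumps the refcount */
        leave_group(&group);    /* still one reference left */
        leave_group(&group);    /* last leave frees the record */
        return (0);
    }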
Index: nd6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/nd6.c -L sys/netinet6/nd6.c -u -r1.3 -r1.4
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netinet6/nd6.c,v 1.48.2.13 2006/06/17 17:58:33 gnn Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6.c,v 1.83.2.1 2007/10/30 18:03:50 jhb Exp $ */
/* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */
/*-
@@ -37,7 +37,6 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -69,7 +68,7 @@
#include <sys/limits.h>
-#include <net/net_osdep.h>
+#include <security/mac/mac_framework.h>
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
@@ -121,7 +120,7 @@
extern struct callout in6_tmpaddrtimer_ch;
void
-nd6_init()
+nd6_init(void)
{
static int nd6_init_done = 0;
int i;
@@ -148,8 +147,7 @@
}
struct nd_ifinfo *
-nd6_ifattach(ifp)
- struct ifnet *ifp;
+nd6_ifattach(struct ifnet *ifp)
{
struct nd_ifinfo *nd;
@@ -176,8 +174,7 @@
}
void
-nd6_ifdetach(nd)
- struct nd_ifinfo *nd;
+nd6_ifdetach(struct nd_ifinfo *nd)
{
free(nd, M_IP6NDP);
@@ -188,8 +185,7 @@
* changes, which means we might have to adjust the ND level MTU.
*/
void
-nd6_setmtu(ifp)
- struct ifnet *ifp;
+nd6_setmtu(struct ifnet *ifp)
{
nd6_setmtu0(ifp, ND_IFINFO(ifp));
@@ -197,9 +193,7 @@
/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
void
-nd6_setmtu0(ifp, ndi)
- struct ifnet *ifp;
- struct nd_ifinfo *ndi;
+nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
{
u_int32_t omaxmtu;
@@ -239,10 +233,7 @@
}
void
-nd6_option_init(opt, icmp6len, ndopts)
- void *opt;
- int icmp6len;
- union nd_opts *ndopts;
+nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
{
bzero(ndopts, sizeof(*ndopts));
@@ -260,8 +251,7 @@
* Take one ND option.
*/
struct nd_opt_hdr *
-nd6_option(ndopts)
- union nd_opts *ndopts;
+nd6_option(union nd_opts *ndopts)
{
struct nd_opt_hdr *nd_opt;
int olen;
@@ -312,8 +302,7 @@
* multiple options of the same type.
*/
int
-nd6_options(ndopts)
- union nd_opts *ndopts;
+nd6_options(union nd_opts *ndopts)
{
struct nd_opt_hdr *nd_opt;
int i = 0;
@@ -392,9 +381,7 @@
* ND6 timer routine to handle ND6 entries
*/
void
-nd6_llinfo_settimer(ln, tick)
- struct llinfo_nd6 *ln;
- long tick;
+nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick)
{
if (tick < 0) {
ln->ln_expire = 0;
@@ -415,8 +402,7 @@
}
static void
-nd6_llinfo_timer(arg)
- void *arg;
+nd6_llinfo_timer(void *arg)
{
struct llinfo_nd6 *ln;
struct rtentry *rt;
@@ -475,7 +461,7 @@
ln->ln_hold = m0;
clear_llinfo_pqueue(ln);
}
- if (rt)
+ if (rt && rt->rt_llinfo)
(void)nd6_free(rt, 0);
ln = NULL;
}
@@ -490,7 +476,8 @@
case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */
if (!ND6_LLINFO_PERMANENT(ln)) {
- (void)nd6_free(rt, 1);
+ if (rt && rt->rt_llinfo)
+ (void)nd6_free(rt, 1);
ln = NULL;
}
break;
@@ -520,13 +507,14 @@
* specified as the destination of a p2p interface
* (see in6_ifinit()). We should not free the entry
* since this is sort of a "static" entry generated
- * via interface address configuration.
+ * via interface address configuration.
*/
ln->ln_asked = 0;
ln->ln_expire = 0; /* make it permanent */
ln->ln_state = ND6_LLINFO_STALE;
} else {
- (void)nd6_free(rt, 0);
+ if (rt && rt->rt_llinfo)
+ (void)nd6_free(rt, 0);
ln = NULL;
}
break;
@@ -538,8 +526,7 @@
* ND6 timer routine to expire default route list and prefix list
*/
void
-nd6_timer(ignored_arg)
- void *ignored_arg;
+nd6_timer(void *ignored_arg)
{
int s;
struct nd_defrouter *dr;
@@ -662,9 +649,11 @@
splx(s);
}
+/*
+ * ia6 - deprecated/invalidated temporary address
+ */
static int
-regen_tmpaddr(ia6)
- struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
+regen_tmpaddr(struct in6_ifaddr *ia6)
{
struct ifaddr *ifa;
struct ifnet *ifp;
@@ -730,8 +719,7 @@
* ifp goes away.
*/
void
-nd6_purge(ifp)
- struct ifnet *ifp;
+nd6_purge(struct ifnet *ifp)
{
struct llinfo_nd6 *ln, *nln;
struct nd_defrouter *dr, *ndr;
@@ -818,13 +806,11 @@
}
struct rtentry *
-nd6_lookup(addr6, create, ifp)
- struct in6_addr *addr6;
- int create;
- struct ifnet *ifp;
+nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp)
{
struct rtentry *rt;
struct sockaddr_in6 sin6;
+ char ip6buf[INET6_ADDRSTRLEN];
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
@@ -872,7 +858,7 @@
log(LOG_ERR,
"nd6_lookup: failed to add route for a "
"neighbor(%s), errno=%d\n",
- ip6_sprintf(addr6), e);
+ ip6_sprintf(ip6buf, addr6), e);
}
if (rt == NULL)
return (NULL);
@@ -909,7 +895,7 @@
if (create) {
nd6log((LOG_DEBUG,
"nd6_lookup: failed to lookup %s (if = %s)\n",
- ip6_sprintf(addr6),
+ ip6_sprintf(ip6buf, addr6),
ifp ? if_name(ifp) : "unspec"));
}
RT_UNLOCK(rt);
@@ -925,9 +911,7 @@
* to not reenter the routing code from within itself.
*/
static int
-nd6_is_new_addr_neighbor(addr, ifp)
- struct sockaddr_in6 *addr;
- struct ifnet *ifp;
+nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct nd_prefix *pr;
struct ifaddr *dstaddr;
@@ -1001,9 +985,7 @@
* XXX: should take care of the destination of a p2p link?
*/
int
-nd6_is_addr_neighbor(addr, ifp)
- struct sockaddr_in6 *addr;
- struct ifnet *ifp;
+nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
if (nd6_is_new_addr_neighbor(addr, ifp))
@@ -1026,9 +1008,7 @@
* that the change is safe.
*/
static struct llinfo_nd6 *
-nd6_free(rt, gc)
- struct rtentry *rt;
- int gc;
+nd6_free(struct rtentry *rt, int gc)
{
struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
@@ -1137,10 +1117,7 @@
* XXX cost-effective methods?
*/
void
-nd6_nud_hint(rt, dst6, force)
- struct rtentry *rt;
- struct in6_addr *dst6;
- int force;
+nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
{
struct llinfo_nd6 *ln;
@@ -1184,11 +1161,11 @@
}
}
+/*
+ * info - XXX unused
+ */
void
-nd6_rtrequest(req, rt, info)
- int req;
- struct rtentry *rt;
- struct rt_addrinfo *info; /* xxx unused */
+nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
struct sockaddr *gate = rt->rt_gateway;
struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
@@ -1311,6 +1288,7 @@
nd6_inuse++;
nd6_allocated++;
bzero(ln, sizeof(*ln));
+ RT_ADDREF(rt);
ln->ln_rt = rt;
callout_init(&ln->ln_timer_ch, 0);
@@ -1387,9 +1365,11 @@
break;
if (in6_addmulti(&llsol, ifp,
&error, 0) == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_ERR, "%s: failed to join "
"%s (errno=%d)\n", if_name(ifp),
- ip6_sprintf(&llsol), error));
+ ip6_sprintf(ip6buf, &llsol),
+ error));
}
}
}
@@ -1421,6 +1401,7 @@
ln->ln_prev->ln_next = ln->ln_next;
ln->ln_prev = NULL;
nd6_llinfo_settimer(ln, -1);
+ RT_REMREF(rt);
rt->rt_llinfo = 0;
rt->rt_flags &= ~RTF_LLINFO;
clear_llinfo_pqueue(ln);
@@ -1429,10 +1410,7 @@
}
int
-nd6_ioctl(cmd, data, ifp)
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
+nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
struct in6_drlist *drl = (struct in6_drlist *)data;
struct in6_oprlist *oprl = (struct in6_oprlist *)data;
@@ -1664,15 +1642,13 @@
/*
* Create neighbor cache entry and cache link-layer address,
* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
+ *
+ * type - ICMP6 type
+ * code - type dependent information
*/
struct rtentry *
-nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
- struct ifnet *ifp;
- struct in6_addr *from;
- char *lladdr;
- int lladdrlen;
- int type; /* ICMP6 type */
- int code; /* type dependent information */
+nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
+ int lladdrlen, int type, int code)
{
struct rtentry *rt = NULL;
struct llinfo_nd6 *ln = NULL;
@@ -1790,28 +1766,26 @@
if (ln->ln_hold) {
struct mbuf *m_hold, *m_hold_next;
- for (m_hold = ln->ln_hold; m_hold;
- m_hold = m_hold_next) {
- struct mbuf *mpkt = NULL;
+ /*
+ * reset the ln_hold in advance, to explicitly
+ * prevent a ln_hold lookup in nd6_output()
+ * (wouldn't happen, though...)
+ */
+ for (m_hold = ln->ln_hold, ln->ln_hold = NULL;
+ m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
- mpkt = m_copym(m_hold, 0, M_COPYALL, M_DONTWAIT);
- if (mpkt == NULL) {
- m_freem(m_hold);
- break;
- }
- mpkt->m_nextpkt = NULL;
+ m_hold->m_nextpkt = NULL;
/*
* we assume ifp is not a p2p here, so
* just set the 2nd argument as the
* 1st one.
*/
- nd6_output(ifp, ifp, mpkt,
+ nd6_output(ifp, ifp, m_hold,
(struct sockaddr_in6 *)rt_key(rt),
rt);
}
- ln->ln_hold = NULL;
}
} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
/* probe right away */
@@ -1843,7 +1817,7 @@
* 0 n y -- (3) c s s
* 0 y y n (4) c s s
* 0 y y y (5) c s s
- * 1 -- n -- (6) c c c s
+ * 1 -- n -- (6) c c c s
* 1 -- y -- (7) c c s c s
*
* (c=clear s=set)
@@ -1906,8 +1880,7 @@
}
static void
-nd6_slowtimo(ignored_arg)
- void *ignored_arg;
+nd6_slowtimo(void *ignored_arg)
{
struct nd_ifinfo *nd6if;
struct ifnet *ifp;
@@ -1934,12 +1907,8 @@
#define senderr(e) { error = (e); goto bad;}
int
-nd6_output(ifp, origifp, m0, dst, rt0)
- struct ifnet *ifp;
- struct ifnet *origifp;
- struct mbuf *m0;
- struct sockaddr_in6 *dst;
- struct rtentry *rt0;
+nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
+ struct sockaddr_in6 *dst, struct rtentry *rt0)
{
struct mbuf *m = m0;
struct rtentry *rt = rt0;
@@ -1956,13 +1925,16 @@
/*
* next hop determination. This routine is derived from ether_output.
*/
+ /* NB: the locking here is tortuous... */
+ if (rt != NULL)
+ RT_LOCK(rt);
again:
- if (rt) {
+ if (rt != NULL) {
if ((rt->rt_flags & RTF_UP) == 0) {
+ RT_UNLOCK(rt);
rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL);
if (rt != NULL) {
RT_REMREF(rt);
- RT_UNLOCK(rt);
if (rt->rt_ifp != ifp)
/*
* XXX maybe we should update ifp too,
@@ -1987,6 +1959,7 @@
*/
if (!nd6_is_addr_neighbor(gw6, ifp) ||
in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
+ RT_UNLOCK(rt);
/*
* We allow this kind of tricky route only
* when the outgoing interface is p2p.
@@ -1998,18 +1971,34 @@
goto sendpkt;
}
- if (rt->rt_gwroute == 0)
+ if (rt->rt_gwroute == NULL)
goto lookup;
- if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
- RT_LOCK(rt);
- rtfree(rt); rt = rt0;
+ rt = rt->rt_gwroute;
+ RT_LOCK(rt); /* NB: gwroute */
+ if ((rt->rt_flags & RTF_UP) == 0) {
+ RTFREE_LOCKED(rt); /* unlock gwroute */
+ rt = rt0;
+ rt0->rt_gwroute = NULL;
lookup:
- rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
- if ((rt = rt->rt_gwroute) == 0)
+ RT_UNLOCK(rt0);
+ rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+ if (rt == rt0) {
+ RT_REMREF(rt0);
+ RT_UNLOCK(rt0);
senderr(EHOSTUNREACH);
- RT_UNLOCK(rt);
+ }
+ RT_LOCK(rt0);
+ if (rt0->rt_gwroute != NULL)
+ RTFREE(rt0->rt_gwroute);
+ rt0->rt_gwroute = rt;
+ if (rt == NULL) {
+ RT_UNLOCK(rt0);
+ senderr(EHOSTUNREACH);
+ }
}
+ RT_UNLOCK(rt0);
}
+ RT_UNLOCK(rt);
}
/*
@@ -2035,10 +2024,11 @@
if (ln == NULL || rt == NULL) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
+ char ip6buf[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"nd6_output: can't allocate llinfo for %s "
"(ln=%p, rt=%p)\n",
- ip6_sprintf(&dst->sin6_addr), ln, rt);
+ ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt);
senderr(EIO); /* XXX: good error? */
}
@@ -2123,11 +2113,6 @@
goto bad;
}
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
-
#ifdef MAC
mac_create_mbuf_linklayer(ifp, m);
#endif
@@ -2145,8 +2130,7 @@
#undef senderr
int
-nd6_need_cache(ifp)
- struct ifnet *ifp;
+nd6_need_cache(struct ifnet *ifp)
{
/*
* XXX: we currently do not make neighbor cache on any interface
@@ -2173,6 +2157,7 @@
case IFT_PPP:
case IFT_TUNNEL:
case IFT_BRIDGE:
+ case IFT_PROPVIRTUAL:
return (1);
default:
return (0);
@@ -2180,12 +2165,8 @@
}
int
-nd6_storelladdr(ifp, rt0, m, dst, desten)
- struct ifnet *ifp;
- struct rtentry *rt0;
- struct mbuf *m;
- struct sockaddr *dst;
- u_char *desten;
+nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
+ struct sockaddr *dst, u_char *desten)
{
struct sockaddr_dl *sdl;
struct rtentry *rt;
@@ -2255,9 +2236,8 @@
return (0);
}
-static void
-clear_llinfo_pqueue(ln)
- struct llinfo_nd6 *ln;
+static void
+clear_llinfo_pqueue(struct llinfo_nd6 *ln)
{
struct mbuf *m_hold, *m_hold_next;
@@ -2287,7 +2267,7 @@
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
{
int error;
- char buf[1024];
+ char buf[1024] __aligned(4);
struct in6_defrouter *d, *de;
struct nd_defrouter *dr;
@@ -2305,7 +2285,9 @@
d->rtaddr.sin6_family = AF_INET6;
d->rtaddr.sin6_len = sizeof(d->rtaddr);
d->rtaddr.sin6_addr = dr->rtaddr;
- sa6_recoverscope(&d->rtaddr);
+ error = sa6_recoverscope(&d->rtaddr);
+ if (error != 0)
+ return (error);
d->flags = dr->flags;
d->rtlifetime = dr->rtlifetime;
d->expire = dr->expire;
@@ -2325,9 +2307,10 @@
nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
{
int error;
- char buf[1024];
+ char buf[1024] __aligned(4);
struct in6_prefix *p, *pe;
struct nd_prefix *pr;
+ char ip6buf[INET6_ADDRSTRLEN];
if (req->newptr)
return EPERM;
@@ -2350,7 +2333,7 @@
if (sa6_recoverscope(&p->prefix)) {
log(LOG_ERR,
"scope error in prefix list (%s)\n",
- ip6_sprintf(&p->prefix.sin6_addr));
+ ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
/* XXX: press on... */
}
p->raflags = pr->ndpr_raf;
@@ -2393,7 +2376,8 @@
log(LOG_ERR,
"scope error in "
"prefix list (%s)\n",
- ip6_sprintf(&pfr->router->rtaddr));
+ ip6_sprintf(ip6buf,
+ &pfr->router->rtaddr));
}
advrtrs++;
}
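
Among the nd6.c changes, the RT_ADDREF()/RT_REMREF() additions pair a reference acquisition with storing the rtentry pointer in the neighbor-cache llinfo, and a release with clearing it, so the route cannot go away while ND6 still points at it. The sketch below shows only that ownership discipline; struct route_entry, llinfo_attach and llinfo_detach are illustrative stand-ins, and the real code of course does this under the routing locks.

    #include <stddef.h>

    struct route_entry {
        int   refcnt;
        void *llinfo;           /* back-pointer owned by the ND code */
    };

    /* Stashing a long-lived pointer to the route takes a reference ... */
    static void
    llinfo_attach(struct route_entry *rt, void *ln)
    {
        rt->refcnt++;           /* RT_ADDREF analogue */
        rt->llinfo = ln;
    }

    /* ... and clearing it gives the reference back, so the route can only
     * be freed once no neighbor-cache entry still references it. */
    static void
    llinfo_detach(struct route_entry *rt)
    {
        rt->llinfo = NULL;
        rt->refcnt--;           /* RT_REMREF analogue */
    }

    int
    main(void)
    {
        struct route_entry rt = { 1, NULL };  /* one ref held by the table */
        int ln;

        llinfo_attach(&rt, &ln);
        llinfo_detach(&rt);
        return (rt.refcnt == 1 ? 0 : 1);
    }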
Index: in6_pcb.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_pcb.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_pcb.h -L sys/netinet6/in6_pcb.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_pcb.h
+++ sys/netinet6/in6_pcb.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_pcb.h,v 1.16 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_pcb.h,v 1.19 2007/05/11 10:20:50 rwatson Exp $ */
/* $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ */
/*-
@@ -70,12 +70,13 @@
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
-void in6_pcbpurgeif0 __P((struct in6pcb *, struct ifnet *));
+void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
void in6_losing __P((struct inpcb *));
int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *));
void in6_pcbdetach __P((struct inpcb *));
void in6_pcbdisconnect __P((struct inpcb *));
+void in6_pcbfree __P((struct inpcb *));
int in6_pcbladdr __P((struct inpcb *, struct sockaddr *,
struct in6_addr **));
struct inpcb *
@@ -94,8 +95,8 @@
in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p));
struct sockaddr *
in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p));
-int in6_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
-int in6_setsockaddr __P((struct socket *so, struct sockaddr **nam));
+int in6_getpeeraddr __P((struct socket *so, struct sockaddr **nam));
+int in6_getsockaddr __P((struct socket *so, struct sockaddr **nam));
int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam));
int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam));
int in6_selecthlim __P((struct in6pcb *, struct ifnet *));
Index: ip6_mroute.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/ip6_mroute.c -L sys/netinet6/ip6_mroute.c -u -r1.2 -r1.3
--- sys/netinet6/ip6_mroute.c
+++ sys/netinet6/ip6_mroute.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_mroute.c,v 1.29.2.7.2.1 2006/04/20 16:05:17 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_mroute.c,v 1.46 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ */
/*-
@@ -96,6 +96,7 @@
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
@@ -114,13 +115,13 @@
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_mroute.h>
+#include <netinet6/ip6protosw.h>
#include <netinet6/pim6.h>
#include <netinet6/pim6_var.h>
-#include <net/net_osdep.h>
-
static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
+/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
#define M_HASCL(m) ((m)->m_flags & M_EXT)
static int ip6_mdq __P((struct mbuf *, struct ifnet *, struct mf6c *));
@@ -132,23 +133,49 @@
static int register_send __P((struct ip6_hdr *, struct mif6 *,
struct mbuf *));
-/*
- * Globals. All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static,
- * except for netstat or debugging purposes.
- */
-struct socket *ip6_mrouter = NULL;
-int ip6_mrouter_ver = 0;
-int ip6_mrtproto = IPPROTO_PIM; /* for netstat only */
-struct mrt6stat mrt6stat;
+extern struct domain inet6domain;
-#define NO_RTE_FOUND 0x1
+/* XXX: referenced from ip_mroute.c for dynamically loading this code. */
+struct ip6protosw in6_pim_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = pim6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_usrreqs = &rip6_usrreqs
+};
+
+static int ip6_mrouter_ver = 0;
+
+SYSCTL_DECL(_net_inet6);
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
+
+static struct mrt6stat mrt6stat;
+SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
+ &mrt6stat, mrt6stat,
+ "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
+
+#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
-struct mf6c *mf6ctable[MF6CTBLSIZ];
-u_char n6expire[MF6CTBLSIZ];
+static struct mf6c *mf6ctable[MF6CTBLSIZ];
+SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
+ &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
+ "Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
+ "netinet6/ip6_mroute.h)");
+
+static u_char n6expire[MF6CTBLSIZ];
+
static struct mif6 mif6table[MAXMIFS];
+SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
+ &mif6table, sizeof(mif6table), "S,vif[MAXMIFS]",
+ "Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)");
+
#ifdef MRT6DEBUG
-u_int mrt6debug = 0; /* debug level */
+static u_int mrt6debug = 0; /* debug level */
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
#define DEBUG_EXPIRE 0x08
@@ -173,9 +200,9 @@
* by a broken gateway). Different from IPv4 register_if,
* these interfaces are linked into the system ifnet list,
* because per-interface IPv6 statistics are maintained in
- * ifp->if_afdata. But it does not have any routes point
+ * ifp->if_afdata. But it does not have any routes point
* to them. I.e., packets can't be sent this way. They
- * only exist as a placeholder for multicast source
+ * only exist as a placeholder for multicast source
* verification.
*/
static struct ifnet *multicast_register_if6;
@@ -189,6 +216,10 @@
static mifi_t reg_mif_num = (mifi_t)-1;
static struct pim6stat pim6stat;
+SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD,
+ &pim6stat, pim6stat,
+ "PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
+
static int pim6;
/*
@@ -201,9 +232,7 @@
/*
* Find a route for a given origin IPv6 address and Multicast group address.
- * Quality of service parameter to be added in the future!!!
*/
-
#define MF6CFIND(o, g, rt) do { \
struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
rt = NULL; \
@@ -225,6 +254,7 @@
/*
* Macros to compute elapsed time efficiently
* Borrowed from Van Jacobson's scheduling code
+ * XXX: replace with timersub() ?
*/
#define TV_DELTA(a, b, delta) do { \
int xxs; \
@@ -244,12 +274,13 @@
} \
} while (/*CONSTCOND*/ 0)
+/* XXX: replace with timercmp(a, b, <) ? */
#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
#ifdef UPCALL_TIMING
#define UPCALL_MAX 50
-u_long upcall_data[UPCALL_MAX + 1];
+static u_long upcall_data[UPCALL_MAX + 1];
static void collate();
#endif /* UPCALL_TIMING */
@@ -263,13 +294,17 @@
static struct callout expire_upcalls_ch;
+int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m);
+int X_ip6_mrouter_done(void);
+int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt);
+int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt);
+int X_mrt6_ioctl(int cmd, caddr_t data);
+
/*
* Handle MRT setsockopt commands to modify the multicast routing tables.
*/
int
-ip6_mrouter_set(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
{
int error = 0;
int optval;
@@ -292,7 +327,7 @@
error = ip6_mrouter_init(so, optval, sopt->sopt_name);
break;
case MRT6_DONE:
- error = ip6_mrouter_done();
+ error = X_ip6_mrouter_done();
break;
case MRT6_ADD_MIF:
error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
@@ -337,9 +372,7 @@
* Handle MRT getsockopt commands
*/
int
-ip6_mrouter_get(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
{
int error = 0;
@@ -358,9 +391,7 @@
* Handle ioctl commands to obtain information from the cache
*/
int
-mrt6_ioctl(cmd, data)
- int cmd;
- caddr_t data;
+X_mrt6_ioctl(int cmd, caddr_t data)
{
switch (cmd) {
case SIOCGETSGCNT_IN6:
@@ -376,8 +407,7 @@
* returns the packet, byte, rpf-failure count for the source group provided
*/
static int
-get_sg_cnt(req)
- struct sioc_sg_req6 *req;
+get_sg_cnt(struct sioc_sg_req6 *req)
{
struct mf6c *rt;
int s;
@@ -402,8 +432,7 @@
* returns the input and output packet and byte counts on the mif provided
*/
static int
-get_mif6_cnt(req)
- struct sioc_mif_req6 *req;
+get_mif6_cnt(struct sioc_mif_req6 *req)
{
mifi_t mifi = req->mifi;
@@ -419,8 +448,7 @@
}
static int
-set_pim6(i)
- int *i;
+set_pim6(int *i)
{
if ((*i != 1) && (*i != 0))
return (EINVAL);
@@ -434,10 +462,7 @@
* Enable multicast routing
*/
static int
-ip6_mrouter_init(so, v, cmd)
- struct socket *so;
- int v;
- int cmd;
+ip6_mrouter_init(struct socket *so, int v, int cmd)
{
#ifdef MRT6DEBUG
if (mrt6debug)
@@ -480,7 +505,7 @@
* Disable multicast routing
*/
int
-ip6_mrouter_done()
+X_ip6_mrouter_done(void)
{
mifi_t mifi;
int i;
@@ -513,10 +538,6 @@
}
}
}
-#ifdef notyet
- bzero((caddr_t)qtable, sizeof(qtable));
- bzero((caddr_t)tbftable, sizeof(tbftable));
-#endif
bzero((caddr_t)mif6table, sizeof(mif6table));
nummifs = 0;
@@ -576,15 +597,11 @@
* Add a mif to the mif table
*/
static int
-add_m6if(mifcp)
- struct mif6ctl *mifcp;
+add_m6if(struct mif6ctl *mifcp)
{
struct mif6 *mifp;
struct ifnet *ifp;
int error, s;
-#ifdef notyet
- struct tbf *m_tbf = tbftable + mifcp->mif6c_mifi;
-#endif
if (mifcp->mif6c_mifi >= MAXMIFS)
return (EINVAL);
@@ -604,8 +621,8 @@
if_attach(ifp);
multicast_register_if6 = ifp;
reg_mif_num = mifcp->mif6c_mifi;
- /*
- * it is impossible to guess the ifindex of the
+ /*
+ * it is impossible to guess the ifindex of the
* register interface. So mif6c_pifi is automatically
* calculated.
*/
@@ -630,10 +647,7 @@
s = splnet();
mifp->m6_flags = mifcp->mif6c_flags;
mifp->m6_ifp = ifp;
-#ifdef notyet
- /* scaling up here allows division by 1024 in critical code */
- mifp->m6_rate_limit = mifcp->mif6c_rate_limit * 1024 / 1000;
-#endif
+
/* initialize per mif pkt counters */
mifp->m6_pkt_in = 0;
mifp->m6_pkt_out = 0;
@@ -660,8 +674,7 @@
* Delete a mif from the mif table
*/
static int
-del_m6if(mifip)
- mifi_t *mifip;
+del_m6if(mifi_t *mifip)
{
struct mif6 *mifp = mif6table + *mifip;
mifi_t mifi;
@@ -693,10 +706,6 @@
}
}
-#ifdef notyet
- bzero((caddr_t)qtable[*mifip], sizeof(qtable[*mifip]));
- bzero((caddr_t)mifp->m6_tbf, sizeof(*(mifp->m6_tbf)));
-#endif
bzero((caddr_t)mifp, sizeof(*mifp));
/* Adjust nummifs down */
@@ -719,14 +728,14 @@
* Add an mfc entry
*/
static int
-add_m6fc(mfccp)
- struct mf6cctl *mfccp;
+add_m6fc(struct mf6cctl *mfccp)
{
struct mf6c *rt;
u_long hash;
struct rtdetq *rte;
u_short nstl;
int s;
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
mfccp->mf6cc_mcastgrp.sin6_addr, rt);
@@ -734,12 +743,13 @@
/* If an entry already exists, just update the fields */
if (rt) {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_m6fc no upcall h %d o %s g %s p %x\n",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
+ if (mrt6debug & DEBUG_MFC) {
+ log(LOG_DEBUG,
+ "add_m6fc no upcall h %d o %s g %s p %x\n",
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
+ }
#endif
s = splnet();
@@ -766,16 +776,20 @@
log(LOG_ERR,
"add_m6fc: %s o %s g %s p %x dbx %p\n",
"multiple kernel entries",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
+ ip6_sprintf(ip6bufo,
+ &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg,
+ &mfccp->mf6cc_mcastgrp.sin6_addr),
mfccp->mf6cc_parent, rt->mf6c_stall);
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_MFC)
log(LOG_DEBUG,
"add_m6fc o %s g %s p %x dbg %x\n",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
+ ip6_sprintf(ip6bufo,
+ &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg,
+ &mfccp->mf6cc_mcastgrp.sin6_addr),
mfccp->mf6cc_parent, rt->mf6c_stall);
#endif
@@ -812,12 +826,12 @@
if (nstl == 0) {
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_mfc no upcall h %d o %s g %s p %x\n",
- hash,
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
+ log(LOG_DEBUG,
+ "add_mfc no upcall h %d o %s g %s p %x\n",
+ hash,
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
#endif
for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
@@ -876,8 +890,7 @@
* collect delay statistics on the upcalls
*/
static void
-collate(t)
- struct timeval *t;
+collate(struct timeval *t)
{
u_long d;
struct timeval tp;
@@ -902,14 +915,13 @@
* Delete an mfc entry
*/
static int
-del_m6fc(mfccp)
- struct mf6cctl *mfccp;
+del_m6fc(struct mf6cctl *mfccp)
{
- struct sockaddr_in6 origin;
- struct sockaddr_in6 mcastgrp;
- struct mf6c *rt;
- struct mf6c **nptr;
- u_long hash;
+ struct sockaddr_in6 origin;
+ struct sockaddr_in6 mcastgrp;
+ struct mf6c *rt;
+ struct mf6c **nptr;
+ u_long hash;
int s;
origin = mfccp->mf6cc_origin;
@@ -917,10 +929,12 @@
hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_MFC)
+ if (mrt6debug & DEBUG_MFC) {
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
- ip6_sprintf(&origin.sin6_addr),
- ip6_sprintf(&mcastgrp.sin6_addr));
+ ip6_sprintf(ip6bufo, &origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
+ }
#endif
s = splnet();
@@ -950,11 +964,9 @@
}
static int
-socket_send(s, mm, src)
- struct socket *s;
- struct mbuf *mm;
- struct sockaddr_in6 *src;
+socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
{
+
if (s) {
if (sbappendaddr(&s->so_rcv,
(struct sockaddr *)src,
@@ -985,23 +997,21 @@
* that if this function is called from somewhere else in the originating
* context in the future.
*/
-
int
-ip6_mforward(ip6, ifp, m)
- struct ip6_hdr *ip6;
- struct ifnet *ifp;
- struct mbuf *m;
+X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
{
struct mf6c *rt;
struct mif6 *mifp;
struct mbuf *mm;
int s;
mifi_t mifi;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_FORWARD)
log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
- ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ifp->if_index);
#endif
@@ -1028,8 +1038,8 @@
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif));
}
@@ -1067,8 +1077,8 @@
#ifdef MRT6DEBUG
if (mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
#endif
/*
@@ -1253,8 +1263,7 @@
* Call from the Slow Timeout mechanism, every half second.
*/
static void
-expire_upcalls(unused)
- void *unused;
+expire_upcalls(void *unused)
{
struct rtdetq *rte;
struct mf6c *mfc, **nptr;
@@ -1277,10 +1286,13 @@
mfc->mf6c_expire != 0 &&
--mfc->mf6c_expire == 0) {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_EXPIRE)
+ if (mrt6debug & DEBUG_EXPIRE) {
+ char ip6bufo[INET6_ADDRSTRLEN];
+ char ip6bufg[INET6_ADDRSTRLEN];
log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
- ip6_sprintf(&mfc->mf6c_origin.sin6_addr),
- ip6_sprintf(&mfc->mf6c_mcastgrp.sin6_addr));
+ ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
+ }
#endif
/*
* drop all the packets
@@ -1311,10 +1323,7 @@
* Packet forwarding routine once entry in the cache is made
*/
static int
-ip6_mdq(m, ifp, rt)
- struct mbuf *m;
- struct ifnet *ifp;
- struct mf6c *rt;
+ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
mifi_t mifi, iif;
@@ -1492,10 +1501,7 @@
}
static void
-phyint_send(ip6, mifp, m)
- struct ip6_hdr *ip6;
- struct mif6 *mifp;
- struct mbuf *m;
+phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
{
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
@@ -1590,14 +1596,17 @@
icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
else {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_XMIT)
+ if (mrt6debug & DEBUG_XMIT) {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"phyint_send: packet too big on %s o %s "
"g %s size %d(discarded)\n",
if_name(ifp),
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
mb_copy->m_pkthdr.len);
+ }
#endif /* MRT6DEBUG */
m_freem(mb_copy); /* simply discard the packet */
}
@@ -1607,10 +1616,7 @@
}
static int
-register_send(ip6, mif, m)
- struct ip6_hdr *ip6;
- struct mif6 *mif;
- struct mbuf *m;
+register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
{
struct mbuf *mm;
int i, len = m->m_pkthdr.len;
@@ -1618,9 +1624,12 @@
struct mrt6msg *im6;
#ifdef MRT6DEBUG
- if (mrt6debug)
+ if (mrt6debug) {
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n",
- ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+ }
#endif
++pim6stat.pim6s_snd_registers;
@@ -1679,9 +1688,7 @@
* is stripped off, and the inner packet is passed to register_mforward.
*/
int
-pim6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+pim6_input(struct mbuf **mp, int *offp, int proto)
{
struct pim *pim; /* pointer to a pim struct */
struct ip6_hdr *ip6;
@@ -1788,6 +1795,9 @@
struct ip6_hdr *eip6;
u_int32_t *reghdr;
int rc;
+#ifdef MRT6DEBUG
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
++pim6stat.pim6s_rcv_registers;
@@ -1817,7 +1827,7 @@
log(LOG_ERR,
"pim6_input: register packet size too "
"small %d from %s\n",
- pimlen, ip6_sprintf(&ip6->ip6_src));
+ pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
#endif
m_freem(m);
return (IPPROTO_DONE);
@@ -1829,8 +1839,8 @@
log(LOG_DEBUG,
"pim6_input[register], eip6: %s -> %s, "
"eip6 plen %d\n",
- ip6_sprintf(&eip6->ip6_src),
- ip6_sprintf(&eip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst),
ntohs(eip6->ip6_plen));
#endif
@@ -1854,7 +1864,7 @@
log(LOG_DEBUG,
"pim6_input: inner packet of register "
"is not multicast %s\n",
- ip6_sprintf(&eip6->ip6_dst));
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst));
#endif
m_freem(m);
return (IPPROTO_DONE);
@@ -1883,8 +1893,8 @@
log(LOG_DEBUG,
"pim6_input: forwarding decapsulated register: "
"src %s, dst %s, mif %d\n",
- ip6_sprintf(&eip6->ip6_src),
- ip6_sprintf(&eip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst),
reg_mif_num);
}
#endif
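
[Editor's note on the ip6_mroute.c hunks above: most of the churn comes from two upstream changes. First, the multicast-routing entry points were renamed with an X_ prefix so the code can be resolved indirectly when loaded as a module. Second, ip6_sprintf() now takes a caller-supplied buffer instead of returning a pointer to static storage, which is why every log() call grows one or two INET6_ADDRSTRLEN arrays. A minimal userland sketch of the new calling convention follows; it wraps inet_ntop() purely for illustration and is not kernel code.]

	/*
	 * Sketch only: demonstrates the caller-provided-buffer convention
	 * that the new ip6_sprintf() uses.  addr_sprintf() is a stand-in
	 * built on inet_ntop(); the names are illustrative.
	 */
	#include <stdio.h>
	#include <arpa/inet.h>
	#include <netinet/in.h>

	static char *
	addr_sprintf(char *buf, const struct in6_addr *a)
	{
		/* The caller owns the storage, so two results can be live
		 * at once; the old static-buffer variant allowed only one. */
		return ((char *)inet_ntop(AF_INET6, a, buf, INET6_ADDRSTRLEN));
	}

	int
	main(void)
	{
		struct in6_addr src = IN6ADDR_LOOPBACK_INIT;
		struct in6_addr dst = IN6ADDR_ANY_INIT;
		char bufs[INET6_ADDRSTRLEN], bufd[INET6_ADDRSTRLEN];

		/* With static storage the second call would have clobbered
		 * the first result before printf() ever ran. */
		printf("src %s dst %s\n",
		    addr_sprintf(bufs, &src), addr_sprintf(bufd, &dst));
		return (0);
	}
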
Index: in6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_var.h -L sys/netinet6/in6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_var.h
+++ sys/netinet6/in6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.21.2.6 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.31 2007/06/02 08:02:36 jinmei Exp $ */
/* $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ */
/*-
@@ -115,6 +115,9 @@
/* back pointer to the ND prefix (for autoconfigured addresses only) */
struct nd_prefix *ia6_ndpr;
+
+ /* multicast addresses joined from the kernel */
+ LIST_HEAD(, in6_multi_mship) ia6_memberships;
};
/* control structure to manage address selection policy */
@@ -493,9 +496,7 @@
/* struct in6_ifaddr *ia; */ \
do { \
struct ifaddr *ifa; \
- for (ifa = (ifp)->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { \
- if (!ifa->ifa_addr) \
- continue; \
+ TAILQ_FOREACH(ifa, &(ifp)->if_addrlist, ifa_list) { \
if (ifa->ifa_addr->sa_family == AF_INET6) \
break; \
} \
@@ -577,14 +578,14 @@
/* struct in6_multi *in6m; */ \
do { \
if (((in6m) = (step).i_in6m) != NULL) \
- (step).i_in6m = (step).i_in6m->in6m_entry.le_next; \
+ (step).i_in6m = LIST_NEXT((step).i_in6m, in6m_entry); \
} while(0)
#define IN6_FIRST_MULTI(step, in6m) \
/* struct in6_multistep step; */ \
/* struct in6_multi *in6m */ \
do { \
- (step).i_in6m = in6_multihead.lh_first; \
+ (step).i_in6m = LIST_FIRST(&in6_multihead); \
IN6_NEXT_MULTI((step), (in6m)); \
} while(0)
@@ -610,7 +611,7 @@
void in6_purgemkludge __P((struct ifnet *));
struct in6_ifaddr *in6ifa_ifpforlinklocal __P((struct ifnet *, int));
struct in6_ifaddr *in6ifa_ifpwithaddr __P((struct ifnet *, struct in6_addr *));
-char *ip6_sprintf __P((const struct in6_addr *));
+char *ip6_sprintf __P((char *, const struct in6_addr *));
int in6_addr2zoneid __P((struct ifnet *, struct in6_addr *, u_int32_t *));
int in6_matchlen __P((struct in6_addr *, struct in6_addr *));
int in6_are_prefix_equal __P((struct in6_addr *, struct in6_addr *, int));
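
[Editor's note on the in6_var.h hunks above: besides the new ia6_memberships list and the ip6_sprintf() prototype change, the macros now use the queue(3) accessors (TAILQ_FOREACH, LIST_FIRST, LIST_NEXT) instead of touching tqh_first/tqe_next and lh_first/le_next directly. A self-contained sketch of the same idiom, using a throwaway structure rather than the real ifaddr or in6_multi lists:]

	/* Sketch: the queue(3) idiom the header now relies on; 'node' and
	 * the list below are placeholders, not kernel structures. */
	#include <stdio.h>
	#include <sys/queue.h>

	struct node {
		int			value;
		TAILQ_ENTRY(node)	link;	/* replaces a hand-rolled tqe_next walk */
	};
	TAILQ_HEAD(nodelist, node);

	int
	main(void)
	{
		struct nodelist head = TAILQ_HEAD_INITIALIZER(head);
		struct node a = { .value = 1 }, b = { .value = 2 }, *np;

		TAILQ_INSERT_TAIL(&head, &a, link);
		TAILQ_INSERT_TAIL(&head, &b, link);

		/* Equivalent to walking head.tqh_first / link.tqe_next by hand,
		 * but independent of the queue's internal field names. */
		TAILQ_FOREACH(np, &head, link)
			printf("%d\n", np->value);
		return (0);
	}
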
Index: nd6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/nd6.h -L sys/netinet6/nd6.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/nd6.h
+++ sys/netinet6/nd6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6.h,v 1.19.2.2 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6.h,v 1.21 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ */
/*-
Index: ip6_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_output.c -L sys/netinet6/ip6_output.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.90.2.10 2006/02/14 21:38:46 rwatson Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.109 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
/*-
@@ -61,7 +61,6 @@
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -71,10 +70,10 @@
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/errno.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/systm.h>
#include <sys/kernel.h>
#include <net/if.h>
@@ -93,22 +92,11 @@
#include <netinet6/nd6.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
-
-#include <net/net_osdep.h>
+#include <netinet6/ip6_ipsec.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
#include <netinet6/scope6_var.h>
@@ -144,6 +132,42 @@
/*
+ * Make an extension header from option data. hp is the source, and
+ * mp is the destination.
+ */
+#define MAKE_EXTHDR(hp, mp) \
+ do { \
+ if (hp) { \
+ struct ip6_ext *eh = (struct ip6_ext *)(hp); \
+ error = ip6_copyexthdr((mp), (caddr_t)(hp), \
+ ((eh)->ip6e_len + 1) << 3); \
+ if (error) \
+ goto freehdrs; \
+ } \
+ } while (/*CONSTCOND*/ 0)
+
+/*
+ * Form a chain of extension headers.
+ * m is the extension header mbuf
+ * mp is the previous mbuf in the chain
+ * p is the next header
+ * i is the type of option.
+ */
+#define MAKE_CHAIN(m, mp, p, i)\
+ do {\
+ if (m) {\
+ if (!hdrsplit) \
+ panic("assumption failed: hdr not split"); \
+ *mtod((m), u_char *) = *(p);\
+ *(p) = (i);\
+ p = mtod((m), u_char *);\
+ (m)->m_next = (mp)->m_next;\
+ (mp)->m_next = (m);\
+ (mp) = (m);\
+ }\
+ } while (/*CONSTCOND*/ 0)
+
+/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
* This function may modify ver and hlim only.
@@ -153,20 +177,18 @@
* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
* which is rt_rmx.rmx_mtu.
+ *
+ * ifpp - XXX: just for statistics
*/
int
-ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
- struct mbuf *m0;
- struct ip6_pktopts *opt;
- struct route_in6 *ro;
- int flags;
- struct ip6_moptions *im6o;
- struct ifnet **ifpp; /* XXX: just for statistics */
- struct inpcb *inp;
+ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
+ struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+ struct ifnet **ifpp, struct inpcb *inp)
{
struct ip6_hdr *ip6, *mhip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
+ struct mbuf *mprev = NULL;
int hlen, tlen, len, off;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
@@ -183,24 +205,21 @@
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
int needipsec = 0;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
+ struct ipsec_output_state state;
+ struct ip6_rthdr *rh = NULL;
int needipsectun = 0;
+ int segleft_org = 0;
struct secpolicy *sp = NULL;
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
ip6 = mtod(m, struct ip6_hdr *);
- finaldst = ip6->ip6_dst;
+ if (ip6 == NULL) {
+ printf ("ip6 is NULL");
+ goto bad;
+ }
-#define MAKE_EXTHDR(hp, mp) \
- do { \
- if (hp) { \
- struct ip6_ext *eh = (struct ip6_ext *)(hp); \
- error = ip6_copyexthdr((mp), (caddr_t)(hp), \
- ((eh)->ip6e_len + 1) << 3); \
- if (error) \
- goto freehdrs; \
- } \
- } while (/*CONSTCOND*/ 0)
+ finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
@@ -211,7 +230,7 @@
if (opt->ip6po_rthdr) {
/*
* Destination options header(1st part)
- * This only makes sence with a routing header.
+ * This only makes sense with a routing header.
* See Section 9.2 of RFC 3542.
* Disabling this part just for MIP6 convenience is
* a bad idea. We need to think carefully about a
@@ -227,104 +246,39 @@
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
}
+ /*
+ * IPSec checking which handles several cases.
+ * FAST IPSEC: We re-injected the packet.
+ */
#ifdef IPSEC
- /* get a security policy for this packet */
- if (inp == NULL)
- sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
- else
- sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- ipsec6stat.out_inval++;
- goto freehdrs;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- ipsec6stat.out_polvio++;
+ switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+ {
+ case 1: /* Bad packet */
goto freehdrs;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- /* no need to do IPsec. */
- needipsec = 0;
- break;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto freehdrs;
- }
+ case -1: /* Do IPSec */
needipsec = 1;
- break;
-
- case IPSEC_POLICY_ENTRUST:
+ case 0: /* No IPSec */
default:
- printf("ip6_output: Invalid policy found. %d\n", sp->policy);
- }
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- /* get a security policy for this packet */
- if (inp == NULL)
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
- else
- sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- newipsecstat.ips_out_inval++;
- goto freehdrs;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- newipsecstat.ips_out_polvio++;
- goto freehdrs;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- /* no need to do IPsec. */
- needipsec = 0;
break;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto freehdrs;
- }
- needipsec = 1;
- break;
-
- case IPSEC_POLICY_ENTRUST:
- default:
- printf("ip6_output: Invalid policy found. %d\n", sp->policy);
}
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/*
* Calculate the total length of the extension header chain.
* Keep the length of the unfragmentable part for fragmentation.
*/
optlen = 0;
- if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
- if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
- if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
+ if (exthdrs.ip6e_hbh)
+ optlen += exthdrs.ip6e_hbh->m_len;
+ if (exthdrs.ip6e_dest1)
+ optlen += exthdrs.ip6e_dest1->m_len;
+ if (exthdrs.ip6e_rthdr)
+ optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
+
/* NOTE: we don't add AH/ESP length here. do that later. */
- if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
+ if (exthdrs.ip6e_dest2)
+ optlen += exthdrs.ip6e_dest2->m_len;
/*
* If we need IPsec, or there is at least one extension header,
@@ -374,106 +328,94 @@
* during the header composing process, "m" points to IPv6 header.
* "mprev" points to an extension header prior to esp.
*/
- {
- u_char *nexthdrp = &ip6->ip6_nxt;
- struct mbuf *mprev = m;
+ u_char *nexthdrp = &ip6->ip6_nxt;
+ mprev = m;
- /*
- * we treat dest2 specially. this makes IPsec processing
- * much easier. the goal here is to make mprev point the
- * mbuf prior to dest2.
- *
- * result: IPv6 dest2 payload
- * m and mprev will point to IPv6 header.
- */
- if (exthdrs.ip6e_dest2) {
- if (!hdrsplit)
- panic("assumption failed: hdr not split");
- exthdrs.ip6e_dest2->m_next = m->m_next;
- m->m_next = exthdrs.ip6e_dest2;
- *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
- ip6->ip6_nxt = IPPROTO_DSTOPTS;
- }
+ /*
+ * we treat dest2 specially. this makes IPsec processing
+ * much easier. the goal here is to make mprev point the
+ * mbuf prior to dest2.
+ *
+ * result: IPv6 dest2 payload
+ * m and mprev will point to IPv6 header.
+ */
+ if (exthdrs.ip6e_dest2) {
+ if (!hdrsplit)
+ panic("assumption failed: hdr not split");
+ exthdrs.ip6e_dest2->m_next = m->m_next;
+ m->m_next = exthdrs.ip6e_dest2;
+ *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
+ ip6->ip6_nxt = IPPROTO_DSTOPTS;
+ }
-#define MAKE_CHAIN(m, mp, p, i)\
- do {\
- if (m) {\
- if (!hdrsplit) \
- panic("assumption failed: hdr not split"); \
- *mtod((m), u_char *) = *(p);\
- *(p) = (i);\
- p = mtod((m), u_char *);\
- (m)->m_next = (mp)->m_next;\
- (mp)->m_next = (m);\
- (mp) = (m);\
- }\
- } while (/*CONSTCOND*/ 0)
- /*
- * result: IPv6 hbh dest1 rthdr dest2 payload
- * m will point to IPv6 header. mprev will point to the
- * extension header prior to dest2 (rthdr in the above case).
- */
- MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
- MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
- IPPROTO_DSTOPTS);
- MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
- IPPROTO_ROUTING);
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
- if (!needipsec)
- goto skip_ipsec2;
-
- /*
- * pointers after IPsec headers are not valid any more.
- * other pointers need a great care too.
- * (IPsec routines should not mangle mbufs prior to AH/ESP)
- */
- exthdrs.ip6e_dest2 = NULL;
+ /*
+ * result: IPv6 hbh dest1 rthdr dest2 payload
+ * m will point to IPv6 header. mprev will point to the
+ * extension header prior to dest2 (rthdr in the above case).
+ */
+ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
+ MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
+ IPPROTO_DSTOPTS);
+ MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
+ IPPROTO_ROUTING);
- {
- struct ip6_rthdr *rh = NULL;
- int segleft_org = 0;
- struct ipsec_output_state state;
+#ifdef IPSEC
+ if (!needipsec)
+ goto skip_ipsec2;
- if (exthdrs.ip6e_rthdr) {
- rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
- segleft_org = rh->ip6r_segleft;
- rh->ip6r_segleft = 0;
- }
+ /*
+ * pointers after IPsec headers are not valid any more.
+ * other pointers need a great care too.
+ * (IPsec routines should not mangle mbufs prior to AH/ESP)
+ */
+ exthdrs.ip6e_dest2 = NULL;
- bzero(&state, sizeof(state));
- state.m = m;
- error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
- &needipsectun);
- m = state.m;
- if (error) {
- /* mbuf is already reclaimed in ipsec6_output_trans. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("ip6_output (ipsec): error code %d\n", error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- }
- if (exthdrs.ip6e_rthdr) {
- /* ah6_output doesn't modify mbuf chain */
- rh->ip6r_segleft = segleft_org;
+ if (exthdrs.ip6e_rthdr) {
+ rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
+ segleft_org = rh->ip6r_segleft;
+ rh->ip6r_segleft = 0;
+ }
+
+ bzero(&state, sizeof(state));
+ state.m = m;
+ error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
+ &needipsectun);
+ m = state.m;
+ if (error) {
+ /* mbuf is already reclaimed in ipsec6_output_trans. */
+ m = NULL;
+ switch (error) {
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ case EMSGSIZE:
+ case ENOBUFS:
+ case ENOMEM:
+ break;
+ default:
+ printf("ip6_output (ipsec): error code %d\n", error);
+ /* FALLTHROUGH */
+ case ENOENT:
+ /* don't show these error codes to the user */
+ error = 0;
+ break;
}
- }
-skip_ipsec2:;
-#endif
+ goto bad;
+ } else if (!needipsectun) {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto done;
+ }
+ if (exthdrs.ip6e_rthdr) {
+ /* ah6_output doesn't modify mbuf chain */
+ rh->ip6r_segleft = segleft_org;
}
+skip_ipsec2:;
+#endif /* IPSEC */
/*
* If there is a routing header, replace the destination address field
@@ -551,7 +493,7 @@
dst = (struct sockaddr_in6 *)&ro->ro_dst;
again:
- /*
+ /*
* if specified, try to fill in the traffic class field.
* do not override if a non-zero value is already set.
* we check the diffserv field and the ecn field separately.
@@ -577,7 +519,10 @@
ip6->ip6_hlim = ip6_defmcasthlim;
}
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
+ /*
+ * We may re-inject packets into the stack here.
+ */
if (needipsec && needipsectun) {
struct ipsec_output_state state;
@@ -622,6 +567,15 @@
break;
}
goto bad;
+ } else {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto done;
}
exthdrs.ip6e_ip6 = m;
@@ -839,23 +793,6 @@
in6_clearscope(&ip6->ip6_dst);
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- m->m_pkthdr.rcvif = NULL; /* XXX */
- /* If ipfw says divert, we have to just drop packet */
- if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
- m_freem(m);
- goto done;
- }
- if (!m) {
- error = EACCES;
- goto done;
- }
- }
-
- /*
* If the outgoing packet contains a hop-by-hop options header,
* it must be examined and processed even by the source node.
* (RFC 2460, section 4.)
@@ -889,7 +826,7 @@
}
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passout;
odst = ip6->ip6_dst;
@@ -987,10 +924,6 @@
ia6->ia_ifa.if_opackets++;
ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
goto done;
}
@@ -1013,10 +946,7 @@
struct ip6_frag *ip6f;
u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
-#if 0
- struct ip6ctlparam ip6cp;
- u_int32_t mtu32;
-#endif
+
int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
/*
@@ -1028,25 +958,6 @@
if (mtu > IPV6_MAXPACKET)
mtu = IPV6_MAXPACKET;
-#if 0
- /*
- * It is believed this code is a leftover from the
- * development of the IPV6_RECVPATHMTU sockopt and
- * associated work to implement RFC3542.
- * It's not entirely clear what the intent of the API
- * is at this point, so disable this code for now.
- * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
- * will send notifications if the application requests.
- */
-
- /* Notify a proper path MTU to applications. */
- mtu32 = (u_int32_t)mtu;
- bzero(&ip6cp, sizeof(ip6cp));
- ip6cp.ip6c_cmdarg = (void *)&mtu32;
- pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
- (void *)&ip6cp);
-#endif
-
len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
if (len < 8) {
error = EMSGSIZE;
@@ -1147,15 +1058,11 @@
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
if (error == 0) {
- /* Record statistics for this interface address. */
- if (ia) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
- }
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
+ /* Record statistics for this interface address. */
+ if (ia) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
} else
m_freem(m);
@@ -1171,15 +1078,6 @@
RTFREE(ro_pmtu->ro_rt);
}
-#ifdef IPSEC
- if (sp != NULL)
- key_freesp(sp);
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif /* FAST_IPSEC */
-
return (error);
freehdrs:
@@ -1189,15 +1087,13 @@
m_freem(exthdrs.ip6e_dest2);
/* FALLTHROUGH */
bad:
- m_freem(m);
+ if (m)
+ m_freem(m);
goto done;
}
static int
-ip6_copyexthdr(mp, hdr, hlen)
- struct mbuf **mp;
- caddr_t hdr;
- int hlen;
+ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
{
struct mbuf *m;
@@ -1227,9 +1123,7 @@
* Insert jumbo payload option.
*/
static int
-ip6_insert_jumboopt(exthdrs, plen)
- struct ip6_exthdrs *exthdrs;
- u_int32_t plen;
+ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
{
struct mbuf *mopt;
u_char *optbuf;
@@ -1324,10 +1218,8 @@
* Insert fragment header and copy unfragmentable header portions.
*/
static int
-ip6_insertfraghdr(m0, m, hlen, frghdrp)
- struct mbuf *m0, *m;
- int hlen;
- struct ip6_frag **frghdrp;
+ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
+ struct ip6_frag **frghdrp)
{
struct mbuf *n, *mlast;
@@ -1367,12 +1259,9 @@
}
static int
-ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
- struct route_in6 *ro_pmtu, *ro;
- struct ifnet *ifp;
- struct in6_addr *dst;
- u_long *mtup;
- int *alwaysfragp;
+ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
+ struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
+ int *alwaysfragp)
{
u_int32_t mtu = 0;
int alwaysfrag = 0;
@@ -1453,9 +1342,7 @@
* IP6 socket option processing.
*/
int
-ip6_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int privileged, optdatalen, uproto;
void *optdata;
@@ -1869,7 +1756,7 @@
}
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req = NULL;
@@ -1889,28 +1776,7 @@
m_freem(m);
}
break;
-#endif /* KAME IPSEC */
-
- case IPV6_FW_ADD:
- case IPV6_FW_DEL:
- case IPV6_FW_FLUSH:
- case IPV6_FW_ZERO:
- {
- struct mbuf *m;
- struct mbuf **mp = &m;
-
- if (ip6_fw_ctl_ptr == NULL)
- return EINVAL;
- /* XXX */
- if ((error = soopt_getm(sopt, &m)) != 0)
- break;
- /* XXX */
- if ((error = soopt_mcopyin(sopt, m)) != 0)
- break;
- error = (*ip6_fw_ctl_ptr)(optname, mp);
- m = *mp;
- }
- break;
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -2107,7 +1973,7 @@
}
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req = NULL;
@@ -2136,24 +2002,7 @@
m_freem(m);
break;
}
-#endif /* KAME IPSEC */
-
- case IPV6_FW_GET:
- {
- struct mbuf *m;
- struct mbuf **mp = &m;
-
- if (ip6_fw_ctl_ptr == NULL)
- {
- return EINVAL;
- }
- error = (*ip6_fw_ctl_ptr)(optname, mp);
- if (error == 0)
- error = soopt_mcopyout(sopt, m); /* XXX */
- if (error == 0 && m)
- m_freem(m);
- }
- break;
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -2168,9 +2017,7 @@
}
int
-ip6_raw_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0, optval, optlen;
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
@@ -2248,11 +2095,8 @@
* specifying behavior of outgoing packets.
*/
static int
-ip6_pcbopts(pktopt, m, so, sopt)
- struct ip6_pktopts **pktopt;
- struct mbuf *m;
- struct socket *so;
- struct sockopt *sopt;
+ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
+ struct socket *so, struct sockopt *sopt)
{
struct ip6_pktopts *opt = *pktopt;
int error = 0;
@@ -2299,8 +2143,7 @@
* the struct.
*/
void
-ip6_initpktopts(opt)
- struct ip6_pktopts *opt;
+ip6_initpktopts(struct ip6_pktopts *opt)
{
bzero(opt, sizeof(*opt));
@@ -2311,11 +2154,8 @@
}
static int
-ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
- int optname, len, priv;
- u_char *buf;
- struct ip6_pktopts **pktopt;
- int uproto;
+ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
+ int priv, int uproto)
{
struct ip6_pktopts *opt;
@@ -2330,10 +2170,7 @@
}
static int
-ip6_getpcbopt(pktopt, optname, sopt)
- struct ip6_pktopts *pktopt;
- struct sockopt *sopt;
- int optname;
+ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
{
void *optdata = NULL;
int optdatalen = 0;
@@ -2431,9 +2268,7 @@
}
void
-ip6_clearpktopts(pktopt, optname)
- struct ip6_pktopts *pktopt;
- int optname;
+ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
{
if (pktopt == NULL)
return;
@@ -2494,9 +2329,7 @@
} while (/*CONSTCOND*/ 0)
static int
-copypktopts(dst, src, canwait)
- struct ip6_pktopts *dst, *src;
- int canwait;
+copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
{
if (dst == NULL || src == NULL) {
printf("ip6_clearpktopts: invalid argument\n");
@@ -2509,7 +2342,7 @@
if (src->ip6po_pktinfo) {
dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
M_IP6OPT, canwait);
- if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
+ if (dst->ip6po_pktinfo == NULL)
goto bad;
*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
}
@@ -2539,15 +2372,13 @@
#undef PKTOPT_EXTHDRCPY
struct ip6_pktopts *
-ip6_copypktopts(src, canwait)
- struct ip6_pktopts *src;
- int canwait;
+ip6_copypktopts(struct ip6_pktopts *src, int canwait)
{
int error;
struct ip6_pktopts *dst;
dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
- if (dst == NULL && canwait == M_NOWAIT)
+ if (dst == NULL)
return (NULL);
ip6_initpktopts(dst);
@@ -2560,8 +2391,7 @@
}
void
-ip6_freepcbopts(pktopt)
- struct ip6_pktopts *pktopt;
+ip6_freepcbopts(struct ip6_pktopts *pktopt)
{
if (pktopt == NULL)
return;
@@ -2575,10 +2405,7 @@
* Set the IP6 multicast options in response to user setsockopt().
*/
static int
-ip6_setmoptions(optname, im6op, m)
- int optname;
- struct ip6_moptions **im6op;
- struct mbuf *m;
+ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
{
int error = 0;
u_int loop, ifindex;
@@ -2880,10 +2707,7 @@
* Return the IP6 multicast options in response to user getsockopt().
*/
static int
-ip6_getmoptions(optname, im6o, mp)
- int optname;
- struct ip6_moptions *im6o;
- struct mbuf **mp;
+ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
{
u_int *hlim, *loop, *ifindex;
@@ -2927,8 +2751,7 @@
* Discard the IP6 multicast options.
*/
void
-ip6_freemoptions(im6o)
- struct ip6_moptions *im6o;
+ip6_freemoptions(struct ip6_moptions *im6o)
{
struct in6_multi_mship *imm;
@@ -2948,10 +2771,8 @@
* Set IPv6 outgoing packet options based on advanced API.
*/
int
-ip6_setpktopts(control, opt, stickyopt, priv, uproto)
- struct mbuf *control;
- struct ip6_pktopts *opt, *stickyopt;
- int priv, uproto;
+ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
+ struct ip6_pktopts *stickyopt, int priv, int uproto)
{
struct cmsghdr *cm = 0;
@@ -3014,10 +2835,8 @@
* "sticky=1, cmsg=1": RFC2292 socket option
*/
static int
-ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
- int optname, len, priv, sticky, cmsg, uproto;
- u_char *buf;
- struct ip6_pktopts *opt;
+ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
+ int priv, int sticky, int cmsg, int uproto)
{
int minmtupolicy, preftemp;
@@ -3400,10 +3219,7 @@
* pointer that might NOT be &loif -- easier than replicating that code here.
*/
void
-ip6_mloopback(ifp, m, dst)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr_in6 *dst;
+ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
{
struct mbuf *copym;
struct ip6_hdr *ip6;
@@ -3446,9 +3262,7 @@
* Chop IPv6 header off from the payload.
*/
static int
-ip6_splithdr(m, exthdrs)
- struct mbuf *m;
- struct ip6_exthdrs *exthdrs;
+ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
{
struct mbuf *mh;
struct ip6_hdr *ip6;
@@ -3477,8 +3291,7 @@
* Compute IPv6 extension header length.
*/
int
-ip6_optlen(in6p)
- struct in6pcb *in6p;
+ip6_optlen(struct in6pcb *in6p)
{
int len;
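
[Editor's note on the ip6_output.c hunks above: the separate KAME IPSEC and FAST_IPSEC code paths are folded into one, with the policy lookup moved into the new ip6_ipsec_output() helper whose return value selects the action (1 drop the packet, -1 apply IPsec, 0 pass through), and the MAKE_EXTHDR/MAKE_CHAIN macros are hoisted to file scope. One detail worth calling out is the length arithmetic MAKE_EXTHDR hands to ip6_copyexthdr(): the ip6_ext length field counts 8-byte units excluding the first, hence (ip6e_len + 1) << 3. A small illustrative sketch, not taken from the kernel:]

	/* Sketch: sizing an IPv6 extension header the way MAKE_EXTHDR does. */
	#include <stdio.h>
	#include <netinet/in.h>
	#include <netinet/ip6.h>	/* struct ip6_ext */

	int
	main(void)
	{
		struct ip6_ext eh;

		eh.ip6e_nxt = 59;	/* "no next header", illustrative value */
		eh.ip6e_len = 1;	/* one additional 8-byte unit */

		/* 8 bytes for the first unit plus ip6e_len further units. */
		printf("extension header occupies %d bytes\n",
		    (eh.ip6e_len + 1) << 3);
		return (0);
	}
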
Index: in6_ifattach.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_ifattach.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_ifattach.h -L sys/netinet6/in6_ifattach.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_ifattach.h
+++ sys/netinet6/in6_ifattach.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_ifattach.h,v 1.5.2.2 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_ifattach.h,v 1.7 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: in6_ifattach.h,v 1.14 2001/02/08 12:48:39 jinmei Exp $ */
/*-
Index: pim6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/pim6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/pim6_var.h -L sys/netinet6/pim6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/pim6_var.h
+++ sys/netinet6/pim6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/pim6_var.h,v 1.4 2005/01/07 02:30:35 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/pim6_var.h,v 1.5 2005/08/10 07:10:02 obrien Exp $ */
/* $KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $ */
/*-
@@ -52,8 +52,6 @@
};
#if (defined(KERNEL)) || (defined(_KERNEL))
-extern struct pim6stat pim6stat;
-
int pim6_input __P((struct mbuf **, int*, int));
#endif /* KERNEL */
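
[Editor's note on the pim6_var.h hunk above: the pim6stat extern is gone because the counters are now published through the sysctl tree instead (see the SYSCTL_NODE/SYSCTL_STRUCT additions in ip6_mroute.c earlier in this mail). A hedged userland sketch of reading them; the MIB path net.inet6.pim.stats is inferred from those additions and should be verified on the target system, and error handling is minimal:]

	/* Sketch: fetching the PIM6 counters via sysctl rather than the
	 * removed pim6stat extern. */
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <netinet6/pim6_var.h>

	int
	main(void)
	{
		struct pim6stat ps;
		size_t len = sizeof(ps);

		if (sysctlbyname("net.inet6.pim.stats", &ps, &len, NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("%llu register message(s) received\n",
		    (unsigned long long)ps.pim6s_rcv_registers);
		return (0);
	}
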
Index: udp6_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/udp6_usrreq.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/udp6_usrreq.c -L sys/netinet6/udp6_usrreq.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -1,5 +1,5 @@
-/* $FreeBSD: src/sys/netinet6/udp6_usrreq.c,v 1.54.2.2 2006/02/09 02:29:06 ume Exp $ */
/* $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ */
+/* $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -31,8 +31,9 @@
*/
/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -58,24 +59,25 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)udp_var.h 8.1 (Berkeley) 6/10/93
+ * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
+ * $FreeBSD: src/sys/netinet6/udp6_usrreq.c,v 1.81 2007/09/08 08:18:24 rwatson Exp $
*/
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
-#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -90,7 +92,9 @@
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
#include <netinet/ip6.h>
+#include <netinet/icmp_var.h>
#include <netinet/icmp6.h>
#include <netinet/ip_var.h>
#include <netinet/udp.h>
@@ -102,44 +106,79 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
/*
- * UDP protocol inplementation.
+ * UDP protocol implementation.
* Per RFC 768, August, 1980.
*/
-extern struct protosw inetsw[];
-static int udp6_detach __P((struct socket *so));
+extern struct protosw inetsw[];
+static void udp6_detach(struct socket *so);
+
+static void
+udp6_append(struct inpcb *inp, struct mbuf *n, int off,
+ struct sockaddr_in6 *fromsa)
+{
+ struct socket *so;
+ struct mbuf *opts;
+
+ INP_LOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ /* Check AH/ESP integrity. */
+ if (ipsec6_in_reject(n, inp)) {
+ m_freem(n);
+ ipsec6stat.in_polvio++;
+ return;
+ }
+#endif /* IPSEC */
+#ifdef MAC
+ if (mac_check_inpcb_deliver(inp, n) != 0) {
+ m_freem(n);
+ return;
+ }
+#endif
+ opts = NULL;
+ if (inp->in6p_flags & IN6P_CONTROLOPTS ||
+ inp->inp_socket->so_options & SO_TIMESTAMP)
+ ip6_savecontrol(inp, n, &opts);
+ m_adj(n, off + sizeof(struct udphdr));
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
+ opts) == 0) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ udpstat.udps_fullsock++;
+ } else
+ sorwakeup_locked(so);
+}
int
-udp6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
-{
- struct mbuf *m = *mp, *opts;
- register struct ip6_hdr *ip6;
- register struct udphdr *uh;
- register struct inpcb *in6p;
+udp6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6;
+ struct udphdr *uh;
+ struct inpcb *inp;
int off = *offp;
int plen, ulen;
struct sockaddr_in6 fromsa;
- opts = NULL;
-
ip6 = mtod(m, struct ip6_hdr *);
if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
/* XXX send icmp6 host/port unreach? */
m_freem(m);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
#ifndef PULLDOWN_TEST
@@ -149,17 +188,23 @@
#else
IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
if (!uh)
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
#endif
udpstat.udps_ipackets++;
+ /*
+ * Destination port of 0 is illegal, based on RFC768.
+ */
+ if (uh->uh_dport == 0)
+ goto badunlocked;
+
plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
ulen = ntohs((u_short)uh->uh_ulen);
if (plen != ulen) {
udpstat.udps_badlen++;
- goto bad;
+ goto badunlocked;
}
/*
@@ -167,229 +212,151 @@
*/
if (uh->uh_sum == 0) {
udpstat.udps_nosum++;
- goto bad;
+ goto badunlocked;
}
if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
udpstat.udps_badsum++;
- goto bad;
+ goto badunlocked;
}
- if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
- struct inpcb *last;
+ /*
+ * Construct sockaddr format source address.
+ */
+ init_sin6(&fromsa, m);
+ fromsa.sin6_port = uh->uh_sport;
- /*
- * Deliver a multicast datagram to all sockets
- * for which the local and remote addresses and ports match
- * those of the incoming datagram. This allows more than
- * one process to receive multicasts on the same port.
- * (This really ought to be done for unicast datagrams as
- * well, but that would cause problems with existing
- * applications that open both address-specific sockets and
- * a wildcard socket listening to the same port -- they would
- * end up receiving duplicates of every unicast datagram.
- * Those applications open the multiple sockets to overcome an
- * inadequacy of the UDP socket interface, but for backwards
- * compatibility we avoid the problem here rather than
- * fixing the interface. Maybe 4.5BSD will remedy this?)
- */
+ INP_INFO_RLOCK(&udbinfo);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+ struct inpcb *last;
/*
- * In a case that laddr should be set to the link-local
+ * In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
- * specified in the received packet does not match with
- * laddr. To cure this situation, the matching is relaxed
- * if the receiving interface is the same as one specified
- * in the socket and if the destination multicast address
- * matches one of the multicast groups specified in the socket.
- */
-
- /*
- * Construct sockaddr format source address.
- */
- init_sin6(&fromsa, m);
- fromsa.sin6_port = uh->uh_sport;
- /*
- * KAME note: traditionally we dropped udpiphdr from mbuf here.
- * We need udphdr for IPsec processing so we do that later.
+ * specified in the received packet will not match laddr. To
+ * handle this situation, matching is relaxed if the
+ * receiving interface is the same as one specified in the
+ * socket and if the destination multicast address matches
+ * one of the multicast groups specified in the socket.
*/
/*
- * Locate pcb(s) for datagram.
- * (Algorithm copied from raw_intr().)
+ * KAME note: traditionally we dropped udpiphdr from mbuf
+ * here. We need udphdr for IPsec processing so we do that
+ * later.
*/
last = NULL;
- LIST_FOREACH(in6p, &udb, inp_list) {
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ LIST_FOREACH(inp, &udb, inp_list) {
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+ if (inp->in6p_lport != uh->uh_dport)
continue;
- if (in6p->in6p_lport != uh->uh_dport)
+ /*
+ * XXX: Do not check source port of incoming datagram
+ * unless inp_connect() has been called to bind the
+ * fport part of the 4-tuple; the source could be
+ * trying to talk to us with an ephemeral port.
+ */
+ if (inp->inp_fport != 0 &&
+ inp->inp_fport != uh->uh_sport)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
- if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
&ip6->ip6_dst))
continue;
}
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
- if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
&ip6->ip6_src) ||
- in6p->in6p_fport != uh->uh_sport)
+ inp->in6p_fport != uh->uh_sport)
continue;
}
if (last != NULL) {
struct mbuf *n;
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, last)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- /* do not inject data into pcb */
- } else
-#endif /*IPSEC || FAST_IPSEC*/
if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
- /*
- * KAME NOTE: do not
- * m_copy(m, offset, ...) above.
- * sbappendaddr() expects M_PKTHDR,
- * and m_copy() will copy M_PKTHDR
- * only if offset is 0.
- */
- if (last->in6p_flags & IN6P_CONTROLOPTS
- || last->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(last, n, &opts);
-
- m_adj(n, off + sizeof(struct udphdr));
- if (sbappendaddr(&last->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa,
- n, opts) == 0) {
- m_freem(n);
- if (opts)
- m_freem(opts);
- udpstat.udps_fullsock++;
- } else
- sorwakeup(last->in6p_socket);
- opts = NULL;
+ INP_LOCK(last);
+ udp6_append(last, n, off, &fromsa);
+ INP_UNLOCK(last);
}
}
- last = in6p;
+ last = inp;
/*
* Don't look for additional matches if this one does
* not have either the SO_REUSEPORT or SO_REUSEADDR
- * socket options set. This heuristic avoids searching
- * through all pcbs in the common case of a non-shared
- * port. It assumes that an application will never
- * clear these options after setting them.
+ * socket options set. This heuristic avoids
+ * searching through all pcbs in the common case of a
+ * non-shared port. It assumes that an application
+ * will never clear these options after setting them.
*/
- if ((last->in6p_socket->so_options &
+ if ((last->inp_socket->so_options &
(SO_REUSEPORT|SO_REUSEADDR)) == 0)
break;
}
if (last == NULL) {
/*
- * No matching pcb found; discard datagram.
- * (No need to send an ICMP Port Unreachable
- * for a broadcast or multicast datgram.)
+ * No matching pcb found; discard datagram. (No need
+ * to send an ICMP Port Unreachable for a broadcast
+ * or multicast datgram.)
*/
udpstat.udps_noport++;
udpstat.udps_noportmcast++;
- goto bad;
- }
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, last)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- goto bad;
- }
-#endif /*IPSEC || FAST_IPSEC*/
- if (last->in6p_flags & IN6P_CONTROLOPTS
- || last->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(last, m, &opts);
-
- m_adj(m, off + sizeof(struct udphdr));
- if (sbappendaddr(&last->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa,
- m, opts) == 0) {
- udpstat.udps_fullsock++;
- goto bad;
+ goto badheadlocked;
}
- sorwakeup(last->in6p_socket);
- return IPPROTO_DONE;
+ INP_LOCK(last);
+ udp6_append(last, m, off, &fromsa);
+ INP_UNLOCK(last);
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (IPPROTO_DONE);
}
/*
* Locate pcb for datagram.
*/
- in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport,
- &ip6->ip6_dst, uh->uh_dport, 1,
- m->m_pkthdr.rcvif);
- if (in6p == 0) {
- if (log_in_vain) {
- char buf[INET6_ADDRSTRLEN];
+ inp = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport,
+ &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+ if (inp == NULL) {
+ if (udp_log_in_vain) {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
- strcpy(buf, ip6_sprintf(&ip6->ip6_dst));
log(LOG_INFO,
"Connection attempt to UDP [%s]:%d from [%s]:%d\n",
- buf, ntohs(uh->uh_dport),
- ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport));
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
+ ntohs(uh->uh_dport),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ntohs(uh->uh_sport));
}
udpstat.udps_noport++;
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
udpstat.udps_noportmcast++;
- goto bad;
+ goto badheadlocked;
}
+ INP_INFO_RUNLOCK(&udbinfo);
+ if (udp_blackhole)
+ goto badunlocked;
+ if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
+ goto badunlocked;
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
- return IPPROTO_DONE;
- }
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, in6p)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- goto bad;
+ return (IPPROTO_DONE);
}
-#endif /*IPSEC || FAST_IPSEC*/
+ INP_LOCK(inp);
+ udp6_append(inp, m, off, &fromsa);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (IPPROTO_DONE);
- /*
- * Construct sockaddr format source address.
- * Stuff source address and datagram in user buffer.
- */
- init_sin6(&fromsa, m);
- fromsa.sin6_port = uh->uh_sport;
- if (in6p->in6p_flags & IN6P_CONTROLOPTS
- || in6p->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(in6p, m, &opts);
- m_adj(m, off + sizeof(struct udphdr));
- if (sbappendaddr(&in6p->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa, m, opts) == 0) {
- udpstat.udps_fullsock++;
- goto bad;
- }
- sorwakeup(in6p->in6p_socket);
- return IPPROTO_DONE;
-bad:
+badheadlocked:
+ INP_INFO_RUNLOCK(&udbinfo);
+badunlocked:
if (m)
m_freem(m);
- if (opts)
- m_freem(opts);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
void
-udp6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct udphdr uh;
struct ip6_hdr *ip6;
@@ -438,7 +405,7 @@
* M and OFF are valid.
*/
- /* check if we can safely examine src and dst ports */
+ /* Check if we can safely examine src and dst ports. */
if (m->m_pkthdr.len < off + sizeof(*uhp))
return;
@@ -446,12 +413,11 @@
m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
(void) in6_pcbnotify(&udbinfo, sa, uh.uh_dport,
- (struct sockaddr *)ip6cp->ip6c_src,
- uh.uh_sport, cmd, cmdarg, notify);
+ (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
+ cmdarg, notify);
} else
(void) in6_pcbnotify(&udbinfo, sa, 0,
- (const struct sockaddr *)sa6_src,
- 0, cmd, cmdarg, notify);
+ (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
}
static int
@@ -460,9 +426,9 @@
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct inpcb *inp;
- int error, s;
+ int error;
- error = suser(req->td);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
@@ -477,76 +443,305 @@
(error = sa6_embedscope(&addrs[1], ip6_use_defzone)) != 0) {
return (error);
}
- s = splnet();
+ INP_INFO_RLOCK(&udbinfo);
inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr,
- addrs[1].sin6_port,
- &addrs[0].sin6_addr, addrs[0].sin6_port,
- 1, NULL);
- if (!inp || !inp->inp_socket) {
+ addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1,
+ NULL);
+ if (inp == NULL) {
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (ENOENT);
+ }
+ INP_LOCK(inp);
+ if (inp->inp_socket == NULL) {
error = ENOENT;
goto out;
}
+ error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
+ if (error)
+ goto out;
cru2x(inp->inp_socket->so_cred, &xuc);
- error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
out:
- splx(s);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&udbinfo);
+ if (error == 0)
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
}
-SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
- 0, 0,
- udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
+SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
+ 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
static int
+udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
+ struct mbuf *control, struct thread *td)
+{
+ u_int32_t ulen = m->m_pkthdr.len;
+ u_int32_t plen = sizeof(struct udphdr) + ulen;
+ struct ip6_hdr *ip6;
+ struct udphdr *udp6;
+ struct in6_addr *laddr, *faddr;
+ struct sockaddr_in6 *sin6 = NULL;
+ struct ifnet *oifp = NULL;
+ int scope_ambiguous = 0;
+ u_short fport;
+ int error = 0;
+ struct ip6_pktopts *optp, opt;
+ int priv;
+ int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
+ int flags;
+ struct sockaddr_in6 tmp;
+
+ INP_LOCK_ASSERT(inp);
+
+ priv = 0;
+ if (td && !suser(td))
+ priv = 1;
+
+ if (addr6) {
+ /* addr6 has been validated in udp6_send(). */
+ sin6 = (struct sockaddr_in6 *)addr6;
+
+ /* protect *sin6 from overwrites */
+ tmp = *sin6;
+ sin6 = &tmp;
+
+ /*
+	 * The application should provide a proper zone ID or the use of
+	 * default zone IDs should be enabled.  Unfortunately, some
+	 * applications do not behave as they should, so we need a
+ * workaround. Even if an appropriate ID is not determined,
+ * we'll see if we can determine the outgoing interface. If we
+ * can, determine the zone ID based on the interface below.
+ */
+ if (sin6->sin6_scope_id == 0 && !ip6_use_defzone)
+ scope_ambiguous = 1;
+ if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
+ return (error);
+ }
+
+ if (control) {
+ if ((error = ip6_setpktopts(control, &opt,
+ inp->in6p_outputopts, priv, IPPROTO_UDP)) != 0)
+ goto release;
+ optp = &opt;
+ } else
+ optp = inp->in6p_outputopts;
+
+ if (sin6) {
+ faddr = &sin6->sin6_addr;
+
+ /*
+ * IPv4 version of udp_output calls in_pcbconnect in this case,
+ * which needs splnet and affects performance.
+ * Since we saw no essential reason for calling in_pcbconnect,
+ * we get rid of such kind of logic, and call in6_selectsrc
+ * and in6_pcbsetport in order to fill in the local address
+ * and the local port.
+ */
+ if (sin6->sin6_port == 0) {
+ error = EADDRNOTAVAIL;
+ goto release;
+ }
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ /* how about ::ffff:0.0.0.0 case? */
+ error = EISCONN;
+ goto release;
+ }
+
+ fport = sin6->sin6_port; /* allow 0 port */
+
+ if (IN6_IS_ADDR_V4MAPPED(faddr)) {
+ if ((inp->in6p_flags & IN6P_IPV6_V6ONLY)) {
+ /*
+ * I believe we should explicitly discard the
+ * packet when mapped addresses are disabled,
+ * rather than send the packet as an IPv6 one.
+ * If we chose the latter approach, the packet
+ * might be sent out on the wire based on the
+ * default route, the situation which we'd
+ * probably want to avoid.
+ * (20010421 jinmei at kame.net)
+ */
+ error = EINVAL;
+ goto release;
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
+ /*
+ * when remote addr is an IPv4-mapped address,
+ * local addr should not be an IPv6 address,
+ * since you cannot determine how to map IPv6
+ * source address to IPv4.
+ */
+ error = EINVAL;
+ goto release;
+ }
+
+ af = AF_INET;
+ }
+
+ if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
+ laddr = in6_selectsrc(sin6, optp, inp->in6p_moptions,
+ NULL, &inp->in6p_laddr, &oifp, &error);
+ if (oifp && scope_ambiguous &&
+ (error = in6_setscope(&sin6->sin6_addr,
+ oifp, NULL))) {
+ goto release;
+ }
+ } else
+ laddr = &inp->in6p_laddr; /* XXX */
+ if (laddr == NULL) {
+ if (error == 0)
+ error = EADDRNOTAVAIL;
+ goto release;
+ }
+ if (inp->in6p_lport == 0 &&
+ (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0)
+ goto release;
+ } else {
+ if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ error = ENOTCONN;
+ goto release;
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
+ if ((inp->in6p_flags & IN6P_IPV6_V6ONLY)) {
+ /*
+ * XXX: this case would happen when the
+ * application sets the V6ONLY flag after
+ * connecting the foreign address.
+ * Such applications should be fixed,
+ * so we bark here.
+ */
+ log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
+ "option was set for a connected socket\n");
+ error = EINVAL;
+ goto release;
+ } else
+ af = AF_INET;
+ }
+ laddr = &inp->in6p_laddr;
+ faddr = &inp->in6p_faddr;
+ fport = inp->in6p_fport;
+ }
+
+ if (af == AF_INET)
+ hlen = sizeof(struct ip);
+
+ /*
+ * Calculate data length and get a mbuf
+ * for UDP and IP6 headers.
+ */
+ M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
+ if (m == 0) {
+ error = ENOBUFS;
+ goto release;
+ }
+
+ /*
+ * Stuff checksum and output datagram.
+ */
+ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
+ udp6->uh_sport = inp->in6p_lport; /* lport is always set in the PCB */
+ udp6->uh_dport = fport;
+ if (plen <= 0xffff)
+ udp6->uh_ulen = htons((u_short)plen);
+ else
+ udp6->uh_ulen = 0;
+ udp6->uh_sum = 0;
+
+ switch (af) {
+ case AF_INET6:
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = inp->in6p_flowinfo & IPV6_FLOWINFO_MASK;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+#if 0 /* ip6_plen will be filled in ip6_output. */
+ ip6->ip6_plen = htons((u_short)plen);
+#endif
+ ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ ip6->ip6_src = *laddr;
+ ip6->ip6_dst = *faddr;
+
+ if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
+ sizeof(struct ip6_hdr), plen)) == 0) {
+ udp6->uh_sum = 0xffff;
+ }
+
+ flags = 0;
+
+ udpstat.udps_opackets++;
+ error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
+ NULL, inp);
+ break;
+ case AF_INET:
+ error = EAFNOSUPPORT;
+ goto release;
+ }
+ goto releaseopt;
+
+release:
+ m_freem(m);
+
+releaseopt:
+ if (control) {
+ ip6_clearpktopts(&opt, -1);
+ m_freem(control);
+ }
+ return (error);
+}
+
+static void
udp6_abort(struct socket *so)
{
struct inpcb *inp;
- int s;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL; /* ??? possible? panic instead? */
+ KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
+
+#ifdef INET
+ if (inp->inp_vflag & INP_IPV4) {
+ struct pr_usrreqs *pru;
+
+ pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ (*pru->pru_abort)(so);
+ return;
}
- soisdisconnected(so);
- s = splnet();
+#endif
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- in6_pcbdetach(inp);
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ in6_pcbdisconnect(inp);
+ inp->in6p_laddr = in6addr_any;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- splx(s);
- return 0;
}
static int
udp6_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
- int s, error;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = soreserve(so, udp_sendspace, udp_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&udbinfo);
- return error;
- }
+ if (error)
+ return (error);
}
- s = splnet();
- error = in_pcballoc(so, &udbinfo, "udp6inp");
- splx(s);
+ INP_INFO_WLOCK(&udbinfo);
+ error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
@@ -561,23 +756,20 @@
*/
inp->inp_ip_ttl = ip_defttl;
INP_UNLOCK(inp);
- return 0;
+ return (0);
}
static int
udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
- int s, error;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
- INP_LOCK(inp);
+ KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -593,36 +785,58 @@
in6_sin6_2_sin(&sin, sin6_p);
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
- s = splnet();
error = in_pcbbind(inp, (struct sockaddr *)&sin,
td->td_ucred);
goto out;
}
}
- s = splnet();
error = in6_pcbbind(inp, nam, td->td_ucred);
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- splx(s);
- return error;
+ return (error);
}
-static int
-udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+static void
+udp6_close(struct socket *so)
{
struct inpcb *inp;
- int s, error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
+ KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
+
+#ifdef INET
+ if (inp->inp_vflag & INP_IPV4) {
+ struct pr_usrreqs *pru;
+
+ pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ (*pru->pru_disconnect)(so);
+ return;
}
+#endif
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ in6_pcbdisconnect(inp);
+ inp->in6p_laddr = in6addr_any;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&udbinfo);
+}
+
+static int
+udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
struct sockaddr_in6 *sin6_p;
@@ -630,13 +844,13 @@
if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
- if (inp->inp_faddr.s_addr != INADDR_ANY)
- return EISCONN;
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ error = EISCONN;
+ goto out;
+ }
in6_sin6_2_sin(&sin, sin6_p);
- s = splnet();
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
- splx(s);
if (error == 0) {
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
@@ -649,9 +863,7 @@
error = EISCONN;
goto out;
}
- s = splnet();
error = in6_pcbconnect(inp, nam, td->td_ucred);
- splx(s);
if (error == 0) {
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
/* should be non mapped addr */
@@ -663,41 +875,34 @@
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
-static int
+static void
udp6_detach(struct socket *so)
{
struct inpcb *inp;
- int s;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- s = splnet();
in6_pcbdetach(inp);
- splx(s);
+ in6_pcbfree(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
udp6_disconnect(struct socket *so)
{
struct inpcb *inp;
- int error, s;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
#ifdef INET
@@ -715,36 +920,30 @@
goto out;
}
- s = splnet();
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- splx(s);
/* XXXRW: so_state locking? */
so->so_state &= ~SS_ISCONNECTED; /* XXX */
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
+ return (0);
}
static int
-udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
- struct mbuf *control, struct thread *td)
+udp6_send(struct socket *so, int flags, struct mbuf *m,
+ struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
int error = 0;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- m_freem(m);
- return EINVAL;
- }
- INP_LOCK(inp);
+ KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
if (addr) {
- if (addr->sa_len != sizeof(struct sockaddr_in6)) {
+ if (addr->sa_len != sizeof(struct sockaddr_in6)) {
error = EINVAL;
goto bad;
}
@@ -764,7 +963,7 @@
else {
sin6 = (struct sockaddr_in6 *)addr;
hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
- ? 1 : 0;
+ ? 1 : 0;
}
if (hasv4addr) {
struct pr_usrreqs *pru;
@@ -772,11 +971,10 @@
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
!IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
/*
- * when remote addr is IPv4-mapped
- * address, local addr should not be
- * an IPv6 address; since you cannot
- * determine how to map IPv6 source
- * address to IPv4.
+ * When remote addr is IPv4-mapped address,
+ * local addr should not be an IPv6 address;
+ * since you cannot determine how to map IPv6
+ * source address to IPv4.
*/
error = EINVAL;
goto out;
@@ -785,20 +983,22 @@
in6_sin6_2_sin_in_sock(addr);
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
error = ((*pru->pru_send)(so, flags, m, addr, control,
- td));
+ td));
/* addr will just be freed in sendit(). */
goto out;
}
}
#endif
-
+#ifdef MAC
+ mac_create_mbuf_from_inpcb(inp, m);
+#endif
error = udp6_output(inp, m, addr, control, td);
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
- bad:
+bad:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
m_freem(m);
@@ -817,5 +1017,6 @@
.pru_send = udp6_send,
.pru_shutdown = udp_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = udp6_close
};
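For reference, the checksum handling in the new udp6_output() above implements the rule that UDP over IPv6 may not be transmitted with a zero checksum: after in6_cksum() runs over the pseudo-header and payload, a result of 0 is stored as 0xffff. Below is a minimal userland sketch of that remapping; csum16() is a hypothetical stand-in for the kernel's in6_cksum() and does not cover the pseudo-header.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the kernel's in6_cksum(): a plain 16-bit one's-complement
 * sum over a buffer (the real function also covers the IPv6 pseudo-header).
 */
static uint16_t
csum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)buf[0] << 8) | buf[1];
		buf += 2;
		len -= 2;
	}
	if (len > 0)
		sum += (uint32_t)buf[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

/*
 * UDP over IPv6 may not carry a zero checksum, so a computed value of 0
 * is remapped to 0xffff, the same adjustment udp6_output() makes above.
 */
static uint16_t
udp6_final_cksum(const uint8_t *pkt, size_t len)
{
	uint16_t sum = csum16(pkt, len);

	return (sum == 0 ? 0xffff : sum);
}

int
main(void)
{
	uint8_t payload[] = { 0x12, 0x34, 0x56, 0x78 };

	printf("checksum: 0x%04x\n",
	    udp6_final_cksum(payload, sizeof(payload)));
	return (0);
}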
Index: in6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/in6.h -L sys/netinet6/in6.h -u -r1.3 -r1.4
--- sys/netinet6/in6.h
+++ sys/netinet6/in6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6.h,v 1.36.2.6 2006/03/22 06:32:54 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6.h,v 1.51 2007/07/19 09:16:40 bz Exp $ */
/* $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */
/*-
@@ -77,31 +77,9 @@
#define __KAME_VERSION "FreeBSD"
/*
- * Local port number conventions:
- *
- * Ports < IPPORT_RESERVED are reserved for privileged processes (e.g. root),
- * unless a kernel is compiled with IPNOPRIVPORTS defined.
- *
- * When a user does a bind(2) or connect(2) with a port number of zero,
- * a non-conflicting local port address is chosen.
- *
- * The default range is IPPORT_ANONMIN to IPPORT_ANONMAX, although
- * that is settable by sysctl(3); net.inet.ip.anonportmin and
- * net.inet.ip.anonportmax respectively.
- *
- * A user may set the IPPROTO_IP option IP_PORTRANGE to change this
- * default assignment range.
- *
- * The value IP_PORTRANGE_DEFAULT causes the default behavior.
- *
- * The value IP_PORTRANGE_HIGH is the same as IP_PORTRANGE_DEFAULT,
- * and exists only for FreeBSD compatibility purposes.
- *
- * The value IP_PORTRANGE_LOW changes the range to the "low" are
- * that is (by convention) restricted to privileged processes.
- * This convention is based on "vouchsafe" principles only.
- * It is only secure if you trust the remote host to restrict these ports.
- * The range is IPPORT_RESERVEDMIN to IPPORT_RESERVEDMAX.
+ * IPv6 port allocation rules should mirror the IPv4 rules and are controlled
+ * by the net.inet.ip.portrange sysctl tree.  The following defines exist
+ * for compatibility with userland applications that need them.
*/
#if __BSD_VISIBLE
#define IPV6PORT_RESERVED 1024
@@ -339,7 +317,7 @@
(IN6_IS_ADDR_MULTICAST(a) && \
(IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL))
#define IN6_IS_ADDR_MC_SITELOCAL(a) \
- (IN6_IS_ADDR_MULTICAST(a) && \
+ (IN6_IS_ADDR_MULTICAST(a) && \
(IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL))
#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
(IN6_IS_ADDR_MULTICAST(a) && \
@@ -355,7 +333,7 @@
(IN6_IS_ADDR_MULTICAST(a) && \
(__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL))
#define IN6_IS_ADDR_MC_SITELOCAL(a) \
- (IN6_IS_ADDR_MULTICAST(a) && \
+ (IN6_IS_ADDR_MULTICAST(a) && \
(__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL))
#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
(IN6_IS_ADDR_MULTICAST(a) && \
@@ -372,6 +350,10 @@
#define IN6_IS_SCOPE_LINKLOCAL(a) \
((IN6_IS_ADDR_LINKLOCAL(a)) || \
(IN6_IS_ADDR_MC_LINKLOCAL(a)))
+#define IN6_IS_SCOPE_EMBED(a) \
+ ((IN6_IS_ADDR_LINKLOCAL(a)) || \
+ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \
+ (IN6_IS_ADDR_MC_INTFACELOCAL(a)))
#define IFA6_IS_DEPRECATED(a) \
((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
@@ -433,7 +415,8 @@
#if 1 /* IPSEC */
#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
-#endif
+#endif /* IPSEC */
+
#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */
#if 1 /* IPV6FIREWALL */
@@ -489,6 +472,14 @@
* the source address.
*/
+/*
+ * The following option is private; do not use it from user applications.
+ * It is deliberately defined to the same value as IP_MSFILTER.
+ */
+#define IPV6_MSFILTER 74 /* struct __msfilterreq;
+ * set/get multicast source filter list.
+ */
+
/* to define items, should talk with KAME guys first, for *BSD compatibility */
#define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */
@@ -509,6 +500,18 @@
unsigned int ipv6mr_interface;
};
+#ifdef notyet
+/*
+ * Argument structure for IPV6_ADD_SOURCE_MEMBERSHIP,
+ * IPV6_DROP_SOURCE_MEMBERSHIP, IPV6_BLOCK_SOURCE, and IPV6_UNBLOCK_SOURCE.
+ */
+struct ipv6_mreq_source {
+ struct in6_addr ipv6mr_multiaddr;
+ struct in6_addr ipv6mr_sourceaddr;
+ uint32_t ipv6mr_interface;
+};
+#endif
+
/*
* IPV6_PKTINFO: Packet information(RFC2292 sec 5)
*/
@@ -596,8 +599,7 @@
/* New entries should be added here from current IPV6CTL_MAXID value. */
/* to define items, should talk with KAME guys first, for *BSD compatibility */
#define IPV6CTL_STEALTH 45
-#define IPV6CTL_RTHDR0_ALLOWED 46
-#define IPV6CTL_MAXID 47
+#define IPV6CTL_MAXID 46
#endif /* __BSD_VISIBLE */
/*
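The in6.h hunk above adds IN6_IS_SCOPE_EMBED, which appears to group the address classes whose KAME-style zone IDs get embedded in the address itself (link-local unicast plus link-local and interface-local multicast). IN6_IS_SCOPE_EMBED is kernel-only, but the classification macros it builds on are standard; a small illustrative userland check using them (the address string is just an example):

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int
main(void)
{
	struct in6_addr a;
	const char *s = "fe80::1";	/* example link-local address */

	if (inet_pton(AF_INET6, s, &a) != 1)
		return (1);
	/* The standard macros underlying the kernel-only scope test. */
	printf("%s: linklocal=%d mc_linklocal=%d\n", s,
	    IN6_IS_ADDR_LINKLOCAL(&a) != 0,
	    IN6_IS_ADDR_MC_LINKLOCAL(&a) != 0);
	return (0);
}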
Index: scope6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/scope6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/scope6.c -L sys/netinet6/scope6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/scope6.c
+++ sys/netinet6/scope6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/scope6.c,v 1.12.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/scope6.c,v 1.17 2007/07/05 16:23:48 delphij Exp $ */
/* $KAME: scope6.c,v 1.10 2000/07/24 13:29:31 itojun Exp $ */
/*-
@@ -67,7 +67,7 @@
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id)
void
-scope6_init()
+scope6_init(void)
{
SCOPE6_LOCK_INIT();
@@ -75,8 +75,7 @@
}
struct scope6_id *
-scope6_ifattach(ifp)
- struct ifnet *ifp;
+scope6_ifattach(struct ifnet *ifp)
{
struct scope6_id *sid;
@@ -99,17 +98,14 @@
}
void
-scope6_ifdetach(sid)
- struct scope6_id *sid;
+scope6_ifdetach(struct scope6_id *sid)
{
free(sid, M_IFADDR);
}
int
-scope6_set(ifp, idlist)
- struct ifnet *ifp;
- struct scope6_id *idlist;
+scope6_set(struct ifnet *ifp, struct scope6_id *idlist)
{
int i;
int error = 0;
@@ -176,9 +172,7 @@
}
int
-scope6_get(ifp, idlist)
- struct ifnet *ifp;
- struct scope6_id *idlist;
+scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
{
/* We only need to lock the interface's afdata for SID() to work. */
IF_AFDATA_LOCK(ifp);
@@ -202,8 +196,7 @@
* Get a scope of the address. Node-local, link-local, site-local or global.
*/
int
-in6_addrscope(addr)
- struct in6_addr *addr;
+in6_addrscope(struct in6_addr *addr)
{
int scope;
@@ -261,9 +254,12 @@
return IPV6_ADDR_SCOPE_GLOBAL;
}
+/*
+ * ifp - note that this might be NULL
+ */
+
void
-scope6_setdefault(ifp)
- struct ifnet *ifp; /* note that this might be NULL */
+scope6_setdefault(struct ifnet *ifp)
{
/*
* Currently, this function just sets the default "interfaces"
@@ -285,8 +281,7 @@
}
int
-scope6_get_default(idlist)
- struct scope6_id *idlist;
+scope6_get_default(struct scope6_id *idlist)
{
SCOPE6_LOCK();
@@ -297,8 +292,7 @@
}
u_int32_t
-scope6_addr2default(addr)
- struct in6_addr *addr;
+scope6_addr2default(struct in6_addr *addr)
{
u_int32_t id;
@@ -328,9 +322,7 @@
* address.
*/
int
-sa6_embedscope(sin6, defaultok)
- struct sockaddr_in6 *sin6;
- int defaultok;
+sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
struct ifnet *ifp;
u_int32_t zoneid;
@@ -366,15 +358,15 @@
* generate standard sockaddr_in6 from embedded form.
*/
int
-sa6_recoverscope(sin6)
- struct sockaddr_in6 *sin6;
+sa6_recoverscope(struct sockaddr_in6 *sin6)
{
+ char ip6buf[INET6_ADDRSTRLEN];
u_int32_t zoneid;
if (sin6->sin6_scope_id != 0) {
log(LOG_NOTICE,
"sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
- ip6_sprintf(&sin6->sin6_addr), sin6->sin6_scope_id);
+ ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
/* XXX: proceed anyway... */
}
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
@@ -401,12 +393,11 @@
* Determine the appropriate scope zone ID for in6 and ifp. If ret_id is
* non NULL, it is set to the zone ID. If the zone ID needs to be embedded
* in the in6_addr structure, in6 will be modified.
+ *
+ * ret_id - unnecessary?
*/
int
-in6_setscope(in6, ifp, ret_id)
- struct in6_addr *in6;
- struct ifnet *ifp;
- u_int32_t *ret_id; /* unnecessary? */
+in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
{
int scope;
u_int32_t zoneid = 0;
@@ -480,8 +471,7 @@
* is intact; return non 0 if the address is modified.
*/
int
-in6_clearscope(in6)
- struct in6_addr *in6;
+in6_clearscope(struct in6_addr *in6)
{
int modified = 0;
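One recurring change in the scope6.c hunks above (and elsewhere in this commit) is that ip6_sprintf() now takes a caller-supplied buffer of INET6_ADDRSTRLEN bytes rather than returning a pointer into shared static storage as the old one-argument form did. The userland analogue of that pattern is inet_ntop(); a minimal sketch, with an arbitrary example address:

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int
main(void)
{
	struct in6_addr a;
	char buf[INET6_ADDRSTRLEN];	/* caller-supplied, like ip6buf[] */

	if (inet_pton(AF_INET6, "2001:db8::1", &a) != 1)
		return (1);
	/* Format into the caller's buffer; no shared static state. */
	if (inet_ntop(AF_INET6, &a, buf, sizeof(buf)) == NULL)
		return (1);
	printf("formatted: %s\n", buf);
	return (0);
}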
Index: in6_src.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_src.c -L sys/netinet6/in6_src.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_src.c
+++ sys/netinet6/in6_src.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.30.2.4 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.46 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */
/*-
@@ -66,8 +66,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -93,8 +95,6 @@
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
-#include <net/net_osdep.h>
-
static struct mtx addrsel_lock;
#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock)
@@ -147,24 +147,19 @@
sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
ip6stat.ip6s_sources_rule[(r)]++; \
/* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
- goto next; /* XXX: we can't use 'continue' here */ \
+ goto next; /* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
ip6stat.ip6s_sources_rule[(r)]++; \
- goto out; /* XXX: we can't use 'break' here */ \
+ goto out; /* XXX: we can't use 'break' here */ \
} while(0)
struct in6_addr *
-in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct in6_addr *laddr;
- struct ifnet **ifpp;
- int *errorp;
+in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct in6_addr *laddr, struct ifnet **ifpp, int *errorp)
{
struct in6_addr dst;
struct ifnet *ifp = NULL;
@@ -430,16 +425,14 @@
return (&ia->ia_addr.sin6_addr);
}
+/*
+ * clone - meaningful only for bsdi and freebsd
+ */
static int
-selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
- struct rtentry **retrt;
- int clone; /* meaningful only for bsdi and freebsd. */
- int norouteok;
+selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct ifnet **retifp, struct rtentry **retrt, int clone,
+ int norouteok)
{
int error = 0;
struct ifnet *ifp = NULL;
@@ -447,16 +440,17 @@
struct sockaddr_in6 *sin6_next;
struct in6_pktinfo *pi = NULL;
struct in6_addr *dst = &dstsock->sin6_addr;
-
#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
+
if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
dstsock->sin6_addr.s6_addr32[1] == 0 &&
!IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
printf("in6_selectroute: strange destination %s\n",
- ip6_sprintf(&dstsock->sin6_addr));
+ ip6_sprintf(ip6buf, &dstsock->sin6_addr));
} else {
printf("in6_selectroute: destination = %s%%%d\n",
- ip6_sprintf(&dstsock->sin6_addr),
+ ip6_sprintf(ip6buf, &dstsock->sin6_addr),
dstsock->sin6_scope_id); /* for debug */
}
#endif
@@ -632,12 +626,8 @@
}
static int
-in6_selectif(dstsock, opts, mopts, ro, retifp)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
+in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
{
int error;
struct route_in6 sro;
@@ -650,7 +640,7 @@
if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
&rt, 0, 1)) != 0) {
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (error);
}
@@ -675,7 +665,7 @@
if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (flags);
}
@@ -690,21 +680,20 @@
if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
*retifp = rt->rt_ifa->ifa_ifp;
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (0);
}
+/*
+ * clone - meaningful only for bsdi and freebsd
+ */
int
-in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
- struct rtentry **retrt;
- int clone; /* meaningful only for bsdi and freebsd. */
+in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct ifnet **retifp, struct rtentry **retrt, int clone)
{
+
return (selectroute(dstsock, opts, mopts, ro, retifp,
retrt, clone, 0));
}
@@ -717,10 +706,9 @@
* 3. The system default hoplimit.
*/
int
-in6_selecthlim(in6p, ifp)
- struct in6pcb *in6p;
- struct ifnet *ifp;
+in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
{
+
if (in6p && in6p->in6p_hops >= 0)
return (in6p->in6p_hops);
else if (ifp)
@@ -750,16 +738,16 @@
* share this function by all *bsd*...
*/
int
-in6_pcbsetport(laddr, inp, cred)
- struct in6_addr *laddr;
- struct inpcb *inp;
- struct ucred *cred;
+in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
{
struct socket *so = inp->inp_socket;
u_int16_t lport = 0, first, last, *lastport;
int count, error = 0, wild = 0;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
/* XXX: this is redundant when called from in6_pcbbind */
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
wild = INPLOOKUP_WILDCARD;
@@ -769,17 +757,18 @@
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
- lastport = &pcbinfo->lasthi;
+ lastport = &pcbinfo->ipi_lasthi;
} else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser_cred(cred, 0)))
+ error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+ if (error)
return error;
first = ipport_lowfirstauto; /* 1023 */
last = ipport_lowlastauto; /* 600 */
- lastport = &pcbinfo->lastlow;
+ lastport = &pcbinfo->ipi_lastlow;
} else {
first = ipport_firstauto; /* sysctl */
last = ipport_lastauto;
- lastport = &pcbinfo->lastport;
+ lastport = &pcbinfo->ipi_lastport;
}
/*
* Simple check to ensure all ports are not used up causing
@@ -843,7 +832,7 @@
}
void
-addrsel_policy_init()
+addrsel_policy_init(void)
{
ADDRSEL_LOCK_INIT();
ADDRSEL_SXLOCK_INIT();
@@ -856,8 +845,7 @@
}
static struct in6_addrpolicy *
-lookup_addrsel_policy(key)
- struct sockaddr_in6 *key;
+lookup_addrsel_policy(struct sockaddr_in6 *key)
{
struct in6_addrpolicy *match = NULL;
@@ -900,9 +888,7 @@
}
int
-in6_src_ioctl(cmd, data)
- u_long cmd;
- caddr_t data;
+in6_src_ioctl(u_long cmd, caddr_t data)
{
int i;
struct in6_addrpolicy ent0;
@@ -950,14 +936,14 @@
struct addrsel_policyhead addrsel_policytab;
static void
-init_policy_queue()
+init_policy_queue(void)
{
+
TAILQ_INIT(&addrsel_policytab);
}
static int
-add_addrsel_policyent(newpolicy)
- struct in6_addrpolicy *newpolicy;
+add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
{
struct addrsel_policyent *new, *pol;
@@ -992,8 +978,7 @@
}
static int
-delete_addrsel_policyent(key)
- struct in6_addrpolicy *key;
+delete_addrsel_policyent(struct in6_addrpolicy *key)
{
struct addrsel_policyent *pol;
@@ -1023,9 +1008,8 @@
}
static int
-walk_addrsel_policy(callback, w)
- int (*callback) __P((struct in6_addrpolicy *, void *));
- void *w;
+walk_addrsel_policy(int (*callback) __P((struct in6_addrpolicy *, void *)),
+ void *w)
{
struct addrsel_policyent *pol;
int error = 0;
@@ -1042,9 +1026,7 @@
}
static int
-dump_addrsel_policyent(pol, arg)
- struct in6_addrpolicy *pol;
- void *arg;
+dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
{
int error = 0;
struct walkarg *w = arg;
@@ -1055,8 +1037,7 @@
}
static struct in6_addrpolicy *
-match_addrsel_policy(key)
- struct sockaddr_in6 *key;
+match_addrsel_policy(struct sockaddr_in6 *key)
{
struct addrsel_policyent *pent;
struct in6_addrpolicy *bestpol = NULL, *pol;
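in6_pcbsetport() above picks an ephemeral port by walking one of the sysctl-controlled ranges (low, default, or high) from a per-pcbinfo cursor and giving up after one full pass. A simplified sketch of that walk follows; port_in_use() is a hypothetical predicate standing in for the kernel's pcb hash lookup, and the range values are only examples.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's local-port lookup. */
static int
port_in_use(uint16_t port)
{
	return (port == 49153);		/* pretend one port is taken */
}

/*
 * Walk [first, last] starting just past *cursor, wrapping once; return
 * the chosen port, or -1 if every port in the range is in use.
 */
static int
pick_port(uint16_t first, uint16_t last, uint16_t *cursor)
{
	unsigned int count = (unsigned int)(last - first) + 1;
	uint16_t port = *cursor;

	while (count-- > 0) {
		if (port < first || port >= last)
			port = first;	/* wrap back to the range start */
		else
			port++;
		if (!port_in_use(port)) {
			*cursor = port;	/* remember where we stopped */
			return (port);
		}
	}
	return (-1);
}

int
main(void)
{
	uint16_t cursor = 49152;	/* e.g. the range's first port */
	int port = pick_port(49152, 65535, &cursor);

	printf("selected port: %d\n", port);
	return (port < 0);
}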
Index: frag6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/frag6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/frag6.c -L sys/netinet6/frag6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/frag6.c
+++ sys/netinet6/frag6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.25 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.33 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ */
/*-
@@ -53,8 +53,6 @@
#include <netinet/in_systm.h> /* for ECN definitions */
#include <netinet/ip.h> /* for ECN definitions */
-#include <net/net_osdep.h>
-
/*
* Define it to get a correct behavior on per-interface statistics.
* You will need to perform an extra routing table lookup, per fragment,
@@ -87,12 +85,22 @@
/*
* Initialise reassembly queue and fragment identifier.
*/
+static void
+frag6_change(void *tag)
+{
+
+ ip6_maxfragpackets = nmbclusters / 4;
+ ip6_maxfrags = nmbclusters / 4;
+}
+
void
-frag6_init()
+frag6_init(void)
{
ip6_maxfragpackets = nmbclusters / 4;
ip6_maxfrags = nmbclusters / 4;
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ frag6_change, NULL, EVENTHANDLER_PRI_ANY);
IP6Q_LOCK_INIT();
@@ -132,9 +140,7 @@
* Fragment input
*/
int
-frag6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+frag6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp, *t;
struct ip6_hdr *ip6;
@@ -149,6 +155,9 @@
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
+#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
+#endif
ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
@@ -248,10 +257,11 @@
q6->ip6q_nxtp = (u_char *)nxtp;
#endif
q6->ip6q_ident = ip6f->ip6f_ident;
- q6->ip6q_arrive = 0; /* Is it used anywhere? */
- q6->ip6q_ttl = IPV6_FRAGTTL;
+ q6->ip6q_ttl = IPV6_FRAGTTL;
q6->ip6q_src = ip6->ip6_src;
q6->ip6q_dst = ip6->ip6_dst;
+ q6->ip6q_ecn =
+ (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
q6->ip6q_nfrag = 0;
@@ -332,10 +342,6 @@
if (ip6af == NULL)
goto dropfrag;
bzero(ip6af, sizeof(*ip6af));
- ip6af->ip6af_head = ip6->ip6_flow;
- ip6af->ip6af_len = ip6->ip6_plen;
- ip6af->ip6af_nxt = ip6->ip6_nxt;
- ip6af->ip6af_hlim = ip6->ip6_hlim;
ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
ip6af->ip6af_off = fragoff;
ip6af->ip6af_frglen = frgpartlen;
@@ -353,14 +359,14 @@
* drop if CE and not-ECT are mixed for the same packet.
*/
ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
- ecn0 = (ntohl(q6->ip6q_down->ip6af_head) >> 20) & IPTOS_ECN_MASK;
+ ecn0 = q6->ip6q_ecn;
if (ecn == IPTOS_ECN_CE) {
if (ecn0 == IPTOS_ECN_NOTECT) {
free(ip6af, M_FTABLE);
goto dropfrag;
}
if (ecn0 != IPTOS_ECN_CE)
- q6->ip6q_down->ip6af_head |= htonl(IPTOS_ECN_CE << 20);
+ q6->ip6q_ecn = IPTOS_ECN_CE;
}
if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
free(ip6af, M_FTABLE);
@@ -417,6 +423,9 @@
* existing fragments from a security point of view.
* We don't know which fragment is the bad guy - here we trust
* fragment that came in earlier, with no real reason.
+ *
+ * Note: due to changes after disabling this part, mbuf passed to
+ * m_adj() below now does not meet the requirement.
*/
if (af6->ip6af_up != (struct ip6asfrag *)q6) {
i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
@@ -425,7 +434,7 @@
#if 0 /* suppress the noisy log */
log(LOG_ERR, "%d bytes of a fragment from %s "
"overlaps the previous fragment\n",
- i, ip6_sprintf(&q6->ip6q_src));
+ i, ip6_sprintf(ip6buf, &q6->ip6q_src));
#endif
free(ip6af, M_FTABLE);
goto dropfrag;
@@ -437,7 +446,7 @@
#if 0 /* suppress the noisy log */
log(LOG_ERR, "%d bytes of a fragment from %s "
"overlaps the succeeding fragment",
- i, ip6_sprintf(&q6->ip6q_src));
+ i, ip6_sprintf(ip6buf, &q6->ip6q_src));
#endif
free(ip6af, M_FTABLE);
goto dropfrag;
@@ -499,19 +508,18 @@
free(ip6af, M_FTABLE);
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
- ip6->ip6_src = q6->ip6q_src;
- ip6->ip6_dst = q6->ip6q_dst;
+ if (q6->ip6q_ecn == IPTOS_ECN_CE)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
nxt = q6->ip6q_nxt;
#ifdef notyet
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /*
- * Delete frag6 header with as a few cost as possible.
- */
- if (offset < m->m_len) {
+ /* Delete frag6 header */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST */
ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
+ offset);
m->m_data += sizeof(struct ip6_frag);
m->m_len -= sizeof(struct ip6_frag);
} else {
@@ -573,8 +581,7 @@
* associated datagrams.
*/
void
-frag6_freef(q6)
- struct ip6q *q6;
+frag6_freef(struct ip6q *q6)
{
struct ip6asfrag *af6, *down6;
@@ -618,8 +625,7 @@
* Like insque, but pointers in middle of structure.
*/
void
-frag6_enq(af6, up6)
- struct ip6asfrag *af6, *up6;
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
{
IP6Q_LOCK_ASSERT();
@@ -634,8 +640,7 @@
* To frag6_enq as remque is to insque.
*/
void
-frag6_deq(af6)
- struct ip6asfrag *af6;
+frag6_deq(struct ip6asfrag *af6)
{
IP6Q_LOCK_ASSERT();
@@ -645,8 +650,7 @@
}
void
-frag6_insque(new, old)
- struct ip6q *new, *old;
+frag6_insque(struct ip6q *new, struct ip6q *old)
{
IP6Q_LOCK_ASSERT();
@@ -658,8 +662,7 @@
}
void
-frag6_remque(p6)
- struct ip6q *p6;
+frag6_remque(struct ip6q *p6)
{
IP6Q_LOCK_ASSERT();
@@ -674,10 +677,13 @@
* queue, discard it.
*/
void
-frag6_slowtimo()
+frag6_slowtimo(void)
{
struct ip6q *q6;
- int s = splnet();
+
+#if 0
+ GIANT_REQUIRED; /* XXX bz: ip6_forward_rt */
+#endif
IP6Q_LOCK();
q6 = ip6q.ip6q_next;
@@ -719,15 +725,13 @@
ipsrcchk_rt.ro_rt = 0;
}
#endif
-
- splx(s);
}
/*
* Drain off all datagram fragments.
*/
void
-frag6_drain()
+frag6_drain(void)
{
if (IP6Q_TRYLOCK() == 0)
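The ECN handling added to frag6_input() above follows the usual rule for combining ECN codepoints across fragments: a CE fragment cannot be merged into a queue that started as not-ECT (the fragment is dropped), otherwise CE is propagated to the reassembled packet, and a not-ECT fragment is rejected once the queue already carries ECT or CE. A small sketch of that merge rule outside the kernel, using local ECN_* constants rather than the IPTOS_ECN_* macros:

#include <stdio.h>

#define ECN_NOTECT	0x00
#define ECN_ECT1	0x01
#define ECN_ECT0	0x02
#define ECN_CE		0x03

/*
 * Merge a fragment's ECN codepoint into the reassembly queue's codepoint,
 * mirroring the checks shown in the frag6_input() hunks.  Returns 0 on
 * success and -1 when the fragment must be dropped.
 */
static int
ecn_merge(int *queue_ecn, int frag_ecn)
{
	if (frag_ecn == ECN_CE) {
		if (*queue_ecn == ECN_NOTECT)
			return (-1);		/* CE into not-ECT: drop */
		if (*queue_ecn != ECN_CE)
			*queue_ecn = ECN_CE;	/* propagate the CE mark */
		return (0);
	}
	if (frag_ecn == ECN_NOTECT && *queue_ecn != ECN_NOTECT)
		return (-1);			/* not-ECT into ECT/CE: drop */
	return (0);
}

int
main(void)
{
	int q = ECN_ECT0;

	printf("merge CE: %d (queue now %d)\n", ecn_merge(&q, ECN_CE), q);
	printf("merge not-ECT: %d\n", ecn_merge(&q, ECN_NOTECT));
	return (0);
}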