[Midnightbsd-cvs] src [9927] trunk/sys: sync with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Fri May 25 09:10:12 EDT 2018


Revision: 9927
          http://svnweb.midnightbsd.org/src/?rev=9927
Author:   laffer1
Date:     2018-05-25 09:10:11 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd

Modified Paths:
--------------
    trunk/sys/netinet/accf_data.c
    trunk/sys/netinet/accf_dns.c
    trunk/sys/netinet/accf_http.c
    trunk/sys/netinet/cc/cc.c
    trunk/sys/netinet/cc/cc_chd.c
    trunk/sys/netinet/cc/cc_cubic.c
    trunk/sys/netinet/cc/cc_cubic.h
    trunk/sys/netinet/cc/cc_hd.c
    trunk/sys/netinet/cc/cc_htcp.c
    trunk/sys/netinet/cc/cc_module.h
    trunk/sys/netinet/cc/cc_newreno.c
    trunk/sys/netinet/cc/cc_vegas.c
    trunk/sys/netinet/cc.h
    trunk/sys/netinet/icmp6.h
    trunk/sys/netinet/icmp_var.h
    trunk/sys/netinet/if_atm.c
    trunk/sys/netinet/if_atm.h
    trunk/sys/netinet/if_ether.c
    trunk/sys/netinet/if_ether.h
    trunk/sys/netinet/igmp.c
    trunk/sys/netinet/igmp.h
    trunk/sys/netinet/igmp_var.h
    trunk/sys/netinet/in.c
    trunk/sys/netinet/in.h
    trunk/sys/netinet/in_cksum.c
    trunk/sys/netinet/in_debug.c
    trunk/sys/netinet/in_gif.c
    trunk/sys/netinet/in_mcast.c
    trunk/sys/netinet/in_pcb.c
    trunk/sys/netinet/in_pcb.h
    trunk/sys/netinet/in_pcbgroup.c
    trunk/sys/netinet/in_proto.c
    trunk/sys/netinet/in_rmx.c
    trunk/sys/netinet/in_systm.h
    trunk/sys/netinet/in_var.h
    trunk/sys/netinet/ip.h
    trunk/sys/netinet/ip6.h
    trunk/sys/netinet/ip_carp.c
    trunk/sys/netinet/ip_carp.h
    trunk/sys/netinet/ip_divert.c
    trunk/sys/netinet/ip_divert.h
    trunk/sys/netinet/ip_dummynet.h
    trunk/sys/netinet/ip_ecn.c
    trunk/sys/netinet/ip_ecn.h
    trunk/sys/netinet/ip_encap.c
    trunk/sys/netinet/ip_encap.h
    trunk/sys/netinet/ip_fastfwd.c
    trunk/sys/netinet/ip_fw.h
    trunk/sys/netinet/ip_gre.c
    trunk/sys/netinet/ip_icmp.c
    trunk/sys/netinet/ip_icmp.h
    trunk/sys/netinet/ip_id.c
    trunk/sys/netinet/ip_input.c
    trunk/sys/netinet/ip_ipsec.c
    trunk/sys/netinet/ip_ipsec.h
    trunk/sys/netinet/ip_mroute.c
    trunk/sys/netinet/ip_mroute.h
    trunk/sys/netinet/ip_options.c
    trunk/sys/netinet/ip_options.h
    trunk/sys/netinet/ip_output.c
    trunk/sys/netinet/ip_var.h
    trunk/sys/netinet/khelp/h_ertt.c
    trunk/sys/netinet/khelp/h_ertt.h
    trunk/sys/netinet/libalias/HISTORY
    trunk/sys/netinet/libalias/alias.c
    trunk/sys/netinet/libalias/alias.h
    trunk/sys/netinet/libalias/alias_cuseeme.c
    trunk/sys/netinet/libalias/alias_db.c
    trunk/sys/netinet/libalias/alias_dummy.c
    trunk/sys/netinet/libalias/alias_ftp.c
    trunk/sys/netinet/libalias/alias_irc.c
    trunk/sys/netinet/libalias/alias_local.h
    trunk/sys/netinet/libalias/alias_mod.c
    trunk/sys/netinet/libalias/alias_mod.h
    trunk/sys/netinet/libalias/alias_nbt.c
    trunk/sys/netinet/libalias/alias_old.c
    trunk/sys/netinet/libalias/alias_pptp.c
    trunk/sys/netinet/libalias/alias_proxy.c
    trunk/sys/netinet/libalias/alias_sctp.c
    trunk/sys/netinet/libalias/alias_sctp.h
    trunk/sys/netinet/libalias/alias_skinny.c
    trunk/sys/netinet/libalias/alias_smedia.c
    trunk/sys/netinet/libalias/alias_util.c
    trunk/sys/netinet/libalias/libalias.3
    trunk/sys/netinet/pim.h
    trunk/sys/netinet/pim_var.h
    trunk/sys/netinet/raw_ip.c
    trunk/sys/netinet/sctp.h
    trunk/sys/netinet/sctp_asconf.c
    trunk/sys/netinet/sctp_asconf.h
    trunk/sys/netinet/sctp_auth.c
    trunk/sys/netinet/sctp_auth.h
    trunk/sys/netinet/sctp_bsd_addr.c
    trunk/sys/netinet/sctp_bsd_addr.h
    trunk/sys/netinet/sctp_cc_functions.c
    trunk/sys/netinet/sctp_constants.h
    trunk/sys/netinet/sctp_crc32.c
    trunk/sys/netinet/sctp_crc32.h
    trunk/sys/netinet/sctp_dtrace_declare.h
    trunk/sys/netinet/sctp_dtrace_define.h
    trunk/sys/netinet/sctp_header.h
    trunk/sys/netinet/sctp_indata.c
    trunk/sys/netinet/sctp_indata.h
    trunk/sys/netinet/sctp_input.c
    trunk/sys/netinet/sctp_input.h
    trunk/sys/netinet/sctp_lock_bsd.h
    trunk/sys/netinet/sctp_os.h
    trunk/sys/netinet/sctp_os_bsd.h
    trunk/sys/netinet/sctp_output.c
    trunk/sys/netinet/sctp_output.h
    trunk/sys/netinet/sctp_pcb.c
    trunk/sys/netinet/sctp_pcb.h
    trunk/sys/netinet/sctp_peeloff.c
    trunk/sys/netinet/sctp_peeloff.h
    trunk/sys/netinet/sctp_ss_functions.c
    trunk/sys/netinet/sctp_structs.h
    trunk/sys/netinet/sctp_sysctl.c
    trunk/sys/netinet/sctp_sysctl.h
    trunk/sys/netinet/sctp_timer.c
    trunk/sys/netinet/sctp_timer.h
    trunk/sys/netinet/sctp_uio.h
    trunk/sys/netinet/sctp_usrreq.c
    trunk/sys/netinet/sctp_var.h
    trunk/sys/netinet/sctputil.c
    trunk/sys/netinet/sctputil.h
    trunk/sys/netinet/siftr.c
    trunk/sys/netinet/tcp.h
    trunk/sys/netinet/tcp_debug.c
    trunk/sys/netinet/tcp_debug.h
    trunk/sys/netinet/tcp_fsm.h
    trunk/sys/netinet/tcp_hostcache.c
    trunk/sys/netinet/tcp_hostcache.h
    trunk/sys/netinet/tcp_input.c
    trunk/sys/netinet/tcp_lro.c
    trunk/sys/netinet/tcp_lro.h
    trunk/sys/netinet/tcp_offload.c
    trunk/sys/netinet/tcp_offload.h
    trunk/sys/netinet/tcp_output.c
    trunk/sys/netinet/tcp_reass.c
    trunk/sys/netinet/tcp_sack.c
    trunk/sys/netinet/tcp_seq.h
    trunk/sys/netinet/tcp_subr.c
    trunk/sys/netinet/tcp_syncache.c
    trunk/sys/netinet/tcp_syncache.h
    trunk/sys/netinet/tcp_timer.c
    trunk/sys/netinet/tcp_timer.h
    trunk/sys/netinet/tcp_timewait.c
    trunk/sys/netinet/tcp_usrreq.c
    trunk/sys/netinet/tcp_var.h
    trunk/sys/netinet/tcpip.h
    trunk/sys/netinet/udp.h
    trunk/sys/netinet/udp_usrreq.c
    trunk/sys/netinet/udp_var.h
    trunk/sys/netinet6/dest6.c
    trunk/sys/netinet6/frag6.c
    trunk/sys/netinet6/icmp6.c
    trunk/sys/netinet6/icmp6.h
    trunk/sys/netinet6/in6.c
    trunk/sys/netinet6/in6.h
    trunk/sys/netinet6/in6_cksum.c
    trunk/sys/netinet6/in6_gif.c
    trunk/sys/netinet6/in6_ifattach.c
    trunk/sys/netinet6/in6_ifattach.h
    trunk/sys/netinet6/in6_mcast.c
    trunk/sys/netinet6/in6_pcb.c
    trunk/sys/netinet6/in6_pcb.h
    trunk/sys/netinet6/in6_pcbgroup.c
    trunk/sys/netinet6/in6_proto.c
    trunk/sys/netinet6/in6_rmx.c
    trunk/sys/netinet6/in6_src.c
    trunk/sys/netinet6/in6_var.h
    trunk/sys/netinet6/ip6.h
    trunk/sys/netinet6/ip6_ecn.h
    trunk/sys/netinet6/ip6_forward.c
    trunk/sys/netinet6/ip6_id.c
    trunk/sys/netinet6/ip6_input.c
    trunk/sys/netinet6/ip6_ipsec.c
    trunk/sys/netinet6/ip6_ipsec.h
    trunk/sys/netinet6/ip6_mroute.c
    trunk/sys/netinet6/ip6_mroute.h
    trunk/sys/netinet6/ip6_output.c
    trunk/sys/netinet6/ip6_var.h
    trunk/sys/netinet6/ip6protosw.h
    trunk/sys/netinet6/mld6.c
    trunk/sys/netinet6/mld6.h
    trunk/sys/netinet6/mld6_var.h
    trunk/sys/netinet6/nd6.c
    trunk/sys/netinet6/nd6.h
    trunk/sys/netinet6/nd6_nbr.c
    trunk/sys/netinet6/nd6_rtr.c
    trunk/sys/netinet6/pim6.h
    trunk/sys/netinet6/pim6_var.h
    trunk/sys/netinet6/raw_ip6.c
    trunk/sys/netinet6/raw_ip6.h
    trunk/sys/netinet6/route6.c
    trunk/sys/netinet6/scope6.c
    trunk/sys/netinet6/scope6_var.h
    trunk/sys/netinet6/sctp6_usrreq.c
    trunk/sys/netinet6/sctp6_var.h
    trunk/sys/netinet6/send.c
    trunk/sys/netinet6/send.h
    trunk/sys/netinet6/tcp6_var.h
    trunk/sys/netinet6/udp6_usrreq.c
    trunk/sys/netinet6/udp6_var.h

Added Paths:
-----------
    trunk/sys/netinet/cc/cc_cdg.c
    trunk/sys/netinet/in_kdtrace.c
    trunk/sys/netinet/in_kdtrace.h
    trunk/sys/netinet/sctp_syscalls.c
    trunk/sys/netinet/tcp_fastopen.c
    trunk/sys/netinet/tcp_fastopen.h
    trunk/sys/netinet/toecore.c
    trunk/sys/netinet/toecore.h
    trunk/sys/netinet/udplite.h
    trunk/sys/netinet6/ip6_gre.c

Property Changed:
----------------
    trunk/sys/netinet/libalias/HISTORY
    trunk/sys/netinet/libalias/libalias.3

Modified: trunk/sys/netinet/accf_data.c
===================================================================
--- trunk/sys/netinet/accf_data.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_data.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/accf_data.c 193272 2009-06-01 21:17:03Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/accf_data.c 193272 2009-06-01 21:17:03Z jhb $");
 
 #define ACCEPT_FILTER_MOD
 

Modified: trunk/sys/netinet/accf_dns.c
===================================================================
--- trunk/sys/netinet/accf_dns.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_dns.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$FreeBSD: stable/9/sys/netinet/accf_dns.c 193272 2009-06-01 21:17:03Z jhb $
+ *	$FreeBSD: stable/10/sys/netinet/accf_dns.c 193272 2009-06-01 21:17:03Z jhb $
  */
 
 #define ACCEPT_FILTER_MOD

Modified: trunk/sys/netinet/accf_http.c
===================================================================
--- trunk/sys/netinet/accf_http.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_http.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/accf_http.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/accf_http.c 227309 2011-11-07 15:43:11Z ed $");
 
 #define ACCEPT_FILTER_MOD
 

Modified: trunk/sys/netinet/cc/cc.c
===================================================================
--- trunk/sys/netinet/cc/cc.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2007-2008
  *	Swinburne University of Technology, Melbourne, Australia.
@@ -47,7 +48,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc.c 273847 2014-10-30 08:04:48Z hselasky $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -92,33 +93,33 @@
 {
 	char default_cc[TCP_CA_NAME_MAX];
 	struct cc_algo *funcs;
-	int err, found;
+	int error;
 
-	err = found = 0;
+	/* Get the current default: */
+	CC_LIST_RLOCK();
+	strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
+	CC_LIST_RUNLOCK();
 
-	if (req->newptr == NULL) {
-		/* Just print the current default. */
-		CC_LIST_RLOCK();
-		strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
-		CC_LIST_RUNLOCK();
-		err = sysctl_handle_string(oidp, default_cc, 1, req);
-	} else {
-		/* Find algo with specified name and set it to default. */
-		CC_LIST_RLOCK();
-		STAILQ_FOREACH(funcs, &cc_list, entries) {
-			if (strncmp((char *)req->newptr, funcs->name,
-			    TCP_CA_NAME_MAX) == 0) {
-				found = 1;
-				V_default_cc_ptr = funcs;
-			}
-		}
-		CC_LIST_RUNLOCK();
+	error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
 
-		if (!found)
-			err = ESRCH;
+	/* Check for error or no change */
+	if (error != 0 || req->newptr == NULL)
+		goto done;
+
+	error = ESRCH;
+
+	/* Find algo with specified name and set it to default. */
+	CC_LIST_RLOCK();
+	STAILQ_FOREACH(funcs, &cc_list, entries) {
+		if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
+			continue;
+		V_default_cc_ptr = funcs;
+		error = 0;
+		break;
 	}
-
-	return (err);
+	CC_LIST_RUNLOCK();
+done:
+	return (error);
 }
 
 /*
@@ -166,7 +167,7 @@
 
 	if (!err) {
 		sbuf_finish(s);
-		err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
+		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
 	}
 
 	sbuf_delete(s);

Added: trunk/sys/netinet/cc/cc_cdg.c
===================================================================
--- trunk/sys/netinet/cc/cc_cdg.c	                        (rev 0)
+++ trunk/sys/netinet/cc/cc_cdg.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,701 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2013
+ * 	Swinburne University of Technology, Melbourne, Australia
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by David Hayes, made
+ * possible in part by a gift from The Cisco University Research Program Fund,
+ * a corporate advised fund of Silicon Valley Community Foundation. Development
+ * and testing were further assisted by a grant from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * CAIA Delay-Gradient (CDG) congestion control algorithm
+ *
+ * An implemention of the delay-gradient congestion control algorithm proposed
+ * in the following paper:
+ *
+ * D. A. Hayes and G. Armitage, "Revisiting TCP Congestion Control using Delay
+ * Gradients", in IFIP Networking, Valencia, Spain, 9-13 May 2011.
+ *
+ * Developed as part of the NewTCP research project at Swinburne University of
+ * Technology's Centre for Advanced Internet Architectures, Melbourne,
+ * Australia. More details are available at:
+ *   http://caia.swin.edu.au/urp/newtcp/
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_cdg.c 271690 2014-09-16 21:26:24Z lstewart $");
+
+#include <sys/param.h>
+#include <sys/hhook.h>
+#include <sys/kernel.h>
+#include <sys/khelp.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/cc.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+
+#include <netinet/cc/cc_module.h>
+
+#include <netinet/khelp/h_ertt.h>
+
+#include <vm/uma.h>
+
+#define	CDG_VERSION "0.1"
+
+#define	CAST_PTR_INT(X) (*((int*)(X)))
+
+/* Private delay-gradient induced congestion control signal. */
+#define	CC_CDG_DELAY 0x01000000
+
+/* NewReno window deflation factor on loss (as a percentage). */
+#define	RENO_BETA 50
+
+/* Queue states. */
+#define	CDG_Q_EMPTY	1
+#define	CDG_Q_RISING	2
+#define	CDG_Q_FALLING	3
+#define	CDG_Q_FULL	4
+#define	CDG_Q_UNKNOWN	9999
+
+/* Number of bit shifts used in probexp lookup table. */
+#define	EXP_PREC 15
+
+/* Largest gradient represented in probexp lookup table. */
+#define	MAXGRAD 5
+
+/*
+ * Delay Precision Enhance - number of bit shifts used for qtrend related
+ * integer arithmetic precision.
+ */
+#define	D_P_E 7
+
+struct qdiff_sample {
+	long qdiff;
+	STAILQ_ENTRY(qdiff_sample) qdiff_lnk;
+};
+
+struct cdg {
+	long max_qtrend;
+	long min_qtrend;
+	STAILQ_HEAD(minrtts_head, qdiff_sample) qdiffmin_q;
+	STAILQ_HEAD(maxrtts_head, qdiff_sample) qdiffmax_q;
+	long window_incr;
+	/* rttcount for window increase when in congestion avoidance */
+	long rtt_count;
+	/* maximum measured rtt within an rtt period */
+	int maxrtt_in_rtt;
+	/* maximum measured rtt within prev rtt period */
+	int maxrtt_in_prevrtt;
+	/* minimum measured rtt within an rtt period */
+	int minrtt_in_rtt;
+	/* minimum measured rtt within prev rtt period */
+	int minrtt_in_prevrtt;
+	/* consecutive congestion episode counter */
+	uint32_t consec_cong_cnt;
+	/* when tracking a new reno type loss window */
+	uint32_t shadow_w;
+	/* maximum number of samples in the moving average queue */
+	int sample_q_size;
+	/* number of samples in the moving average queue */
+	int num_samples;
+	/* estimate of the queue state of the path */
+	int queue_state;
+};
+
+/*
+ * Lookup table for:
+ *   (1 - exp(-x)) << EXP_PREC, where x = [0,MAXGRAD] in 2^-7 increments
+ *
+ * Note: probexp[0] is set to 10 (not 0) as a safety for very low increase
+ * gradients.
+ */
+static const int probexp[641] = {
+   10,255,508,759,1008,1255,1501,1744,1985,2225,2463,2698,2932,3165,3395,3624,
+   3850,4075,4299,4520,4740,4958,5175,5389,5602,5814,6024,6232,6438,6643,6846,
+   7048,7248,7447,7644,7839,8033,8226,8417,8606,8794,8981,9166,9350,9532,9713,
+   9892,10070,10247,10422,10596,10769,10940,11110,11278,11445,11611,11776,11939,
+   12101,12262,12422,12580,12737,12893,13048,13201,13354,13505,13655,13803,13951,
+   14097,14243,14387,14530,14672,14813,14952,15091,15229,15365,15500,15635,15768,
+   15900,16032,16162,16291,16419,16547,16673,16798,16922,17046,17168,17289,17410,
+   17529,17648,17766,17882,17998,18113,18227,18340,18453,18564,18675,18784,18893,
+   19001,19108,19215,19320,19425,19529,19632,19734,19835,19936,20036,20135,20233,
+   20331,20427,20523,20619,20713,20807,20900,20993,21084,21175,21265,21355,21444,
+   21532,21619,21706,21792,21878,21962,22046,22130,22213,22295,22376,22457,22537,
+   22617,22696,22774,22852,22929,23006,23082,23157,23232,23306,23380,23453,23525,
+   23597,23669,23739,23810,23879,23949,24017,24085,24153,24220,24286,24352,24418,
+   24483,24547,24611,24675,24738,24800,24862,24924,24985,25045,25106,25165,25224,
+   25283,25341,25399,25456,25513,25570,25626,25681,25737,25791,25846,25899,25953,
+   26006,26059,26111,26163,26214,26265,26316,26366,26416,26465,26514,26563,26611,
+   26659,26707,26754,26801,26847,26893,26939,26984,27029,27074,27118,27162,27206,
+   27249,27292,27335,27377,27419,27460,27502,27543,27583,27624,27664,27703,27743,
+   27782,27821,27859,27897,27935,27973,28010,28047,28084,28121,28157,28193,28228,
+   28263,28299,28333,28368,28402,28436,28470,28503,28536,28569,28602,28634,28667,
+   28699,28730,28762,28793,28824,28854,28885,28915,28945,28975,29004,29034,29063,
+   29092,29120,29149,29177,29205,29232,29260,29287,29314,29341,29368,29394,29421,
+   29447,29472,29498,29524,29549,29574,29599,29623,29648,29672,29696,29720,29744,
+   29767,29791,29814,29837,29860,29882,29905,29927,29949,29971,29993,30014,30036,
+   30057,30078,30099,30120,30141,30161,30181,30201,30221,30241,30261,30280,30300,
+   30319,30338,30357,30376,30394,30413,30431,30449,30467,30485,30503,30521,30538,
+   30555,30573,30590,30607,30624,30640,30657,30673,30690,30706,30722,30738,30753,
+   30769,30785,30800,30815,30831,30846,30861,30876,30890,30905,30919,30934,30948,
+   30962,30976,30990,31004,31018,31031,31045,31058,31072,31085,31098,31111,31124,
+   31137,31149,31162,31174,31187,31199,31211,31223,31235,31247,31259,31271,31283,
+   31294,31306,31317,31328,31339,31351,31362,31373,31383,31394,31405,31416,31426,
+   31436,31447,31457,31467,31477,31487,31497,31507,31517,31527,31537,31546,31556,
+   31565,31574,31584,31593,31602,31611,31620,31629,31638,31647,31655,31664,31673,
+   31681,31690,31698,31706,31715,31723,31731,31739,31747,31755,31763,31771,31778,
+   31786,31794,31801,31809,31816,31824,31831,31838,31846,31853,31860,31867,31874,
+   31881,31888,31895,31902,31908,31915,31922,31928,31935,31941,31948,31954,31960,
+   31967,31973,31979,31985,31991,31997,32003,32009,32015,32021,32027,32033,32038,
+   32044,32050,32055,32061,32066,32072,32077,32083,32088,32093,32098,32104,32109,
+   32114,32119,32124,32129,32134,32139,32144,32149,32154,32158,32163,32168,32173,
+   32177,32182,32186,32191,32195,32200,32204,32209,32213,32217,32222,32226,32230,
+   32234,32238,32242,32247,32251,32255,32259,32263,32267,32270,32274,32278,32282,
+   32286,32290,32293,32297,32301,32304,32308,32311,32315,32318,32322,32325,32329,
+   32332,32336,32339,32342,32346,32349,32352,32356,32359,32362,32365,32368,32371,
+   32374,32377,32381,32384,32387,32389,32392,32395,32398,32401,32404,32407,32410,
+   32412,32415,32418,32421,32423,32426,32429,32431,32434,32437,32439,32442,32444,
+   32447,32449,32452,32454,32457,32459,32461,32464,32466,32469,32471,32473,32476,
+   32478,32480,32482,32485,32487,32489,32491,32493,32495,32497,32500,32502,32504,
+   32506,32508,32510,32512,32514,32516,32518,32520,32522,32524,32526,32527,32529,
+   32531,32533,32535,32537,32538,32540,32542,32544,32545,32547};
+
+static uma_zone_t qdiffsample_zone;
+
+static MALLOC_DEFINE(M_CDG, "cdg data",
+  "Per connection data required for the CDG congestion control algorithm");
+
+static int ertt_id;
+
+static VNET_DEFINE(uint32_t, cdg_alpha_inc);
+static VNET_DEFINE(uint32_t, cdg_beta_delay);
+static VNET_DEFINE(uint32_t, cdg_beta_loss);
+static VNET_DEFINE(uint32_t, cdg_smoothing_factor);
+static VNET_DEFINE(uint32_t, cdg_exp_backoff_scale);
+static VNET_DEFINE(uint32_t, cdg_consec_cong);
+static VNET_DEFINE(uint32_t, cdg_hold_backoff);
+#define	V_cdg_alpha_inc		VNET(cdg_alpha_inc)
+#define	V_cdg_beta_delay	VNET(cdg_beta_delay)
+#define	V_cdg_beta_loss		VNET(cdg_beta_loss)
+#define	V_cdg_smoothing_factor	VNET(cdg_smoothing_factor)
+#define	V_cdg_exp_backoff_scale	VNET(cdg_exp_backoff_scale)
+#define	V_cdg_consec_cong	VNET(cdg_consec_cong)
+#define	V_cdg_hold_backoff	VNET(cdg_hold_backoff)
+
+/* Function prototypes. */
+static int cdg_mod_init(void);
+static int cdg_mod_destroy(void);
+static void cdg_conn_init(struct cc_var *ccv);
+static int cdg_cb_init(struct cc_var *ccv);
+static void cdg_cb_destroy(struct cc_var *ccv);
+static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type);
+static void cdg_ack_received(struct cc_var *ccv, uint16_t ack_type);
+
+struct cc_algo cdg_cc_algo = {
+	.name = "cdg",
+	.mod_init = cdg_mod_init,
+	.ack_received = cdg_ack_received,
+	.cb_destroy = cdg_cb_destroy,
+	.cb_init = cdg_cb_init,
+	.conn_init = cdg_conn_init,
+	.cong_signal = cdg_cong_signal,
+	.mod_destroy = cdg_mod_destroy
+};
+
+/* Vnet created and being initialised. */
+static void
+cdg_init_vnet(const void *unused __unused)
+{
+
+	V_cdg_alpha_inc = 0;
+	V_cdg_beta_delay = 70;
+	V_cdg_beta_loss = 50;
+	V_cdg_smoothing_factor = 8;
+	V_cdg_exp_backoff_scale = 3;
+	V_cdg_consec_cong = 5;
+	V_cdg_hold_backoff = 5;
+}
+
+static int
+cdg_mod_init(void)
+{
+	VNET_ITERATOR_DECL(v);
+
+	ertt_id = khelp_get_id("ertt");
+	if (ertt_id <= 0)
+		return (EINVAL);
+
+	qdiffsample_zone = uma_zcreate("cdg_qdiffsample",
+	    sizeof(struct qdiff_sample), NULL, NULL, NULL, NULL, 0, 0);
+
+	VNET_LIST_RLOCK();
+	VNET_FOREACH(v) {
+		CURVNET_SET(v);
+		cdg_init_vnet(NULL);
+		CURVNET_RESTORE();
+	}
+	VNET_LIST_RUNLOCK();
+
+	cdg_cc_algo.post_recovery = newreno_cc_algo.post_recovery;
+	cdg_cc_algo.after_idle = newreno_cc_algo.after_idle;
+
+	return (0);
+}
+
+static int
+cdg_mod_destroy(void)
+{
+
+	uma_zdestroy(qdiffsample_zone);
+	return (0);
+}
+
+static int
+cdg_cb_init(struct cc_var *ccv)
+{
+	struct cdg *cdg_data;
+
+	cdg_data = malloc(sizeof(struct cdg), M_CDG, M_NOWAIT);
+	if (cdg_data == NULL)
+		return (ENOMEM);
+
+	cdg_data->shadow_w = 0;
+	cdg_data->max_qtrend = 0;
+	cdg_data->min_qtrend = 0;
+	cdg_data->queue_state = CDG_Q_UNKNOWN;
+	cdg_data->maxrtt_in_rtt = 0;
+	cdg_data->maxrtt_in_prevrtt = 0;
+	cdg_data->minrtt_in_rtt = INT_MAX;
+	cdg_data->minrtt_in_prevrtt = 0;
+	cdg_data->window_incr = 0;
+	cdg_data->rtt_count = 0;
+	cdg_data->consec_cong_cnt = 0;
+	cdg_data->sample_q_size = V_cdg_smoothing_factor;
+	cdg_data->num_samples = 0;
+	STAILQ_INIT(&cdg_data->qdiffmin_q);
+	STAILQ_INIT(&cdg_data->qdiffmax_q);
+
+	ccv->cc_data = cdg_data;
+
+	return (0);
+}
+
+static void
+cdg_conn_init(struct cc_var *ccv)
+{
+	struct cdg *cdg_data = ccv->cc_data;
+
+	/*
+	 * Initialise the shadow_cwnd in case we are competing with loss based
+	 * flows from the start
+	 */
+	cdg_data->shadow_w = CCV(ccv, snd_cwnd);
+}
+
+static void
+cdg_cb_destroy(struct cc_var *ccv)
+{
+	struct cdg *cdg_data;
+	struct qdiff_sample *qds, *qds_n;
+
+	cdg_data = ccv->cc_data;
+
+	qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
+	while (qds != NULL) {
+		qds_n = STAILQ_NEXT(qds, qdiff_lnk);
+		uma_zfree(qdiffsample_zone,qds);
+		qds = qds_n;
+	}
+
+	qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
+	while (qds != NULL) {
+		qds_n = STAILQ_NEXT(qds, qdiff_lnk);
+		uma_zfree(qdiffsample_zone,qds);
+		qds = qds_n;
+	}
+
+	free(ccv->cc_data, M_CDG);
+}
+
+static int
+cdg_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+
+	if (req->newptr != NULL &&
+	    (CAST_PTR_INT(req->newptr) == 0 || CAST_PTR_INT(req->newptr) > 100))
+		return (EINVAL);
+
+	return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
+static int
+cdg_exp_backoff_scale_handler(SYSCTL_HANDLER_ARGS)
+{
+
+	if (req->newptr != NULL && CAST_PTR_INT(req->newptr) < 1)
+		return (EINVAL);
+
+	return (sysctl_handle_int(oidp, arg1, arg2, req));
+}
+
+static inline unsigned long
+cdg_window_decrease(struct cc_var *ccv, unsigned long owin, unsigned int beta)
+{
+
+	return ((ulmin(CCV(ccv, snd_wnd), owin) * beta) / 100);
+}
+
+/*
+ * Window increase function
+ * This window increase function is independent of the initial window size
+ * to ensure small window flows are not discriminated against (i.e. fairness).
+ * It increases at 1pkt/rtt like Reno for alpha_inc rtts, and then 2pkts/rtt for
+ * the next alpha_inc rtts, etc.
+ */
+static void
+cdg_window_increase(struct cc_var *ccv, int new_measurement)
+{
+	struct cdg *cdg_data;
+	int incr, s_w_incr;
+
+	cdg_data = ccv->cc_data;
+	incr = s_w_incr = 0;
+
+	if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
+		/* Slow start. */
+		incr = CCV(ccv, t_maxseg);
+		s_w_incr = incr;
+		cdg_data->window_incr = cdg_data->rtt_count = 0;
+	} else {
+		/* Congestion avoidance. */
+		if (new_measurement) {
+			s_w_incr = CCV(ccv, t_maxseg);
+			if (V_cdg_alpha_inc == 0) {
+				incr = CCV(ccv, t_maxseg);
+			} else {
+				if (++cdg_data->rtt_count >= V_cdg_alpha_inc) {
+					cdg_data->window_incr++;
+					cdg_data->rtt_count = 0;
+				}
+				incr = CCV(ccv, t_maxseg) *
+				    cdg_data->window_incr;
+			}
+		}
+	}
+
+	if (cdg_data->shadow_w > 0)
+		cdg_data->shadow_w = ulmin(cdg_data->shadow_w + s_w_incr,
+		    TCP_MAXWIN << CCV(ccv, snd_scale));
+
+	CCV(ccv, snd_cwnd) = ulmin(CCV(ccv, snd_cwnd) + incr,
+	    TCP_MAXWIN << CCV(ccv, snd_scale));
+}
+
+static void
+cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type)
+{
+	struct cdg *cdg_data = ccv->cc_data;
+
+	switch(signal_type) {
+	case CC_CDG_DELAY:
+		CCV(ccv, snd_ssthresh) = cdg_window_decrease(ccv,
+		    CCV(ccv, snd_cwnd), V_cdg_beta_delay);
+		CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
+		CCV(ccv, snd_recover) = CCV(ccv, snd_max);
+		cdg_data->window_incr = cdg_data->rtt_count = 0;
+		ENTER_CONGRECOVERY(CCV(ccv, t_flags));
+		break;
+	case CC_NDUPACK:
+		/*
+		 * If already responding to congestion OR we have guessed no
+		 * queue in the path is full.
+		 */
+		if (IN_CONGRECOVERY(CCV(ccv, t_flags)) ||
+		    cdg_data->queue_state < CDG_Q_FULL) {
+			CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
+			CCV(ccv, snd_recover) = CCV(ccv, snd_max);
+		} else {
+			/*
+			 * Loss is likely to be congestion related. We have
+			 * inferred a queue full state, so have shadow window
+			 * react to loss as NewReno would.
+			 */
+			if (cdg_data->shadow_w > 0)
+				cdg_data->shadow_w = cdg_window_decrease(ccv,
+				    cdg_data->shadow_w, RENO_BETA);
+
+			CCV(ccv, snd_ssthresh) = ulmax(cdg_data->shadow_w,
+			    cdg_window_decrease(ccv, CCV(ccv, snd_cwnd),
+			    V_cdg_beta_loss));
+
+			cdg_data->window_incr = cdg_data->rtt_count = 0;
+		}
+		ENTER_RECOVERY(CCV(ccv, t_flags));
+		break;
+	default:
+		newreno_cc_algo.cong_signal(ccv, signal_type);
+		break;
+	}
+}
+
+/*
+ * Using a negative exponential probabilistic backoff so that sources with
+ * varying RTTs which share the same link will, on average, have the same
+ * probability of backoff over time.
+ *
+ * Prob_backoff = 1 - exp(-qtrend / V_cdg_exp_backoff_scale), where
+ * V_cdg_exp_backoff_scale is the average qtrend for the exponential backoff.
+ */
+static inline int
+prob_backoff(long qtrend)
+{
+	int backoff, idx, p;
+
+	backoff = (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E));
+
+	if (!backoff) {
+		if (V_cdg_exp_backoff_scale > 1)
+			idx = (qtrend + V_cdg_exp_backoff_scale / 2) /
+			    V_cdg_exp_backoff_scale;
+		else
+			idx = qtrend;
+
+		/* Backoff probability proportional to rate of queue growth. */
+		p = (INT_MAX / (1 << EXP_PREC)) * probexp[idx];
+		backoff = (random() < p);
+	}
+
+	return (backoff);
+}
+
+static inline void
+calc_moving_average(struct cdg *cdg_data, long qdiff_max, long qdiff_min)
+{
+	struct qdiff_sample *qds;
+
+	++cdg_data->num_samples;
+	if (cdg_data->num_samples > cdg_data->sample_q_size) {
+		/* Minimum RTT. */
+		qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
+		cdg_data->min_qtrend =  cdg_data->min_qtrend +
+		    (qdiff_min - qds->qdiff) / cdg_data->sample_q_size;
+		STAILQ_REMOVE_HEAD(&cdg_data->qdiffmin_q, qdiff_lnk);
+		qds->qdiff = qdiff_min;
+		STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds, qdiff_lnk);
+
+		/* Maximum RTT. */
+		qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
+		cdg_data->max_qtrend =  cdg_data->max_qtrend +
+		    (qdiff_max - qds->qdiff) / cdg_data->sample_q_size;
+		STAILQ_REMOVE_HEAD(&cdg_data->qdiffmax_q, qdiff_lnk);
+		qds->qdiff = qdiff_max;
+		STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds, qdiff_lnk);
+		--cdg_data->num_samples;
+	} else {
+		qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
+		if (qds != NULL) {
+			cdg_data->min_qtrend = cdg_data->min_qtrend +
+			    qdiff_min / cdg_data->sample_q_size;
+			qds->qdiff = qdiff_min;
+			STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds,
+			    qdiff_lnk);
+		}
+
+		qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
+		if (qds) {
+			cdg_data->max_qtrend = cdg_data->max_qtrend +
+			    qdiff_max / cdg_data->sample_q_size;
+			qds->qdiff = qdiff_max;
+			STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds,
+			    qdiff_lnk);
+		}
+	}
+}
+
+static void
+cdg_ack_received(struct cc_var *ccv, uint16_t ack_type)
+{
+	struct cdg *cdg_data;
+	struct ertt *e_t;
+	long qdiff_max, qdiff_min;
+	int congestion, new_measurement, slowstart;
+
+	cdg_data = ccv->cc_data;
+	e_t = (struct ertt *)khelp_get_osd(CCV(ccv, osd), ertt_id);
+	new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
+	congestion = 0;
+	cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt);
+	cdg_data->minrtt_in_rtt = imin(e_t->rtt, cdg_data->minrtt_in_rtt);
+
+	if (new_measurement) {
+		slowstart = (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh));
+		/*
+		 * Update smoothed gradient measurements. Since we are only
+		 * using one measurement per RTT, use max or min rtt_in_rtt.
+		 * This is also less noisy than a sample RTT measurement. Max
+		 * RTT measurements can have trouble due to OS issues.
+		 */
+		if (cdg_data->maxrtt_in_prevrtt) {
+			qdiff_max = ((long)(cdg_data->maxrtt_in_rtt -
+			    cdg_data->maxrtt_in_prevrtt) << D_P_E );
+			qdiff_min = ((long)(cdg_data->minrtt_in_rtt -
+			    cdg_data->minrtt_in_prevrtt) << D_P_E );
+
+			calc_moving_average(cdg_data, qdiff_max, qdiff_min);
+
+			/* Probabilistic backoff with respect to gradient. */
+			if (slowstart && qdiff_min > 0)
+				congestion = prob_backoff(qdiff_min);
+			else if (cdg_data->min_qtrend > 0)
+				congestion = prob_backoff(cdg_data->min_qtrend);
+			else if (slowstart && qdiff_max > 0)
+				congestion = prob_backoff(qdiff_max);
+			else if (cdg_data->max_qtrend > 0)
+				congestion = prob_backoff(cdg_data->max_qtrend);
+			
+			/* Update estimate of queue state. */
+			if (cdg_data->min_qtrend > 0 &&
+			    cdg_data->max_qtrend <= 0) {
+				cdg_data->queue_state = CDG_Q_FULL;
+			} else if (cdg_data->min_qtrend >= 0 &&
+			    cdg_data->max_qtrend < 0) {
+				cdg_data->queue_state = CDG_Q_EMPTY;
+				cdg_data->shadow_w = 0;
+			} else if (cdg_data->min_qtrend > 0 &&
+			    cdg_data->max_qtrend > 0) {
+				cdg_data->queue_state = CDG_Q_RISING;
+			} else if (cdg_data->min_qtrend < 0 &&
+			    cdg_data->max_qtrend < 0) {
+				cdg_data->queue_state = CDG_Q_FALLING;
+			}
+
+			if (cdg_data->min_qtrend < 0 ||
+			    cdg_data->max_qtrend < 0)
+				cdg_data->consec_cong_cnt = 0;
+		}
+
+		cdg_data->minrtt_in_prevrtt = cdg_data->minrtt_in_rtt;
+		cdg_data->minrtt_in_rtt = INT_MAX;
+		cdg_data->maxrtt_in_prevrtt = cdg_data->maxrtt_in_rtt;
+		cdg_data->maxrtt_in_rtt = 0;
+		e_t->flags &= ~ERTT_NEW_MEASUREMENT;
+	}
+
+	if (congestion) {
+		cdg_data->consec_cong_cnt++;
+		if (!IN_RECOVERY(CCV(ccv, t_flags))) {
+			if (cdg_data->consec_cong_cnt <= V_cdg_consec_cong)
+				cdg_cong_signal(ccv, CC_CDG_DELAY);
+			else
+				/*
+				 * We have been backing off but the queue is not
+				 * falling. Assume we are competing with
+				 * loss-based flows and don't back off for the
+				 * next V_cdg_hold_backoff RTT periods.
+				 */
+				if (cdg_data->consec_cong_cnt >=
+				    V_cdg_consec_cong + V_cdg_hold_backoff)
+					cdg_data->consec_cong_cnt = 0;
+
+			/* Won't see effect until 2nd RTT. */
+			cdg_data->maxrtt_in_prevrtt = 0;
+			/*
+			 * Resync shadow window in case we are competing with a
+			 * loss based flow
+			 */
+			cdg_data->shadow_w = ulmax(CCV(ccv, snd_cwnd),
+			    cdg_data->shadow_w);
+		}
+	} else if (ack_type == CC_ACK)
+		cdg_window_increase(ccv, new_measurement);
+}
+
+/* When a vnet is created and being initialised, init the per-stack CDG vars. */
+VNET_SYSINIT(cdg_init_vnet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+    cdg_init_vnet, NULL);
+
+SYSCTL_DECL(_net_inet_tcp_cc_cdg);
+SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cdg, CTLFLAG_RW, NULL,
+    "CAIA delay-gradient congestion control related settings");
+
+SYSCTL_STRING(_net_inet_tcp_cc_cdg, OID_AUTO, version,
+    CTLFLAG_RD, CDG_VERSION, sizeof(CDG_VERSION) - 1,
+    "Current algorithm/implementation version number");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, alpha_inc,
+    CTLFLAG_RW, &VNET_NAME(cdg_alpha_inc), 0,
+    "Increment the window increase factor alpha by 1 MSS segment every "
+    "alpha_inc RTTs during congestion avoidance mode.");
+
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_delay,
+    CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_delay), 70,
+    &cdg_beta_handler, "IU",
+    "Delay-based window decrease factor as a percentage "
+    "(on delay-based backoff, w = w * beta_delay / 100)");
+
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_loss,
+    CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_loss), 50,
+    &cdg_beta_handler, "IU",
+    "Loss-based window decrease factor as a percentage "
+    "(on loss-based backoff, w = w * beta_loss / 100)");
+
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, exp_backoff_scale,
+    CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_exp_backoff_scale), 2,
+    &cdg_exp_backoff_scale_handler, "IU",
+    "Scaling parameter for the probabilistic exponential backoff");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg,  OID_AUTO, smoothing_factor,
+    CTLFLAG_RW, &VNET_NAME(cdg_smoothing_factor), 8,
+    "Number of samples used for moving average smoothing (0 = no smoothing)");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_consec_cong,
+    CTLFLAG_RW, &VNET_NAME(cdg_consec_cong), 5,
+    "Number of consecutive delay-gradient based congestion episodes which will "
+    "trigger loss based CC compatibility");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_hold_backoff,
+    CTLFLAG_RW, &VNET_NAME(cdg_hold_backoff), 5,
+    "Number of consecutive delay-gradient based congestion episodes to hold "
+    "the window backoff for loss based CC compatibility");
+
+DECLARE_CC_MODULE(cdg, &cdg_cc_algo);
+
+MODULE_DEPEND(cdg, ertt, 1, 1, 1);


Property changes on: trunk/sys/netinet/cc/cc_cdg.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/cc/cc_chd.c
===================================================================
--- trunk/sys/netinet/cc/cc_chd.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_chd.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010
  *	Swinburne University of Technology, Melbourne, Australia
@@ -51,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_chd.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_chd.c 273736 2014-10-27 14:38:00Z hselasky $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -484,11 +485,11 @@
     "IU", "Queueing congestion threshold in ticks");
 
 SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min,
-    CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
+    CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
     "Minimum queueing delay threshold in ticks");
 
 SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd,  OID_AUTO, use_max,
-    CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
+    CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
     "Use the maximum RTT seen within the measurement period (RTT) "
     "as the basic delay measurement for the algorithm.");
 

Modified: trunk/sys/netinet/cc/cc_cubic.c
===================================================================
--- trunk/sys/netinet/cc/cc_cubic.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_cubic.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008-2010 Lawrence Stewart <lstewart at freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
@@ -46,7 +47,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_cubic.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_cubic.c 293711 2016-01-11 23:37:31Z hiren $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -299,8 +300,10 @@
 cubic_post_recovery(struct cc_var *ccv)
 {
 	struct cubic *cubic_data;
+	int pipe;
 
 	cubic_data = ccv->cc_data;
+	pipe = 0;
 
 	/* Fast convergence heuristic. */
 	if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd)
@@ -315,11 +318,14 @@
 		 *
 		 * XXXLAS: Find a way to do this without needing curack
 		 */
-		if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
-		    CCV(ccv, snd_max)))
-			CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - ccv->curack +
-			    CCV(ccv, t_maxseg);
+		if (V_tcp_do_rfc6675_pipe)
+			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
 		else
+			pipe = CCV(ccv, snd_max) - ccv->curack;
+
+		if (pipe < CCV(ccv, snd_ssthresh))
+			CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
+		else
 			/* Update cwnd based on beta and adjusted max_cwnd. */
 			CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA *
 			    cubic_data->max_cwnd) >> CUBIC_SHIFT));

Modified: trunk/sys/netinet/cc/cc_cubic.h
===================================================================
--- trunk/sys/netinet/cc/cc_cubic.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_cubic.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008-2010 Lawrence Stewart <lstewart at freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
@@ -33,7 +34,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/cc/cc_cubic.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc/cc_cubic.h 220560 2011-04-12 08:13:18Z lstewart $
  */
 
 #ifndef _NETINET_CC_CUBIC_H_

Modified: trunk/sys/netinet/cc/cc_hd.c
===================================================================
--- trunk/sys/netinet/cc/cc_hd.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_hd.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010
  *	Swinburne University of Technology, Melbourne, Australia
@@ -52,7 +53,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_hd.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_hd.c 220560 2011-04-12 08:13:18Z lstewart $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/netinet/cc/cc_htcp.c
===================================================================
--- trunk/sys/netinet/cc/cc_htcp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_htcp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2007-2008
  * 	Swinburne University of Technology, Melbourne, Australia
@@ -48,7 +49,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_htcp.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_htcp.c 220592 2011-04-13 11:28:46Z pluknet $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/netinet/cc/cc_module.h
===================================================================
--- trunk/sys/netinet/cc/cc_module.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_module.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart at freebsd.org>
  * All rights reserved.
@@ -28,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/cc/cc_module.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc/cc_module.h 220560 2011-04-12 08:13:18Z lstewart $
  */
 
 /*

Modified: trunk/sys/netinet/cc/cc_newreno.c
===================================================================
--- trunk/sys/netinet/cc/cc_newreno.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_newreno.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.
@@ -49,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_newreno.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_newreno.c 293711 2016-01-11 23:37:31Z hiren $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -214,6 +215,9 @@
 static void
 newreno_post_recovery(struct cc_var *ccv)
 {
+	int pipe;
+	pipe = 0;
+
 	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
 		/*
 		 * Fast recovery will conclude after returning from this
@@ -224,11 +228,14 @@
 		 *
 		 * XXXLAS: Find a way to do this without needing curack
 		 */
-		if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
-		    CCV(ccv, snd_max)))
-			CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) -
-			ccv->curack + CCV(ccv, t_maxseg);
+		if (V_tcp_do_rfc6675_pipe)
+			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
 		else
+			pipe = CCV(ccv, snd_max) - ccv->curack;
+
+		if (pipe < CCV(ccv, snd_ssthresh))
+			CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
+		else
 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
 	}
 }

Modified: trunk/sys/netinet/cc/cc_vegas.c
===================================================================
--- trunk/sys/netinet/cc/cc_vegas.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_vegas.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010
  *	Swinburne University of Technology, Melbourne, Australia
@@ -54,7 +55,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_vegas.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_vegas.c 220592 2011-04-13 11:28:46Z pluknet $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/netinet/cc.h
===================================================================
--- trunk/sys/netinet/cc.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -36,7 +36,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/cc.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc.h 220560 2011-04-12 08:13:18Z lstewart $
  */
 
 /*

Modified: trunk/sys/netinet/icmp6.h
===================================================================
--- trunk/sys/netinet/icmp6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/icmp6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet/icmp6.h 235705 2012-05-20 23:33:10Z bz $	*/
+/*	$FreeBSD: stable/10/sys/netinet/icmp6.h 327716 2018-01-09 06:07:51Z cy $	*/
 /*	$KAME: icmp6.h,v 1.46 2001/04/27 15:09:48 itojun Exp $	*/
 
 /*-
@@ -298,9 +298,11 @@
 #define ND_OPT_PREFIX_INFORMATION	3
 #define ND_OPT_REDIRECTED_HEADER	4
 #define ND_OPT_MTU			5
+#define ND_OPT_NONCE			14	/* RFC 3971 */
 #define ND_OPT_ROUTE_INFO		24	/* RFC 4191 */
 #define ND_OPT_RDNSS			25	/* RFC 6106 */
 #define ND_OPT_DNSSL			31	/* RFC 6106 */
+#define ND_OPT_MAX			31
 
 struct nd_opt_prefix_info {	/* prefix information */
 	u_int8_t	nd_opt_pi_type;
@@ -331,6 +333,16 @@
 	u_int32_t	nd_opt_mtu_mtu;
 } __packed;
 
+#define	ND_OPT_NONCE_LEN	((1 * 8) - 2)
+#if ((ND_OPT_NONCE_LEN + 2) % 8) != 0
+#error "(ND_OPT_NONCE_LEN + 2) must be a multiple of 8."
+#endif 
+struct nd_opt_nonce {		/* nonce option */
+	u_int8_t	nd_opt_nonce_type;
+	u_int8_t	nd_opt_nonce_len;
+	u_int8_t	nd_opt_nonce[ND_OPT_NONCE_LEN];
+} __packed;
+
 struct nd_opt_route_info {	/* route info */
 	u_int8_t	nd_opt_rti_type;
 	u_int8_t	nd_opt_rti_len;
@@ -556,39 +568,39 @@
  * of the internet control message protocol version 6.
  */
 struct icmp6errstat {
-	u_quad_t icp6errs_dst_unreach_noroute;
-	u_quad_t icp6errs_dst_unreach_admin;
-	u_quad_t icp6errs_dst_unreach_beyondscope;
-	u_quad_t icp6errs_dst_unreach_addr;
-	u_quad_t icp6errs_dst_unreach_noport;
-	u_quad_t icp6errs_packet_too_big;
-	u_quad_t icp6errs_time_exceed_transit;
-	u_quad_t icp6errs_time_exceed_reassembly;
-	u_quad_t icp6errs_paramprob_header;
-	u_quad_t icp6errs_paramprob_nextheader;
-	u_quad_t icp6errs_paramprob_option;
-	u_quad_t icp6errs_redirect; /* we regard redirect as an error here */
-	u_quad_t icp6errs_unknown;
+	uint64_t icp6errs_dst_unreach_noroute;
+	uint64_t icp6errs_dst_unreach_admin;
+	uint64_t icp6errs_dst_unreach_beyondscope;
+	uint64_t icp6errs_dst_unreach_addr;
+	uint64_t icp6errs_dst_unreach_noport;
+	uint64_t icp6errs_packet_too_big;
+	uint64_t icp6errs_time_exceed_transit;
+	uint64_t icp6errs_time_exceed_reassembly;
+	uint64_t icp6errs_paramprob_header;
+	uint64_t icp6errs_paramprob_nextheader;
+	uint64_t icp6errs_paramprob_option;
+	uint64_t icp6errs_redirect; /* we regard redirect as an error here */
+	uint64_t icp6errs_unknown;
 };
 
 struct icmp6stat {
 /* statistics related to icmp6 packets generated */
-	u_quad_t icp6s_error;		/* # of calls to icmp6_error */
-	u_quad_t icp6s_canterror;	/* no error 'cuz old was icmp */
-	u_quad_t icp6s_toofreq;		/* no error 'cuz rate limitation */
-	u_quad_t icp6s_outhist[256];
+	uint64_t icp6s_error;		/* # of calls to icmp6_error */
+	uint64_t icp6s_canterror;	/* no error 'cuz old was icmp */
+	uint64_t icp6s_toofreq;		/* no error 'cuz rate limitation */
+	uint64_t icp6s_outhist[256];
 /* statistics related to input message processed */
-	u_quad_t icp6s_badcode;		/* icmp6_code out of range */
-	u_quad_t icp6s_tooshort;	/* packet < sizeof(struct icmp6_hdr) */
-	u_quad_t icp6s_checksum;	/* bad checksum */
-	u_quad_t icp6s_badlen;		/* calculated bound mismatch */
+	uint64_t icp6s_badcode;		/* icmp6_code out of range */
+	uint64_t icp6s_tooshort;	/* packet < sizeof(struct icmp6_hdr) */
+	uint64_t icp6s_checksum;	/* bad checksum */
+	uint64_t icp6s_badlen;		/* calculated bound mismatch */
 	/*
 	 * number of responses: this member is inherited from netinet code, but
 	 * for netinet6 code, it is already available in icp6s_outhist[].
 	 */
-	u_quad_t icp6s_reflect;
-	u_quad_t icp6s_inhist[256];	
-	u_quad_t icp6s_nd_toomanyopt;	/* too many ND options */
+	uint64_t icp6s_reflect;
+	uint64_t icp6s_inhist[256];	
+	uint64_t icp6s_nd_toomanyopt;	/* too many ND options */
 	struct icmp6errstat icp6s_outerrhist;
 #define icp6s_odst_unreach_noroute \
 	icp6s_outerrhist.icp6errs_dst_unreach_noroute
@@ -608,21 +620,25 @@
 #define icp6s_oparamprob_option icp6s_outerrhist.icp6errs_paramprob_option
 #define icp6s_oredirect icp6s_outerrhist.icp6errs_redirect
 #define icp6s_ounknown icp6s_outerrhist.icp6errs_unknown
-	u_quad_t icp6s_pmtuchg;		/* path MTU changes */
-	u_quad_t icp6s_nd_badopt;	/* bad ND options */
-	u_quad_t icp6s_badns;		/* bad neighbor solicitation */
-	u_quad_t icp6s_badna;		/* bad neighbor advertisement */
-	u_quad_t icp6s_badrs;		/* bad router advertisement */
-	u_quad_t icp6s_badra;		/* bad router advertisement */
-	u_quad_t icp6s_badredirect;	/* bad redirect message */
+	uint64_t icp6s_pmtuchg;		/* path MTU changes */
+	uint64_t icp6s_nd_badopt;	/* bad ND options */
+	uint64_t icp6s_badns;		/* bad neighbor solicitation */
+	uint64_t icp6s_badna;		/* bad neighbor advertisement */
+	uint64_t icp6s_badrs;		/* bad router solicitation */
+	uint64_t icp6s_badra;		/* bad router advertisement */
+	uint64_t icp6s_badredirect;	/* bad redirect message */
 };
 
 #ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmp6stat, icmp6stat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	ICMP6STAT_ADD(name, val)	V_icmp6stat.name += (val)
+#define	ICMP6STAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct icmp6stat, icmp6stat, name, (val))
 #define	ICMP6STAT_INC(name)		ICMP6STAT_ADD(name, 1)
 
 /*
@@ -629,8 +645,8 @@
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_icmp6stat_inc(int statnum);
-#define	KMOD_ICMP6STAT_INC(name)					\
-	kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(u_quad_t))
+#define	KMOD_ICMP6STAT_INC(name)	\
+    kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(uint64_t))
 #endif
 
 /*
@@ -660,7 +676,8 @@
 #define ICMPV6CTL_MLD_SOMAXSRC	22
 #define ICMPV6CTL_MLD_VERSION	23
 #define ICMPV6CTL_ND6_MAXQLEN	24
-#define ICMPV6CTL_MAXID		25
+#define ICMPV6CTL_NODEINFO_OLDMCPREFIX	25
+#define ICMPV6CTL_MAXID		26
 
 #define RTF_PROBEMTU	RTF_PROTO1
 
@@ -688,7 +705,9 @@
 #define icmp6_ifstat_inc(ifp, tag) \
 do {								\
 	if (ifp)						\
-		((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat->tag++; \
+		counter_u64_add(((struct in6_ifextra *)		\
+		    ((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat[\
+		    offsetof(struct icmp6_ifstat, tag) / sizeof(uint64_t)], 1);\
 } while (/*CONSTCOND*/ 0)
 
 #define icmp6_ifoutstat_inc(ifp, type, code) \

Modified: trunk/sys/netinet/icmp_var.h
===================================================================
--- trunk/sys/netinet/icmp_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/icmp_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)icmp_var.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/icmp_var.h 237913 2012-07-01 09:00:29Z tuexen $
+ * $FreeBSD: stable/10/sys/netinet/icmp_var.h 254925 2013-08-26 18:16:05Z jhb $
  */
 
 #ifndef _NETINET_ICMP_VAR_H_
@@ -59,11 +59,15 @@
 };
 
 #ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmpstat, icmpstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	ICMPSTAT_ADD(name, val)	V_icmpstat.name += (val)
+#define	ICMPSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct icmpstat, icmpstat, name, (val))
 #define	ICMPSTAT_INC(name)	ICMPSTAT_ADD(name, 1)
 
 /*
@@ -71,7 +75,7 @@
  */
 void	kmod_icmpstat_inc(int statnum);
 #define	KMOD_ICMPSTAT_INC(name)						\
-	kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(u_long))
+    kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(uint64_t))
 #endif
 
 /*
@@ -82,19 +86,9 @@
 #define ICMPCTL_ICMPLIM		3
 #define ICMPCTL_MAXID		4
 
-#define ICMPCTL_NAMES { \
-	{ 0, 0 }, \
-	{ "maskrepl", CTLTYPE_INT }, \
-	{ "stats", CTLTYPE_STRUCT }, \
-	{ "icmplim", CTLTYPE_INT }, \
-}
-
 #ifdef _KERNEL
 SYSCTL_DECL(_net_inet_icmp);
 
-VNET_DECLARE(struct icmpstat, icmpstat);	/* icmp statistics. */
-#define	V_icmpstat	VNET(icmpstat)
-
 extern int badport_bandlim(int);
 #define BANDLIM_UNLIMITED -1
 #define BANDLIM_ICMP_UNREACH 0

Modified: trunk/sys/netinet/if_atm.c
===================================================================
--- trunk/sys/netinet/if_atm.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_atm.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -33,7 +33,7 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/if_atm.c 216466 2010-12-15 22:58:45Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/if_atm.c 249925 2013-04-26 12:50:32Z glebius $");
 
 /*
  * IP <=> ATM address resolution.
@@ -318,7 +318,7 @@
  *   but this is enough for PVCs entered via the "route" command.
  */
 int
-atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
+atmresolve(struct rtentry *rt, struct mbuf *m, const struct sockaddr *dst,
     struct atm_pseudohdr *desten)
 {
 	struct sockaddr_dl *sdl;
@@ -330,7 +330,8 @@
 	}
 
 	if (rt == NULL) {
-		rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
+		/* link level on table 0 XXX MRT */
+		rt = RTALLOC1(__DECONST(struct sockaddr *, dst), 0);
 		if (rt == NULL)
 			goto bad;	/* failed */
 		RT_REMREF(rt);		/* don't keep LL references */

Modified: trunk/sys/netinet/if_atm.h
===================================================================
--- trunk/sys/netinet/if_atm.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_atm.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/if_atm.h 139823 2005-01-07 01:45:51Z imp $ */
+/* $FreeBSD: stable/10/sys/netinet/if_atm.h 249925 2013-04-26 12:50:32Z glebius $ */
 /*      $NetBSD: if_atm.h,v 1.2 1996/07/03 17:17:17 chuck Exp $       */
 
 /*-
@@ -44,5 +44,5 @@
 struct sockaddr;
 
 void atm_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *,
+int atmresolve(struct rtentry *, struct mbuf *, const struct sockaddr *,
 		struct atm_pseudohdr *);

Modified: trunk/sys/netinet/if_ether.c
===================================================================
--- trunk/sys/netinet/if_ether.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_ether.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/if_ether.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/if_ether.c 309340 2016-11-30 22:20:23Z vangyzen $");
 
 #include "opt_inet.h"
 
@@ -65,7 +65,7 @@
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
-#if defined(INET)
+#ifdef INET
 #include <netinet/ip_carp.h>
 #endif
 
@@ -74,7 +74,7 @@
 
 #include <security/mac/mac_framework.h>
 
-#define SIN(s) ((struct sockaddr_in *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 SYSCTL_DECL(_net_link_ether);
@@ -90,8 +90,13 @@
 static VNET_DEFINE(int, arp_proxyall) = 0;
 static VNET_DEFINE(int, arpt_down) = 20;	/* keep incomplete entries for
 						 * 20 seconds */
-VNET_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
+VNET_PCPUSTAT_SYSINIT(arpstat);
 
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(arpstat);
+#endif /* VIMAGE */
+
 static VNET_DEFINE(int, arp_maxhold) = 1;
 
 #define	V_arpt_keep		VNET(arpt_keep)
@@ -98,7 +103,6 @@
 #define	V_arpt_down		VNET(arpt_down)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
-#define	V_arpstat		VNET(arpstat)
 #define	V_arp_maxhold		VNET(arp_maxhold)
 
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
@@ -116,16 +120,35 @@
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW,
 	&VNET_NAME(arpt_down), 0,
 	"Incomplete ARP entry lifetime in seconds");
-SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
-	&VNET_NAME(arpstat), arpstat,
-	"ARP statistics (struct arpstat, net/if_arp.h)");
+SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
+    arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
 SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
 	&VNET_NAME(arp_maxhold), 0,
 	"Number of packets to hold per ARP entry");
 
+/*
+ * Due to the exponential backoff algorithm used for the interval between GARP
+ * retransmissions, the maximum number of retransmissions is limited for
+ * sanity. This limit corresponds to a maximum interval between retransmissions
+ * of 2^16 seconds ~= 18 hours.
+ *
+ * Making this limit more dynamic is more complicated than worthwhile,
+ * especially since sending out GARPs spaced days apart would be of little
+ * use. A maximum dynamic limit would look something like:
+ *
+ * const int max = fls(INT_MAX / hz) - 1;
+ */
+#define MAX_GARP_RETRANSMITS 16
+static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
+static int garp_rexmit_count = 0; /* GARP retransmission setting. */
+
+SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
+    CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
+    &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
+    "Number of times to retransmit GARP packets;"
+    " 0 to disable, maximum of 16");
+
 static void	arp_init(void);
-void		arprequest(struct ifnet *,
-			struct in_addr *, struct in_addr *, u_char *);
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 #ifdef INET
@@ -140,8 +163,6 @@
 };
 
 #ifdef AF_INET
-void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
 /*
  * called by in_ifscrub to remove entry from the table when
  * the interface goes away
@@ -155,10 +176,10 @@
 	addr4.sin_len    = sizeof(addr4);
 	addr4.sin_family = AF_INET;
 	addr4.sin_addr.s_addr = addr;
-	IF_AFDATA_LOCK(ifp);
+	IF_AFDATA_WLOCK(ifp);
 	lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
 	    (struct sockaddr *)&addr4);
-	IF_AFDATA_UNLOCK(ifp);
+	IF_AFDATA_WUNLOCK(ifp);
 }
 #endif
 
@@ -172,13 +193,41 @@
 	struct ifnet *ifp;
 
 	if (lle->la_flags & LLE_STATIC) {
-		LLE_WUNLOCK(lle);
 		return;
 	}
-
+	LLE_WLOCK(lle);
+	if (callout_pending(&lle->la_timer)) {
+		/*
+		 * Here we are a bit odd here in the treatment of 
+		 * active/pending. If the pending bit is set, it got
+		 * rescheduled before I ran. The active
+		 * bit we ignore, since if it was stopped
+		 * in ll_tablefree() and was currently running
+		 * it would have return 0 so the code would
+		 * not have deleted it since the callout could
+		 * not be stopped so we want to go through
+		 * with the delete here now. If the callout
+		 * was restarted, the pending bit will be back on and
+		 * we just want to bail since the callout_reset would
+		 * return 1 and our reference would have been removed
+		 * by arpresolve() below.
+		 */
+		LLE_WUNLOCK(lle);
+ 		return;
+ 	}
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
+	if ((lle->la_flags & LLE_DELETED) == 0) {
+		int evt;
+
+		if (lle->la_flags & LLE_VALID)
+			evt = LLENTRY_EXPIRED;
+		else
+			evt = LLENTRY_TIMEDOUT;
+		EVENTHANDLER_INVOKE(lle_event, lle, evt);
+	}
+
 	callout_stop(&lle->la_timer);
 
 	/* XXX: LOR avoidance. We still have ref on lle. */
@@ -210,15 +259,15 @@
  *	- arp header source ethernet address
  */
 void
-arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr  *tip,
-    u_char *enaddr)
+arprequest(struct ifnet *ifp, const struct in_addr *sip,
+    const struct in_addr *tip, u_char *enaddr)
 {
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct sockaddr sa;
+	u_char *carpaddr = NULL;
 
 	if (sip == NULL) {
-		/* XXX don't believe this can happen (or explain why) */
 		/*
 		 * The caller did not supply a source address, try to find
 		 * a compatible one among those assigned to this interface.
@@ -225,22 +274,34 @@
 		 */
 		struct ifaddr *ifa;
 
+		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			if (!ifa->ifa_addr ||
-			    ifa->ifa_addr->sa_family != AF_INET)
+			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
-			sip = &SIN(ifa->ifa_addr)->sin_addr;
+
+			if (ifa->ifa_carp) {
+				if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
+					continue;
+				sip = &IA_SIN(ifa)->sin_addr;
+			} else {
+				carpaddr = NULL;
+				sip = &IA_SIN(ifa)->sin_addr;
+			}
+
 			if (0 == ((sip->s_addr ^ tip->s_addr) &
-			    SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
+			    IA_MASKSIN(ifa)->sin_addr.s_addr))
 				break;  /* found it. */
 		}
+		IF_ADDR_RUNLOCK(ifp);
 		if (sip == NULL) {
 			printf("%s: cannot find matching address\n", __func__);
 			return;
 		}
 	}
+	if (enaddr == NULL)
+		enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
 
-	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 		return;
 	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
 		2*ifp->if_data.ifi_addrlen;
@@ -255,12 +316,13 @@
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
-	bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
-	bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
-	bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
+	bcopy(enaddr, ar_sha(ah), ah->ar_hln);
+	bcopy(sip, ar_spa(ah), ah->ar_pln);
+	bcopy(tip, ar_tpa(ah), ah->ar_pln);
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 	m->m_flags |= M_BCAST;
+	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	(*ifp->if_output)(ifp, m, &sa, NULL);
 	ARPSTAT_INC(txrequests);
 }
@@ -281,7 +343,7 @@
  */
 int
 arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
-	struct sockaddr *dst, u_char *desten, struct llentry **lle)
+	const struct sockaddr *dst, u_char *desten, struct llentry **lle)
 {
 	struct llentry *la = 0;
 	u_int flags = 0;
@@ -317,8 +379,8 @@
 	if (la == NULL) {
 		if (flags & LLE_CREATE)
 			log(LOG_DEBUG,
-			    "arpresolve: can't allocate llinfo for %s\n",
-			    inet_ntoa(SIN(dst)->sin_addr));
+			    "arpresolve: can't allocate llinfo for %s on %s\n",
+			    inet_ntoa(SIN(dst)->sin_addr), ifp->if_xname);
 		m_freem(m);
 		return (EINVAL);
 	}
@@ -326,6 +388,7 @@
 	if ((la->la_flags & LLE_VALID) &&
 	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
 		bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+		renew = 0;
 		/*
 		 * If entry has an expiry time and it is approaching,
 		 * see if we need to send an ARP request within this
@@ -333,15 +396,21 @@
 		 */
 		if (!(la->la_flags & LLE_STATIC) &&
 		    time_uptime + la->la_preempt > la->la_expire) {
-			arprequest(ifp, NULL,
-			    &SIN(dst)->sin_addr, IF_LLADDR(ifp));
-
+			renew = 1;
 			la->la_preempt--;
 		}
 
 		*lle = la;
-		error = 0;
-		goto done;
+
+		if (flags & LLE_EXCLUSIVE)
+			LLE_WUNLOCK(la);
+		else
+			LLE_RUNLOCK(la);
+
+		if (renew == 1)
+			arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
+
+		return (0);
 	}
 
 	if (la->la_flags & LLE_STATIC) {   /* should not happen! */
@@ -411,8 +480,7 @@
 			LLE_REMREF(la);
 		la->la_asked++;
 		LLE_WUNLOCK(la);
-		arprequest(ifp, NULL, &SIN(dst)->sin_addr,
-		    IF_LLADDR(ifp));
+		arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
 		return (error);
 	}
 done:
@@ -491,6 +559,9 @@
 static int log_arp_movements = 1;
 static int log_arp_permanent_modify = 1;
 static int allow_multicast = 0;
+static struct timeval arp_lastlog;
+static int arp_curpps;
+static int arp_maxpps = 1;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
@@ -503,7 +574,16 @@
 	"log arp replies from MACs different than the one in the permanent arp entry");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
 	&allow_multicast, 0, "accept multicast addresses");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
+	CTLFLAG_RW, &arp_maxpps, 0,
+	"Maximum number of remotely triggered ARP messages that can be "
+	"logged per second");
 
+#define	ARP_LOG(pri, ...)	do {					\
+	if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps))	\
+		log((pri), "arp: " __VA_ARGS__);			\
+} while (0)
+
 static void
 in_arpinput(struct mbuf *m)
 {
@@ -519,7 +599,7 @@
 	int op, flags;
 	int req_len;
 	int bridged = 0, is_bridge = 0;
-	int carp_match = 0;
+	int carped;
 	struct sockaddr_in sin;
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
@@ -532,7 +612,7 @@
 
 	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 	if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
-		log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n");
+		ARP_LOG(LOG_NOTICE, "runt packet -- m_pullup failed\n");
 		return;
 	}
 
@@ -542,13 +622,13 @@
 	 * a protocol length not equal to an IPv4 address.
 	 */
 	if (ah->ar_pln != sizeof(struct in_addr)) {
-		log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n",
+		ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
 		    sizeof(struct in_addr));
 		goto drop;
 	}
 
 	if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
-		log(LOG_NOTICE, "arp: %*D is multicast\n",
+		ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
 		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
 		goto drop;
 	}
@@ -564,28 +644,18 @@
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
-	 * If the interface does not match, but the recieving interface
-	 * is part of carp, we call carp_iamatch to see if this is a
-	 * request for the virtual host ip.
-	 * XXX: This is really ugly!
 	 */
 	IN_IFADDR_RLOCK();
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
 		    ia->ia_ifp == ifp) &&
-		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
+		    (ia->ia_ifa.ifa_carp == NULL ||
+		    (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK();
 			goto match;
 		}
-		if (ifp->if_carp != NULL &&
-		    (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
-		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
-			carp_match = 1;
-			ifa_ref(&ia->ia_ifa);
-			IN_IFADDR_RUNLOCK();
-			goto match;
-		}
 	}
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
@@ -625,7 +695,9 @@
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
-		if (ifa->ifa_addr->sa_family == AF_INET) {
+		if (ifa->ifa_addr->sa_family == AF_INET &&
+		    (ifa->ifa_carp == NULL ||
+		    (*carp_iamatch_p)(ifa, &enaddr))) {
 			ia = ifatoia(ifa);
 			ifa_ref(ifa);
 			IF_ADDR_RUNLOCK(ifp);
@@ -646,14 +718,14 @@
 match:
 	if (!enaddr)
 		enaddr = (u_int8_t *)IF_LLADDR(ifp);
+	carped = (ia->ia_ifa.ifa_carp != NULL);
 	myaddr = ia->ia_addr.sin_addr;
 	ifa_free(&ia->ia_ifa);
 	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
 		goto drop;	/* it's from me, ignore it. */
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
-		log(LOG_NOTICE,
-		    "arp: link address is broadcast for IP address %s!\n",
-		    inet_ntoa(isaddr));
+		ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
+		    "%s!\n", inet_ntoa(isaddr));
 		goto drop;
 	}
 	/*
@@ -662,9 +734,9 @@
 	 * case we suppress the warning to avoid false positive complaints of
 	 * potential misconfiguration.
 	 */
-	if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
-		log(LOG_ERR,
-		   "arp: %*D is using my IP address %s on %s!\n",
+	if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
+	    myaddr.s_addr != 0) {
+		ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa(isaddr), ifp->if_xname);
 		itaddr = myaddr;
@@ -685,9 +757,9 @@
 	IF_AFDATA_UNLOCK(ifp);
 	if (la != NULL) {
 		/* the following is not an error when doing bridging */
-		if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
+		if (!bridged && la->lle_tbl->llt_ifp != ifp) {
 			if (log_arp_wrong_iface)
-				log(LOG_WARNING, "arp: %s is on %s "
+				ARP_LOG(LOG_WARNING, "%s is on %s "
 				    "but got reply from %*D on %s\n",
 				    inet_ntoa(isaddr),
 				    la->lle_tbl->llt_ifp->if_xname,
@@ -701,8 +773,8 @@
 			if (la->la_flags & LLE_STATIC) {
 				LLE_WUNLOCK(la);
 				if (log_arp_permanent_modify)
-					log(LOG_ERR,
-					    "arp: %*D attempts to modify "
+					ARP_LOG(LOG_ERR,
+					    "%*D attempts to modify "
 					    "permanent entry for %s on %s\n",
 					    ifp->if_addrlen,
 					    (u_char *)ar_sha(ah), ":",
@@ -710,7 +782,7 @@
 				goto reply;
 			}
 			if (log_arp_movements) {
-				log(LOG_INFO, "arp: %s moved from %*D "
+				ARP_LOG(LOG_INFO, "%s moved from %*D "
 				    "to %*D on %s\n",
 				    inet_ntoa(isaddr),
 				    ifp->if_addrlen,
@@ -722,7 +794,7 @@
 
 		if (ifp->if_addrlen != ah->ar_hln) {
 			LLE_WUNLOCK(la);
-			log(LOG_WARNING, "arp from %*D: addr len: new %d, "
+			ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
 			    "i/f %d (ignored)\n", ifp->if_addrlen,
 			    (u_char *) ar_sha(ah), ":", ah->ar_hln,
 			    ifp->if_addrlen);
@@ -731,7 +803,7 @@
 		(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
 		la->la_flags |= LLE_VALID;
 
-		EVENTHANDLER_INVOKE(arp_update_event, la);
+		EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 		if (!(la->la_flags & LLE_STATIC)) {
 			int canceled;
@@ -763,6 +835,8 @@
 			for (; m_hold != NULL; m_hold = m_hold_next) {
 				m_hold_next = m_hold->m_nextpkt;
 				m_hold->m_nextpkt = NULL;
+				/* Avoid confusing lower layers. */
+				m_clrprotoflags(m_hold);
 				(*ifp->if_output)(ifp, m_hold, &sa, NULL);
 			}
 		} else
@@ -781,9 +855,9 @@
 		struct llentry *lle = NULL;
 
 		sin.sin_addr = itaddr;
-		IF_AFDATA_LOCK(ifp);
+		IF_AFDATA_RLOCK(ifp);
 		lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
-		IF_AFDATA_UNLOCK(ifp);
+		IF_AFDATA_RUNLOCK(ifp);
 
 		if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
@@ -830,7 +904,7 @@
 			if (!rt)
 				goto drop;
 			if (rt->rt_ifp != ifp) {
-				log(LOG_INFO, "arp_proxy: ignoring request"
+				ARP_LOG(LOG_INFO, "proxy: ignoring request"
 				    " from %s via %s, expecting %s\n",
 				    inet_ntoa(isaddr), ifp->if_xname,
 				    rt->rt_ifp->if_xname);
@@ -867,6 +941,7 @@
 	m->m_pkthdr.rcvif = NULL;
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
+	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	(*ifp->if_output)(ifp, m, &sa, NULL);
 	ARPSTAT_INC(txreplies);
 	return;
@@ -876,14 +951,124 @@
 }
 #endif
 
+/*
+ * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range
+ * of valid values.
+ */
+static int
+sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	int rexmit_count = *(int *)arg1;
+
+	error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
+
+	/* Enforce limits on any new value that may have been set. */
+	if (!error && req->newptr) {
+		/* A new value was set. */
+		if (rexmit_count < 0) {
+			rexmit_count = 0;
+		} else if (rexmit_count > MAX_GARP_RETRANSMITS) {
+			rexmit_count = MAX_GARP_RETRANSMITS;
+		}
+		*(int *)arg1 = rexmit_count;
+	}
+
+	return (error);
+}
+
+/*
+ * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
+ * retransmit it again. A pending callout owns a reference to the ifa.
+ */
+static void
+garp_rexmit(void *arg)
+{
+	struct in_ifaddr *ia = arg;
+
+	if (callout_pending(&ia->ia_garp_timer) ||
+	    !callout_active(&ia->ia_garp_timer)) {
+		IFA_UNLOCK(&ia->ia_ifa);
+		ifa_free(&ia->ia_ifa);
+		return;
+	}
+
+	/*
+	 * Drop ifa lock while the ARP request is generated.
+	 */
+	IFA_UNLOCK(&ia->ia_ifa);
+
+	arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
+	    &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
+
+	/*
+	 * Increment the count of retransmissions. If the count has reached the
+	 * maximum value, stop sending the GARP packets. Otherwise, schedule
+	 * the callout to retransmit another GARP packet.
+	 */
+	++ia->ia_garp_count;
+	if (ia->ia_garp_count >= garp_rexmit_count) {
+		ifa_free(&ia->ia_ifa);
+	} else {
+		int rescheduled;
+		IFA_LOCK(&ia->ia_ifa);
+		rescheduled = callout_reset(&ia->ia_garp_timer,
+		    (1 << ia->ia_garp_count) * hz,
+		    garp_rexmit, ia);
+		IFA_UNLOCK(&ia->ia_ifa);
+		if (rescheduled) {
+			ifa_free(&ia->ia_ifa);
+		}
+	}
+}
+
+/*
+ * Start the GARP retransmit timer.
+ *
+ * A single GARP is always transmitted when an IPv4 address is added
+ * to an interface and that is usually sufficient. However, in some
+ * circumstances, such as when a shared address is passed between
+ * cluster nodes, this single GARP may occasionally be dropped or
+ * lost. This can lead to neighbors on the network link working with a
+ * stale ARP cache and sending packets destined for that address to
+ * the node that previously owned the address, which may not respond.
+ *
+ * To avoid this situation, GARP retransmits can be enabled by setting
+ * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
+ * than zero. The setting represents the maximum number of
+ * retransmissions. The interval between retransmissions is calculated
+ * using an exponential backoff algorithm, doubling each time, so the
+ * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
+ */
+static void
+garp_timer_start(struct ifaddr *ifa)
+{
+	struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
+
+	IFA_LOCK(ifa);
+	ia->ia_garp_count = 0;
+	if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz,
+	    garp_rexmit, ia) == 0) {
+		ifa_ref(ifa);
+	}
+	IFA_UNLOCK(ifa);
+}
+
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
 	struct llentry *lle;
 
+	if (ifa->ifa_carp != NULL)
+		return;
+
 	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
 		arprequest(ifp, &IA_SIN(ifa)->sin_addr,
 				&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
+		if (garp_rexmit_count > 0) {
+			garp_timer_start(ifa);
+		}
+
 		/*
 		 * interface address is considered static entry
 		 * because the output of the arp utility shows

Modified: trunk/sys/netinet/if_ether.h
===================================================================
--- trunk/sys/netinet/if_ether.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_ether.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.h	8.3 (Berkeley) 5/2/95
- * $FreeBSD: stable/9/sys/netinet/if_ether.h 196995 2009-09-08 21:17:17Z np $
+ * $FreeBSD: stable/10/sys/netinet/if_ether.h 249925 2013-04-26 12:50:32Z glebius $
  */
 
 #ifndef _NETINET_IF_ETHER_H_
@@ -49,9 +49,9 @@
 	(enaddr)[0] = 0x01; \
 	(enaddr)[1] = 0x00; \
 	(enaddr)[2] = 0x5e; \
-	(enaddr)[3] = ((u_char *)ipaddr)[1] & 0x7f; \
-	(enaddr)[4] = ((u_char *)ipaddr)[2]; \
-	(enaddr)[5] = ((u_char *)ipaddr)[3]; \
+	(enaddr)[3] = ((const u_char *)ipaddr)[1] & 0x7f; \
+	(enaddr)[4] = ((const u_char *)ipaddr)[2]; \
+	(enaddr)[5] = ((const u_char *)ipaddr)[3]; \
 }
 /*
  * Macro to map an IP6 multicast address to an Ethernet multicast address.
@@ -64,10 +64,10 @@
 {                                                                       \
 	(enaddr)[0] = 0x33;						\
 	(enaddr)[1] = 0x33;						\
-	(enaddr)[2] = ((u_char *)ip6addr)[12];				\
-	(enaddr)[3] = ((u_char *)ip6addr)[13];				\
-	(enaddr)[4] = ((u_char *)ip6addr)[14];				\
-	(enaddr)[5] = ((u_char *)ip6addr)[15];				\
+	(enaddr)[2] = ((const u_char *)ip6addr)[12];			\
+	(enaddr)[3] = ((const u_char *)ip6addr)[13];			\
+	(enaddr)[4] = ((const u_char *)ip6addr)[14];			\
+	(enaddr)[5] = ((const u_char *)ip6addr)[15];			\
 }
 
 /*
@@ -90,6 +90,7 @@
 #define	arp_pln	ea_hdr.ar_pln
 #define	arp_op	ea_hdr.ar_op
 
+#ifndef BURN_BRIDGES	/* Can be used by third party software. */
 struct sockaddr_inarp {
 	u_char	sin_len;
 	u_char	sin_family;
@@ -100,6 +101,8 @@
 	u_short	sin_other;
 #define SIN_PROXY 1
 };
+#endif /* !BURN_BRIDGES  */
+
 /*
  * IP and ethernet specific routing flags
  */
@@ -113,16 +116,13 @@
 struct llentry;
 struct ifaddr;
 
-int	arpresolve(struct ifnet *ifp, struct rtentry *rt,
-		    struct mbuf *m, struct sockaddr *dst, u_char *desten,
-		    struct llentry **lle);
+int	arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
+	    const struct sockaddr *dst, u_char *desten, struct llentry **lle);
+void	arprequest(struct ifnet *, const struct in_addr *,
+	    const struct in_addr *, u_char *);
 void	arp_ifinit(struct ifnet *, struct ifaddr *);
 void	arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
-
-#include <sys/eventhandler.h>
-typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
-EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
-
+void	arp_ifscrub(struct ifnet *, uint32_t);
 #endif
 
 #endif

Modified: trunk/sys/netinet/igmp.c
===================================================================
--- trunk/sys/netinet/igmp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -49,7 +49,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/igmp.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/igmp.c 305558 2016-09-07 19:25:08Z dim $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -290,7 +290,7 @@
 {
 
 #ifdef VIMAGE
-	m->m_pkthdr.header = ifp->if_vnet;
+	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
 #endif /* VIMAGE */
 	m->m_pkthdr.flowid = ifp->if_index;
 }
@@ -299,7 +299,7 @@
 igmp_scrub_context(struct mbuf *m)
 {
 
-	m->m_pkthdr.header = NULL;
+	m->m_pkthdr.PH_loc.ptr = NULL;
 	m->m_pkthdr.flowid = 0;
 }
 
@@ -327,7 +327,7 @@
 
 #ifdef notyet
 #if defined(VIMAGE) && defined(INVARIANTS)
-	KASSERT(curvnet == (m->m_pkthdr.header),
+	KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr),
 	    ("%s: called when curvnet was not restored", __func__));
 #endif
 #endif
@@ -524,13 +524,13 @@
 	struct mbuf	*m;
 	struct ipoption	*p;
 
-	MGET(m, M_DONTWAIT, MT_DATA);
+	m = m_get(M_WAITOK, MT_DATA);
 	p = mtod(m, struct ipoption *);
 	p->ipopt_dst.s_addr = INADDR_ANY;
-	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
-	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
-	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
-	p->ipopt_list[3] = 0x00;	/* pad byte */
+	p->ipopt_list[0] = (char)IPOPT_RA;	/* Router Alert Option */
+	p->ipopt_list[1] = 0x04;		/* 4 bytes long */
+	p->ipopt_list[2] = IPOPT_EOL;		/* End of IP option list */
+	p->ipopt_list[3] = 0x00;		/* pad byte */
 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
 
 	return (m);
@@ -655,16 +655,12 @@
 void
 igmp_domifdetach(struct ifnet *ifp)
 {
-	struct igmp_ifinfo *igi;
 
 	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
 	    __func__, ifp, ifp->if_xname);
 
 	IGMP_LOCK();
-
-	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
 	igi_delete_locked(ifp);
-
 	IGMP_UNLOCK();
 }
 
@@ -1443,7 +1439,7 @@
 
 	ip = mtod(m, struct ip *);
 	iphlen = off;
-	igmplen = ip->ip_len;
+	igmplen = ntohs(ip->ip_len) - off;
 
 	/*
 	 * Validate lengths.
@@ -1534,8 +1530,7 @@
 		case IGMP_VERSION_3: {
 				struct igmpv3 *igmpv3;
 				uint16_t igmpv3len;
-				uint16_t srclen;
-				int nsrc;
+				uint16_t nsrc;
 
 				IGMPSTAT_INC(igps_rcv_v3_queries);
 				igmpv3 = (struct igmpv3 *)igmp;
@@ -1543,8 +1538,8 @@
 				 * Validate length based on source count.
 				 */
 				nsrc = ntohs(igmpv3->igmp_numsrc);
-				srclen = sizeof(struct in_addr) * nsrc;
-				if (nsrc * sizeof(in_addr_t) > srclen) {
+				if (nsrc * sizeof(in_addr_t) >
+				    UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
 					IGMPSTAT_INC(igps_rcv_tooshort);
 					return;
 				}
@@ -1553,7 +1548,7 @@
 				 * this scope.
 				 */
 				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
-				    srclen;
+				    sizeof(struct in_addr) * nsrc;
 				if ((m->m_flags & M_EXT ||
 				     m->m_len < igmpv3len) &&
 				    (m = m_pullup(m, igmpv3len)) == NULL) {
@@ -2122,6 +2117,7 @@
 				    __func__, igi->igi_version, IGMP_VERSION_2,
 				    igi->igi_ifp, igi->igi_ifp->if_xname);
 				igi->igi_version = IGMP_VERSION_2;
+				igmp_v3_cancel_link_timers(igi);
 			}
 		}
 	} else if (igi->igi_v1_timer > 0) {
@@ -2204,7 +2200,7 @@
 
 	ifp = inm->inm_ifp;
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOMEM);
 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
@@ -2226,7 +2222,7 @@
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = 0;
-	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
+	ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp));
 	ip->ip_off = 0;
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_src.s_addr = INADDR_ANY;
@@ -2781,12 +2777,12 @@
 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
 		if (!is_state_change && !is_group_query) {
-			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 			if (m)
 				m->m_data += IGMP_LEADINGSPACE;
 		}
 		if (m == NULL) {
-			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			m = m_gethdr(M_NOWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
@@ -2906,11 +2902,11 @@
 			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
-		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m)
 			m->m_data += IGMP_LEADINGSPACE;
 		if (m == NULL) {
-			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			m = m_gethdr(M_NOWAIT, MT_DATA);
 			if (m)
 				MH_ALIGN(m, IGMP_LEADINGSPACE);
 		}
@@ -3062,11 +3058,11 @@
 				CTR1(KTR_IGMPV3,
 				    "%s: use previous packet", __func__);
 			} else {
-				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 				if (m)
 					m->m_data += IGMP_LEADINGSPACE;
 				if (m == NULL) {
-					m = m_gethdr(M_DONTWAIT, MT_DATA);
+					m = m_gethdr(M_NOWAIT, MT_DATA);
 					if (m)
 						MH_ALIGN(m, IGMP_LEADINGSPACE);
 				}
@@ -3328,6 +3324,15 @@
 	KASSERT(igi->igi_version == IGMP_VERSION_3,
 	    ("%s: called when version %d", __func__, igi->igi_version));
 
+	/*
+	 * Check that there are some packets queued. If so, send them first.
+	 * For large number of groups the reply to general query can take
+	 * many packets, we should finish sending them before starting of
+	 * queuing the new reply.
+	 */
+	if (igi->igi_gq.ifq_head != NULL)
+		goto send;
+
 	ifp = igi->igi_ifp;
 
 	IF_ADDR_RLOCK(ifp);
@@ -3363,6 +3368,7 @@
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
+send:
 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
 	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
 
@@ -3403,7 +3409,7 @@
 	 * indexes to guard against interface detach, they are
 	 * unique to each VIMAGE and must be retrieved.
 	 */
-	CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
+	CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr));
 	ifindex = igmp_restore_context(m);
 
 	/*
@@ -3450,7 +3456,7 @@
 	}
 
 	igmp_scrub_context(m0);
-	m->m_flags &= ~(M_PROTOFLAGS);
+	m_clrprotoflags(m);
 	m0->m_pkthdr.rcvif = V_loif;
 #ifdef MAC
 	mac_netinet_igmp_send(ifp, m0);
@@ -3498,7 +3504,7 @@
 	if (m->m_flags & M_IGMPV3_HDR) {
 		igmpreclen -= hdrlen;
 	} else {
-		M_PREPEND(m, hdrlen, M_DONTWAIT);
+		M_PREPEND(m, hdrlen, M_NOWAIT);
 		if (m == NULL)
 			return (NULL);
 		m->m_flags |= M_IGMPV3_HDR;
@@ -3523,8 +3529,8 @@
 
 	ip = mtod(m, struct ip *);
 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
-	ip->ip_len = hdrlen + igmpreclen;
-	ip->ip_off = IP_DF;
+	ip->ip_len = htons(hdrlen + igmpreclen);
+	ip->ip_off = htons(IP_DF);
 	ip->ip_p = IPPROTO_IGMP;
 	ip->ip_sum = 0;
 

Modified: trunk/sys/netinet/igmp.h
===================================================================
--- trunk/sys/netinet/igmp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)igmp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/igmp.h 193938 2009-06-10 18:12:15Z imp $
+ * $FreeBSD: stable/10/sys/netinet/igmp.h 193938 2009-06-10 18:12:15Z imp $
  */
 
 #ifndef _NETINET_IGMP_H_

Modified: trunk/sys/netinet/igmp_var.h
===================================================================
--- trunk/sys/netinet/igmp_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)igmp_var.h	8.1 (Berkeley) 7/19/93
- * $FreeBSD: stable/9/sys/netinet/igmp_var.h 190965 2009-04-12 13:41:13Z rwatson $
+ * $FreeBSD: stable/10/sys/netinet/igmp_var.h 254925 2013-08-26 18:16:05Z jhb $
  */
 
 #ifndef _NETINET_IGMP_VAR_H_
@@ -219,8 +219,4 @@
 #define IGMPCTL_STATS		1	/* statistics (read-only) */
 #define IGMPCTL_MAXID		2
 
-#define IGMPCTL_NAMES { \
-	{ 0, 0 }, \
-	{ "stats", CTLTYPE_STRUCT } \
-}
 #endif

Modified: trunk/sys/netinet/in.c
===================================================================
--- trunk/sys/netinet/in.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in.c 240313 2012-09-10 12:25:57Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in.c 309340 2016-11-30 22:20:23Z vangyzen $");
 
 #include "opt_mpath.h"
 
@@ -57,10 +57,12 @@
 #include <net/route.h>
 #include <net/vnet.h>
 
+#include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip_carp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
@@ -70,25 +72,20 @@
 static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
 	struct ifnet *, struct thread *);
 
-static int	in_addprefix(struct in_ifaddr *, int);
-static int	in_scrubprefix(struct in_ifaddr *, u_int);
 static void	in_socktrim(struct sockaddr_in *);
-static int	in_ifinit(struct ifnet *,
-	    struct in_ifaddr *, struct sockaddr_in *, int);
+static int	in_ifinit(struct ifnet *, struct in_ifaddr *,
+		    struct sockaddr_in *, int, int);
 static void	in_purgemaddrs(struct ifnet *);
 
-static VNET_DEFINE(int, sameprefixcarponly);
-#define	V_sameprefixcarponly		VNET(sameprefixcarponly)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
-	&VNET_NAME(sameprefixcarponly), 0,
+static VNET_DEFINE(int, nosameprefix);
+#define	V_nosameprefix			VNET(nosameprefix)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
+	&VNET_NAME(nosameprefix), 0,
 	"Refuse to create same prefixes on different interfaces");
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
-VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
-#define	V_arpstat		VNET(arpstat)
-
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).
@@ -221,10 +218,17 @@
 	struct in_addr dst;
 	struct in_ifinfo *ii;
 	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
-	struct sockaddr_in oldaddr;
 	int error, hostIsNew, iaIsNew, maskIsNew;
 	int iaIsFirst;
+	u_long ocmd = cmd;
 
+	/*
+	 * Pre-10.x compat: OSIOCAIFADDR passes a shorter
+	 * struct in_aliasreq, without ifra_vhid.
+	 */
+	if (cmd == OSIOCAIFADDR)
+		cmd = SIOCAIFADDR;
+
 	ia = NULL;
 	iaIsFirst = 0;
 	iaIsNew = 0;
@@ -235,17 +239,44 @@
 	 * in_lifaddr_ioctl() and ifp->if_ioctl().
 	 */
 	switch (cmd) {
-	case SIOCAIFADDR:
-	case SIOCDIFADDR:
 	case SIOCGIFADDR:
 	case SIOCGIFBRDADDR:
 	case SIOCGIFDSTADDR:
 	case SIOCGIFNETMASK:
+	case SIOCDIFADDR:
+		break;
+	case SIOCAIFADDR:
+		/*
+		 * ifra_addr must be present and be of INET family.
+		 * ifra_broadaddr and ifra_mask are optional.
+		 */
+		if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
+		    ifra->ifra_addr.sin_family != AF_INET)
+			return (EINVAL);
+		if (ifra->ifra_broadaddr.sin_len != 0 &&
+		    (ifra->ifra_broadaddr.sin_len !=
+		    sizeof(struct sockaddr_in) ||
+		    ifra->ifra_broadaddr.sin_family != AF_INET))
+			return (EINVAL);
+#if 0
+		/*
+		 * ifconfig(8) in pre-10.x doesn't set sin_family for the
+		 * mask. The code is disabled for the 10.x timeline, to
+		 * make SIOCAIFADDR compatible with 9.x ifconfig(8).
+		 * The code should be enabled in 11.x
+		 */
+		if (ifra->ifra_mask.sin_len != 0 &&
+		    (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
+		    ifra->ifra_mask.sin_family != AF_INET))
+			return (EINVAL);
+#endif
+		break;
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
-		break;
+		/* We no longer support that old commands. */
+		return (EINVAL);
 
 	case SIOCALIFADDR:
 		if (td != NULL) {
@@ -286,10 +317,6 @@
 	 */
 	switch (cmd) {
 	case SIOCAIFADDR:
-	case SIOCSIFADDR:
-	case SIOCSIFBRDADDR:
-	case SIOCSIFNETMASK:
-	case SIOCSIFDSTADDR:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NET_ADDIFADDR);
 			if (error)
@@ -377,10 +404,6 @@
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
-		/* FALLTHROUGH */
-	case SIOCSIFADDR:
-	case SIOCSIFNETMASK:
-	case SIOCSIFDSTADDR:
 		if (ia == NULL) {
 			ia = (struct in_ifaddr *)
 				malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
@@ -395,6 +418,8 @@
 			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+			callout_init_mtx(&ia->ia_garp_timer, &ifa->ifa_mtx,
+			    CALLOUT_RETURNUNLOCKED);
 
 			ia->ia_sockmask.sin_len = 8;
 			ia->ia_sockmask.sin_family = AF_INET;
@@ -416,7 +441,6 @@
 		}
 		break;
 
-	case SIOCSIFBRDADDR:
 	case SIOCGIFADDR:
 	case SIOCGIFNETMASK:
 	case SIOCGIFDSTADDR:
@@ -457,72 +481,13 @@
 		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
 		goto out;
 
-	case SIOCSIFDSTADDR:
-		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
-			error = EINVAL;
-			goto out;
-		}
-		oldaddr = ia->ia_dstaddr;
-		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
-		if (ifp->if_ioctl != NULL) {
-			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
-			    (caddr_t)ia);
-			if (error) {
-				ia->ia_dstaddr = oldaddr;
-				goto out;
-			}
-		}
-		if (ia->ia_flags & IFA_ROUTE) {
-			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
-			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
-			ia->ia_ifa.ifa_dstaddr =
-					(struct sockaddr *)&ia->ia_dstaddr;
-			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
-		}
-		goto out;
-
-	case SIOCSIFBRDADDR:
-		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
-			error = EINVAL;
-			goto out;
-		}
-		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
-		goto out;
-
-	case SIOCSIFADDR:
-		error = in_ifinit(ifp, ia,
-		    (struct sockaddr_in *) &ifr->ifr_addr, 1);
-		if (error != 0 && iaIsNew)
-			break;
-		if (error == 0) {
-			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
-			if (iaIsFirst &&
-			    (ifp->if_flags & IFF_MULTICAST) != 0) {
-				error = in_joingroup(ifp, &allhosts_addr,
-				    NULL, &ii->ii_allhosts);
-			}
-			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
-		}
-		error = 0;
-		goto out;
-
-	case SIOCSIFNETMASK:
-		ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
-		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
-		goto out;
-
 	case SIOCAIFADDR:
 		maskIsNew = 0;
 		hostIsNew = 1;
 		error = 0;
-		if (ia->ia_addr.sin_family == AF_INET) {
-			if (ifra->ifra_addr.sin_len == 0) {
-				ifra->ifra_addr = ia->ia_addr;
-				hostIsNew = 0;
-			} else if (ifra->ifra_addr.sin_addr.s_addr ==
-					       ia->ia_addr.sin_addr.s_addr)
-				hostIsNew = 0;
-		}
+		if (ifra->ifra_addr.sin_addr.s_addr ==
+			    ia->ia_addr.sin_addr.s_addr)
+			hostIsNew = 0;
 		if (ifra->ifra_mask.sin_len) {
 			/*
 			 * QL: XXX
@@ -546,14 +511,14 @@
 			ia->ia_dstaddr = ifra->ifra_dstaddr;
 			maskIsNew  = 1; /* We lie; but the effect's the same */
 		}
-		if (ifra->ifra_addr.sin_family == AF_INET &&
-		    (hostIsNew || maskIsNew))
-			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		if (hostIsNew || maskIsNew)
+			error = in_ifinit(ifp, ia, &ifra->ifra_addr, maskIsNew,
+			    (ocmd == cmd ? ifra->ifra_vhid : 0));
 		if (error != 0 && iaIsNew)
 			break;
 
 		if ((ifp->if_flags & IFF_BROADCAST) &&
-		    (ifra->ifra_broadaddr.sin_family == AF_INET))
+		    ifra->ifra_broadaddr.sin_len)
 			ia->ia_broadaddr = ifra->ifra_broadaddr;
 		if (error == 0) {
 			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
@@ -587,6 +552,9 @@
 		panic("in_control: unsupported ioctl");
 	}
 
+	if (ia->ia_ifa.ifa_carp)
+		(*carp_detach_p)(&ia->ia_ifa);
+
 	IF_ADDR_WLOCK(ifp);
 	/* Re-check that ia is still part of the list. */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -605,35 +573,34 @@
 	}
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
-	ifa_free(&ia->ia_ifa);				/* if_addrhead */
+	ifa_free(&ia->ia_ifa);		      /* if_addrhead */
 
 	IN_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
-	if (ia->ia_addr.sin_family == AF_INET) {
-		struct in_ifaddr *if_ia;
 
-		LIST_REMOVE(ia, ia_hash);
-		IN_IFADDR_WUNLOCK();
-		/*
-		 * If this is the last IPv4 address configured on this
-		 * interface, leave the all-hosts group.
-		 * No state-change report need be transmitted.
-		 */
-		if_ia = NULL;
-		IFP_TO_IA(ifp, if_ia);
-		if (if_ia == NULL) {
-			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
-			IN_MULTI_LOCK();
-			if (ii->ii_allhosts) {
-				(void)in_leavegroup_locked(ii->ii_allhosts,
-				    NULL);
-				ii->ii_allhosts = NULL;
-			}
-			IN_MULTI_UNLOCK();
-		} else
-			ifa_free(&if_ia->ia_ifa);
+	LIST_REMOVE(ia, ia_hash);
+	IN_IFADDR_WUNLOCK();
+	/*
+	 * If this is the last IPv4 address configured on this
+	 * interface, leave the all-hosts group.
+	 * No state-change report need be transmitted.
+	 */
+	IFP_TO_IA(ifp, iap);
+	if (iap == NULL) {
+		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+		IN_MULTI_LOCK();
+		if (ii->ii_allhosts) {
+			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
+			ii->ii_allhosts = NULL;
+		}
+		IN_MULTI_UNLOCK();
 	} else
-		IN_IFADDR_WUNLOCK();
+		ifa_free(&iap->ia_ifa);
+
+	IFA_LOCK(&ia->ia_ifa);
+	if (callout_stop(&ia->ia_garp_timer))
+		ifa_free(&ia->ia_ifa);
+	IFA_UNLOCK(&ia->ia_ifa);
 	ifa_free(&ia->ia_ifa);				/* in_ifaddrhead */
 out:
 	if (ia != NULL)
@@ -760,7 +727,8 @@
 				continue;
 			if (match.s_addr == 0)
 				break;
-			candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
+			sin = (struct sockaddr_in *)&ifa->ifa_addr;
+			candidate.s_addr = sin->sin_addr.s_addr;
 			candidate.s_addr &= mask.s_addr;
 			if (candidate.s_addr == match.s_addr)
 				break;
@@ -832,60 +800,43 @@
  */
 static int
 in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
-    int scrub)
+    int masksupplied, int vhid)
 {
 	register u_long i = ntohl(sin->sin_addr.s_addr);
-	struct sockaddr_in oldaddr;
-	int s = splimp(), flags = RTF_UP, error = 0;
+	int flags, error = 0;
 
-	oldaddr = ia->ia_addr;
-	if (oldaddr.sin_family == AF_INET)
+	IN_IFADDR_WLOCK();
+	if (ia->ia_addr.sin_family == AF_INET)
 		LIST_REMOVE(ia, ia_hash);
 	ia->ia_addr = *sin;
-	if (ia->ia_addr.sin_family == AF_INET) {
-		IN_IFADDR_WLOCK();
-		LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
-		    ia, ia_hash);
-		IN_IFADDR_WUNLOCK();
+	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+	    ia, ia_hash);
+	IN_IFADDR_WUNLOCK();
+
+	if (vhid > 0) {
+		if (carp_attach_p != NULL)
+			error = (*carp_attach_p)(&ia->ia_ifa, vhid);
+		else
+			error = EPROTONOSUPPORT;
 	}
+	if (error)
+		return (error);
+
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
-	if (ifp->if_ioctl != NULL) {
-		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
-		if (error) {
-			splx(s);
+	if (ifp->if_ioctl != NULL &&
+	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
 			/* LIST_REMOVE(ia, ia_hash) is done in in_control */
-			ia->ia_addr = oldaddr;
-			IN_IFADDR_WLOCK();
-			if (ia->ia_addr.sin_family == AF_INET)
-				LIST_INSERT_HEAD(INADDR_HASH(
-				    ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
-			else 
-				/* 
-				 * If oldaddr family is not AF_INET (e.g. 
-				 * interface has been just created) in_control 
-				 * does not call LIST_REMOVE, and we end up 
-				 * with bogus ia entries in hash
-				 */
-				LIST_REMOVE(ia, ia_hash);
-			IN_IFADDR_WUNLOCK();
 			return (error);
-		}
-	}
-	splx(s);
-	if (scrub) {
-		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
-		in_ifscrub(ifp, ia, LLE_STATIC);
-		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
-	}
+
 	/*
 	 * Be compatible with network classes, if netmask isn't supplied,
 	 * guess it based on classes.
 	 */
-	if (ia->ia_subnetmask == 0) {
+	if (!masksupplied) {
 		if (IN_CLASSA(i))
 			ia->ia_subnetmask = IN_CLASSA_NET;
 		else if (IN_CLASSB(i))
@@ -896,14 +847,11 @@
 	}
 	ia->ia_subnet = i & ia->ia_subnetmask;
 	in_socktrim(&ia->ia_sockmask);
+
 	/*
-	 * XXX: carp(4) does not have interface route
-	 */
-	if (ifp->if_type == IFT_CARP)
-		return (0);
-	/*
 	 * Add route for the network.
 	 */
+	flags = RTF_UP;
 	ia->ia_ifa.ifa_metric = ifp->if_metric;
 	if (ifp->if_flags & IFF_BROADCAST) {
 		if (ia->ia_subnetmask == IN_RFC3021_MASK)
@@ -919,22 +867,20 @@
 			return (0);
 		flags |= RTF_HOST;
 	}
-	if ((error = in_addprefix(ia, flags)) != 0)
+	if (!vhid && (error = in_addprefix(ia, flags)) != 0)
 		return (error);
 
 	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
 		return (0);
 
-	if (ifp->if_flags & IFF_POINTOPOINT) {
-		if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
+	if (ifp->if_flags & IFF_POINTOPOINT &&
+	    ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			return (0);
-	}
 
-
 	/*
 	 * add a loopback route to self
 	 */
-	if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
+	if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
 		struct route ia_ro;
 
 		bzero(&ia_ro, sizeof(ia_ro));
@@ -962,48 +908,9 @@
 	    ? RTF_HOST : 0)
 
 /*
- * Generate a routing message when inserting or deleting
- * an interface address alias.
- */
-static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
-    struct in_ifaddr *target)
-{
-	struct route pfx_ro;
-	struct sockaddr_in *pfx_addr;
-	struct rtentry msg_rt;
-
-	/* QL: XXX
-	 * This is a bit questionable because there is no
-	 * additional route entry added/deleted for an address
-	 * alias. Therefore this route report is inaccurate.
-	 */
-	bzero(&pfx_ro, sizeof(pfx_ro));
-	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
-	pfx_addr->sin_len = sizeof(*pfx_addr);
-	pfx_addr->sin_family = AF_INET;
-	pfx_addr->sin_addr = *prefix;
-	rtalloc_ign_fib(&pfx_ro, 0, 0);
-	if (pfx_ro.ro_rt != NULL) {
-		msg_rt = *pfx_ro.ro_rt;
-
-		/* QL: XXX
-		 * Point the gateway to the new interface
-		 * address as if a new prefix route entry has
-		 * been added through the new address alias.
-		 * All other parts of the rtentry is accurate,
-		 * e.g., rt_key, rt_mask, rt_ifp etc.
-		 */
-		msg_rt.rt_gateway = (struct sockaddr *)&target->ia_addr;
-		rt_newaddrmsg(cmd, (struct ifaddr *)target, 0, &msg_rt);
-		RTFREE(pfx_ro.ro_rt);
-	}
-	return;
-}
-
-/*
  * Check if we have a route for the given prefix already or add one accordingly.
  */
-static int
+int
 in_addprefix(struct in_ifaddr *target, int flags)
 {
 	struct in_ifaddr *ia;
@@ -1020,6 +927,7 @@
 	}
 
 	IN_IFADDR_RLOCK();
+	/* Look for an existing address with the same prefix, mask, and fib */
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
@@ -1035,6 +943,8 @@
 			    mask.s_addr != m.s_addr)
 				continue;
 		}
+		if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
+			continue;
 
 		/*
 		 * If we got a matching prefix route inserted by other
@@ -1049,13 +959,15 @@
 			} else
 				break;
 #endif
-			if (V_sameprefixcarponly &&
-			    target->ia_ifp->if_type != IFT_CARP &&
-			    ia->ia_ifp->if_type != IFT_CARP) {
+			if (V_nosameprefix) {
 				IN_IFADDR_RUNLOCK();
 				return (EEXIST);
 			} else {
-				in_addralias_rtmsg(RTM_ADD, &prefix, target);
+				int fibnum;
+
+				fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+					target->ia_ifp->if_fib;
+				rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
 				IN_IFADDR_RUNLOCK();
 				return (0);
 			}
@@ -1072,18 +984,16 @@
 	return (error);
 }
 
-extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
 /*
  * If there is no other address in the system that can serve a route to the
  * same prefix, remove the route.  Hand over the route to the new address
  * otherwise.
  */
-static int
+int
 in_scrubprefix(struct in_ifaddr *target, u_int flags)
 {
 	struct in_ifaddr *ia;
-	struct in_addr prefix, mask, p;
+	struct in_addr prefix, mask, p, m;
 	int error = 0;
 	struct sockaddr_in prefix0, mask0;
 
@@ -1102,10 +1012,12 @@
 	    (target->ia_flags & IFA_RTSELF)) {
 		struct route ia_ro;
 		int freeit = 0;
+		int fib;
 
 		bzero(&ia_ro, sizeof(ia_ro));
 		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
-		rtalloc_ign_fib(&ia_ro, 0, 0);
+		fib = target->ia_ifa.ifa_ifp->if_fib;
+		rtalloc_ign_fib(&ia_ro, 0, fib);
 		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
 		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
 			RT_LOCK(ia_ro.ro_rt);
@@ -1129,9 +1041,10 @@
 			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
 	}
 
-	if (rtinitflags(target))
+	if (rtinitflags(target)) {
 		prefix = target->ia_dstaddr.sin_addr;
-	else {
+		mask.s_addr = 0;
+	} else {
 		prefix = target->ia_addr.sin_addr;
 		mask = target->ia_sockmask.sin_addr;
 		prefix.s_addr &= mask.s_addr;
@@ -1138,21 +1051,32 @@
 	}
 
 	if ((target->ia_flags & IFA_ROUTE) == 0) {
-		in_addralias_rtmsg(RTM_DELETE, &prefix, target);
+		int fibnum;
+		
+		fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+			target->ia_ifp->if_fib;
+		rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
 		return (0);
 	}
 
 	IN_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
-		if (rtinitflags(ia))
+		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
-		else {
+
+			if (prefix.s_addr != p.s_addr)
+				continue;
+		} else {
 			p = ia->ia_addr.sin_addr;
-			p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+			m = ia->ia_sockmask.sin_addr;
+			p.s_addr &= m.s_addr;
+
+			if (prefix.s_addr != p.s_addr ||
+			    mask.s_addr != m.s_addr)
+				continue;
 		}
 
-		if ((prefix.s_addr != p.s_addr) ||
-		    !(ia->ia_ifp->if_flags & IFF_UP))
+		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
 			continue;
 
 		/*
@@ -1159,13 +1083,8 @@
 		 * If we got a matching prefix address, move IFA_ROUTE and
 		 * the route itself to it.  Make sure that routing daemons
 		 * get a heads-up.
-		 *
-		 * XXX: a special case for carp(4) interface - this should
-		 *      be more generally specified as an interface that
-		 *      doesn't support such action.
 		 */
-		if ((ia->ia_flags & IFA_ROUTE) == 0
-		    && (ia->ia_ifp->if_type != IFT_CARP)) {
+		if ((ia->ia_flags & IFA_ROUTE) == 0) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK();
 			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
@@ -1264,6 +1183,7 @@
 
 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
 	in_pcbpurgeif0(&V_udbinfo, ifp);
+	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in_purgemaddrs(ifp);
 }
 
@@ -1313,9 +1233,6 @@
 	IN_MULTI_UNLOCK();
 }
 
-#include <net/if_dl.h>
-#include <netinet/if_ether.h>
-
 struct in_llentry {
 	struct llentry		base;
 	struct sockaddr_in	l3_addr4;
@@ -1340,7 +1257,7 @@
 {
 	struct in_llentry *lle;
 
-	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
+	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (lle == NULL)		/* NB: caller generates msg */
 		return NULL;
 
@@ -1353,8 +1270,7 @@
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in_lltable_free;
 	LLE_LOCK_INIT(&lle->base);
-	callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
-	    CALLOUT_RETURNUNLOCKED);
+	callout_init(&lle->base.la_timer, 1);
 
 	return (&lle->base);
 }
@@ -1402,8 +1318,9 @@
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
-	/* XXX rtalloc1 should take a const param */
-	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
+	/* XXX rtalloc1_fib should take a const param */
+	rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0,
+	    ifp->if_fib);
 
 	if (rt == NULL)
 		return (EINVAL);
@@ -1500,6 +1417,7 @@
 #endif
 		if (!(flags & LLE_CREATE))
 			return (NULL);
+		IF_AFDATA_WLOCK_ASSERT(ifp);
 		/*
 		 * A route that covers the given address must have
 		 * been installed 1st because we are doing a resolution,
@@ -1528,7 +1446,7 @@
 		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 			LLE_WLOCK(lle);
 			lle->la_flags |= LLE_DELETED;
-			EVENTHANDLER_INVOKE(arp_update_event, lle);
+			EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
@@ -1560,7 +1478,7 @@
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
-		struct sockaddr_inarp	sin;
+		struct sockaddr_in	sin;
 		struct sockaddr_dl	sdl;
 	} arpc;
 	int error, i;
@@ -1581,7 +1499,7 @@
 			/*
 			 * produce a msg made of:
 			 *  struct rt_msghdr;
-			 *  struct sockaddr_inarp; (IPv4)
+			 *  struct sockaddr_in; (IPv4)
 			 *  struct sockaddr_dl;
 			 */
 			bzero(&arpc, sizeof(arpc));
@@ -1595,12 +1513,8 @@
 			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
 
 			/* publish */
-			if (lle->la_flags & LLE_PUB) {
+			if (lle->la_flags & LLE_PUB)
 				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
-				/* proxy only */
-				if (lle->la_flags & LLE_PROXY)
-					arpc.sin.sin_other = SIN_PROXY;
-			}
 
 			sdl = &arpc.sdl;
 			sdl->sdl_family = AF_LINK;

Modified: trunk/sys/netinet/in.h
===================================================================
--- trunk/sys/netinet/in.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in.h	8.3 (Berkeley) 1/3/94
- * $FreeBSD: stable/9/sys/netinet/in.h 239430 2012-08-20 14:42:29Z emaste $
+ * $FreeBSD: stable/10/sys/netinet/in.h 273841 2014-10-29 23:10:48Z jilles $
  */
 
 #ifndef _NETINET_IN_H_
@@ -101,7 +101,7 @@
 	char	sin_zero[8];
 };
 
-#if !defined(_KERNEL) && __BSD_VISIBLE
+#if !defined(_KERNEL) && __POSIX_VISIBLE >= 200112
 
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
@@ -121,7 +121,7 @@
 #define	ntohs(x)	__ntohs(x)
 #endif
 
-#endif /* !_KERNEL && __BSD_VISIBLE */
+#endif /* !_KERNEL && __POSIX_VISIBLE >= 200112 */
 
 #if __POSIX_VISIBLE >= 200112
 #define	IPPROTO_IPV6		41		/* IP6 header */
@@ -238,6 +238,9 @@
 #define	IPPROTO_IPCOMP		108		/* payload compression (IPComp) */
 #define	IPPROTO_SCTP		132		/* SCTP */
 #define	IPPROTO_MH		135		/* IPv6 Mobility Header */
+#define	IPPROTO_UDPLITE		136		/* UDP-Lite */
+#define	IPPROTO_HIP		139		/* IP6 Host Identity Protocol */
+#define	IPPROTO_SHIM6		140		/* IP6 Shim6 Protocol */
 /* 101-254: Partly Unassigned */
 #define	IPPROTO_PIM		103		/* Protocol Independent Mcast */
 #define	IPPROTO_CARP		112		/* CARP */
@@ -244,6 +247,8 @@
 #define	IPPROTO_PGM		113		/* PGM */
 #define	IPPROTO_MPLS		137		/* MPLS-in-IP */
 #define	IPPROTO_PFSYNC		240		/* PFSYNC */
+#define	IPPROTO_RESERVED_253	253		/* Reserved */
+#define	IPPROTO_RESERVED_254	254		/* Reserved */
 /* 255: Reserved */
 /* BSD Private, local use, namespace incursion, no longer used */
 #define	IPPROTO_OLD_DIVERT	254		/* OLD divert pseudo-proto */
@@ -700,24 +705,6 @@
 #define	IPCTL_GIF_TTL		16	/* default TTL for gif encap packet */
 #define	IPCTL_MAXID		17
 
-#define	IPCTL_NAMES { \
-	{ 0, 0 }, \
-	{ "forwarding", CTLTYPE_INT }, \
-	{ "redirect", CTLTYPE_INT }, \
-	{ "ttl", CTLTYPE_INT }, \
-	{ "mtu", CTLTYPE_INT }, \
-	{ "rtexpire", CTLTYPE_INT }, \
-	{ "rtminexpire", CTLTYPE_INT }, \
-	{ "rtmaxcache", CTLTYPE_INT }, \
-	{ "sourceroute", CTLTYPE_INT }, \
-	{ "directed-broadcast", CTLTYPE_INT }, \
-	{ "intr-queue-maxlen", CTLTYPE_INT }, \
-	{ "intr-queue-drops", CTLTYPE_INT }, \
-	{ "stats", CTLTYPE_STRUCT }, \
-	{ "accept_sourceroute", CTLTYPE_INT }, \
-	{ "fastforwarding", CTLTYPE_INT }, \
-}
-
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
@@ -742,33 +729,6 @@
 #define	satosin(sa)	((struct sockaddr_in *)(sa))
 #define	sintosa(sin)	((struct sockaddr *)(sin))
 #define	ifatoia(ifa)	((struct in_ifaddr *)(ifa))
-
-/*
- * Historically, BSD keeps ip_len and ip_off in host format
- * when doing layer 3 processing, and this often requires
- * to translate the format back and forth.
- * To make the process explicit, we define a couple of macros
- * that also take into account the fact that at some point
- * we may want to keep those fields always in net format.
- */
-
-#if (BYTE_ORDER == BIG_ENDIAN) || defined(HAVE_NET_IPLEN)
-#define SET_NET_IPLEN(p)	do {} while (0)
-#define SET_HOST_IPLEN(p)	do {} while (0)
-#else
-#define SET_NET_IPLEN(p)	do {		\
-	struct ip *h_ip = (p);			\
-	h_ip->ip_len = htons(h_ip->ip_len);	\
-	h_ip->ip_off = htons(h_ip->ip_off);	\
-	} while (0)
-
-#define SET_HOST_IPLEN(p)	do {		\
-	struct ip *h_ip = (p);			\
-	h_ip->ip_len = ntohs(h_ip->ip_len);	\
-	h_ip->ip_off = ntohs(h_ip->ip_off);	\
-	} while (0)
-#endif /* !HAVE_NET_IPLEN */
-
 #endif /* _KERNEL */
 
 /* INET6 stuff */

Modified: trunk/sys/netinet/in_cksum.c
===================================================================
--- trunk/sys/netinet/in_cksum.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_cksum.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_cksum.c 172467 2007-10-07 20:44:24Z silby $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_cksum.c 238941 2012-07-31 08:04:49Z luigi $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -89,7 +89,7 @@
 		/*
 		 * Force to even boundary.
 		 */
-		if ((1 & (int) w) && (mlen > 0)) {
+		if ((1 & (uintptr_t) w) && (mlen > 0)) {
 			REDUCE;
 			sum <<= 8;
 			s_util.c[0] = *(u_char *)w;

Modified: trunk/sys/netinet/in_debug.c
===================================================================
--- trunk/sys/netinet/in_debug.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_debug.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_debug.c 226572 2011-10-20 15:58:05Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_debug.c 226401 2011-10-15 16:28:06Z glebius $");
 
 #include "opt_ddb.h"
 

Modified: trunk/sys/netinet/in_gif.c
===================================================================
--- trunk/sys/netinet/in_gif.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_gif.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,6 +1,4 @@
 /* $MidnightBSD$ */
-/*	$KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $	*/
-
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
@@ -28,16 +26,19 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ *	$KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_gif.c 223637 2011-06-28 11:57:25Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_gif.c 284072 2015-06-06 13:26:13Z ae $");
 
-#include "opt_mrouting.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -49,6 +50,7 @@
 #include <sys/malloc.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -56,7 +58,6 @@
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
-#include <netinet/in_gif.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_ecn.h>
@@ -65,153 +66,50 @@
 #include <netinet/ip6.h>
 #endif
 
-#ifdef MROUTING
-#include <netinet/ip_mroute.h>
-#endif /* MROUTING */
+#include <net/if_gif.h>
 
-#include <net/if_gif.h>	
-
 static int gif_validate4(const struct ip *, struct gif_softc *,
 	struct ifnet *);
+static int in_gif_input(struct mbuf **, int *, int);
 
+static void in_gif_input10(struct mbuf *, int);
 extern  struct domain inetdomain;
-struct protosw in_gif_protosw = {
+static struct protosw in_gif_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		0/* IPPROTO_IPV[46] */,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
-	.pr_input =		in_gif_input,
+	.pr_input =		in_gif_input10,
 	.pr_output =		(pr_output_t*)rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 
-VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+#define GIF_TTL		30
+static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
 #define	V_ip_gif_ttl		VNET(ip_gif_ttl)
 SYSCTL_VNET_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
 	&VNET_NAME(ip_gif_ttl), 0, "");
 
 int
-in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
+in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
 {
+	GIF_RLOCK_TRACKER;
 	struct gif_softc *sc = ifp->if_softc;
-	struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
-	struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
-	struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
-	struct ip iphdr;	/* capsule IP header, host byte ordered */
-	struct etherip_header eiphdr;
-	int error, len, proto;
-	u_int8_t tos;
+	struct ip *ip;
+	int len;
 
-	GIF_LOCK_ASSERT(sc);
-
-	if (sin_src == NULL || sin_dst == NULL ||
-	    sin_src->sin_family != AF_INET ||
-	    sin_dst->sin_family != AF_INET) {
-		m_freem(m);
-		return EAFNOSUPPORT;
-	}
-
-	switch (family) {
-#ifdef INET
-	case AF_INET:
-	    {
-		struct ip *ip;
-
-		proto = IPPROTO_IPV4;
-		if (m->m_len < sizeof(*ip)) {
-			m = m_pullup(m, sizeof(*ip));
-			if (!m)
-				return ENOBUFS;
-		}
-		ip = mtod(m, struct ip *);
-		tos = ip->ip_tos;
-		break;
-	    }
-#endif /* INET */
-#ifdef INET6
-	case AF_INET6:
-	    {
-		struct ip6_hdr *ip6;
-		proto = IPPROTO_IPV6;
-		if (m->m_len < sizeof(*ip6)) {
-			m = m_pullup(m, sizeof(*ip6));
-			if (!m)
-				return ENOBUFS;
-		}
-		ip6 = mtod(m, struct ip6_hdr *);
-		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
-		break;
-	    }
-#endif /* INET6 */
-	case AF_LINK:
- 		proto = IPPROTO_ETHERIP;
-
-		/*
-		 * GIF_SEND_REVETHIP (disabled by default) intentionally
-		 * sends an EtherIP packet with revered version field in
-		 * the header.  This is a knob for backward compatibility
-		 * with FreeBSD 7.2R or prior.
-		 */
-		if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- 			eiphdr.eip_ver = 0;
- 			eiphdr.eip_resvl = ETHERIP_VERSION;
- 			eiphdr.eip_resvh = 0;
-		} else {
- 			eiphdr.eip_ver = ETHERIP_VERSION;
- 			eiphdr.eip_resvl = 0;
- 			eiphdr.eip_resvh = 0;
-		}
- 		/* prepend Ethernet-in-IP header */
- 		M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- 		if (m && m->m_len < sizeof(struct etherip_header))
- 			m = m_pullup(m, sizeof(struct etherip_header));
- 		if (m == NULL)
- 			return ENOBUFS;
- 		bcopy(&eiphdr, mtod(m, struct etherip_header *),
-		    sizeof(struct etherip_header));
-		break;
-
-	default:
-#ifdef DEBUG
-		printf("in_gif_output: warning: unknown family %d passed\n",
-			family);
-#endif
-		m_freem(m);
-		return EAFNOSUPPORT;
-	}
-
-	bzero(&iphdr, sizeof(iphdr));
-	iphdr.ip_src = sin_src->sin_addr;
-	/* bidirectional configured tunnel mode */
-	if (sin_dst->sin_addr.s_addr != INADDR_ANY)
-		iphdr.ip_dst = sin_dst->sin_addr;
-	else {
-		m_freem(m);
-		return ENETUNREACH;
-	}
-	iphdr.ip_p = proto;
-	/* version will be set in ip_output() */
-	iphdr.ip_ttl = V_ip_gif_ttl;
-	iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
-	ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
-		       &iphdr.ip_tos, &tos);
-
 	/* prepend new IP header */
 	len = sizeof(struct ip);
 #ifndef __NO_STRICT_ALIGNMENT
-	if (family == AF_LINK)
+	if (proto == IPPROTO_ETHERIP)
 		len += ETHERIP_ALIGN;
 #endif
-	M_PREPEND(m, len, M_DONTWAIT);
-	if (m != NULL && m->m_len < len)
-		m = m_pullup(m, len);
-	if (m == NULL) {
-		printf("ENOBUFS in in_gif_output %d\n", __LINE__);
-		return ENOBUFS;
-	}
+	M_PREPEND(m, len, M_NOWAIT);
+	if (m == NULL)
+		return (ENOBUFS);
 #ifndef __NO_STRICT_ALIGNMENT
-	if (family == AF_LINK) {
+	if (proto == IPPROTO_ETHERIP) {
 		len = mtod(m, vm_offset_t) & 3;
 		KASSERT(len == 0 || len == ETHERIP_ALIGN,
 		    ("in_gif_output: unexpected misalignment"));
@@ -219,146 +117,60 @@
 		m->m_len -= ETHERIP_ALIGN;
 	}
 #endif
-	bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
-
-	M_SETFIB(m, sc->gif_fibnum);
-
-	if (dst->sin_family != sin_dst->sin_family ||
-	    dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
-		/* cache route doesn't match */
-		bzero(dst, sizeof(*dst));
-		dst->sin_family = sin_dst->sin_family;
-		dst->sin_len = sizeof(struct sockaddr_in);
-		dst->sin_addr = sin_dst->sin_addr;
-		if (sc->gif_ro.ro_rt) {
-			RTFREE(sc->gif_ro.ro_rt);
-			sc->gif_ro.ro_rt = NULL;
-		}
-#if 0
-		GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
+	ip = mtod(m, struct ip *);
+	GIF_RLOCK(sc);
+	if (sc->gif_family != AF_INET) {
+		m_freem(m);
+		GIF_RUNLOCK(sc);
+		return (ENETDOWN);
 	}
+	bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
+	GIF_RUNLOCK(sc);
 
-	if (sc->gif_ro.ro_rt == NULL) {
-		in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
-		if (sc->gif_ro.ro_rt == NULL) {
-			m_freem(m);
-			return ENETUNREACH;
-		}
+	ip->ip_p = proto;
+	/* version will be set in ip_output() */
+	ip->ip_ttl = V_ip_gif_ttl;
+	ip->ip_len = htons(m->m_pkthdr.len);
+	ip->ip_tos = ecn;
 
-		/* if it constitutes infinite encapsulation, punt. */
-		if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
-			m_freem(m);
-			return ENETUNREACH;	/* XXX */
-		}
-#if 0
-		ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
-			- sizeof(struct ip);
-#endif
-	}
+	return (ip_output(m, NULL, NULL, 0, NULL, NULL));
+}
 
-	m_addr_changed(m);
+static void
+in_gif_input10(struct mbuf *m, int off)
+{
+	int proto;
 
-	error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
-
-	if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
-	    sc->gif_ro.ro_rt != NULL) {
-		RTFREE(sc->gif_ro.ro_rt);
-		sc->gif_ro.ro_rt = NULL;
-	}
-
-	return (error);
+	proto = (mtod(m, struct ip *))->ip_p;
+	in_gif_input(&m, &off, proto);
 }
 
-void
-in_gif_input(struct mbuf *m, int off)
+static int
+in_gif_input(struct mbuf **mp, int *offp, int proto)
 {
-	struct ifnet *gifp = NULL;
+	struct mbuf *m = *mp;
 	struct gif_softc *sc;
+	struct ifnet *gifp;
 	struct ip *ip;
-	int af;
-	u_int8_t otos;
-	int proto;
+	uint8_t ecn;
 
-	ip = mtod(m, struct ip *);
-	proto = ip->ip_p;
-
-	sc = (struct gif_softc *)encap_getarg(m);
+	sc = encap_getarg(m);
 	if (sc == NULL) {
 		m_freem(m);
 		KMOD_IPSTAT_INC(ips_nogif);
-		return;
+		return (IPPROTO_DONE);
 	}
-
 	gifp = GIF2IFP(sc);
-	if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+	if ((gifp->if_flags & IFF_UP) != 0) {
+		ip = mtod(m, struct ip *);
+		ecn = ip->ip_tos;
+		m_adj(m, *offp);
+		gif_input(m, gifp, proto, ecn);
+	} else {
 		m_freem(m);
 		KMOD_IPSTAT_INC(ips_nogif);
-		return;
 	}
-
-	otos = ip->ip_tos;
-	m_adj(m, off);
-
-	switch (proto) {
-#ifdef INET
-	case IPPROTO_IPV4:
-	    {
-		struct ip *ip;
-		af = AF_INET;
-		if (m->m_len < sizeof(*ip)) {
-			m = m_pullup(m, sizeof(*ip));
-			if (!m)
-				return;
-		}
-		ip = mtod(m, struct ip *);
-		if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
-				  ECN_ALLOWED : ECN_NOCARE,
-				  &otos, &ip->ip_tos) == 0) {
-			m_freem(m);
-			return;
-		}
-		break;
-	    }
-#endif
-#ifdef INET6
-	case IPPROTO_IPV6:
-	    {
-		struct ip6_hdr *ip6;
-		u_int8_t itos, oitos;
-
-		af = AF_INET6;
-		if (m->m_len < sizeof(*ip6)) {
-			m = m_pullup(m, sizeof(*ip6));
-			if (!m)
-				return;
-		}
-		ip6 = mtod(m, struct ip6_hdr *);
-		itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
-		if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
-				  ECN_ALLOWED : ECN_NOCARE,
-				  &otos, &itos) == 0) {
-			m_freem(m);
-			return;
-		}
-		if (itos != oitos) {
-			ip6->ip6_flow &= ~htonl(0xff << 20);
-			ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
-		}
-		break;
-	    }
-#endif /* INET6 */
- 	case IPPROTO_ETHERIP:
- 		af = AF_LINK;
- 		break;	
-
-	default:
-		KMOD_IPSTAT_INC(ips_nogif);
-		m_freem(m);
-		return;
-	}
-	gif_input(m, af, gifp);
-	return;
+	return (IPPROTO_DONE);
 }
 
 /*
@@ -367,38 +179,30 @@
 static int
 gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
 {
-	struct sockaddr_in *src, *dst;
-	struct in_ifaddr *ia4;
+	int ret;
 
-	src = (struct sockaddr_in *)sc->gif_psrc;
-	dst = (struct sockaddr_in *)sc->gif_pdst;
+	GIF_RLOCK_ASSERT(sc);
 
 	/* check for address match */
-	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
-	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
-		return 0;
+	if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
+		return (0);
+	ret = 32;
+	if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
+		if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+			return (0);
+	} else
+		ret += 32;
 
 	/* martian filters on outer source - NOT done in ip_input! */
 	if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
-		return 0;
+		return (0);
 	switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
-	case 0: case 127: case 255:
-		return 0;
+	case 0:
+	case 127:
+	case 255:
+		return (0);
 	}
 
-	/* reject packets with broadcast on source */
-	/* XXXRW: should use hash lists? */
-	IN_IFADDR_RLOCK();
-	TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
-		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
-			continue;
-		if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
-			IN_IFADDR_RUNLOCK();
-			return 0;
-		}
-	}
-	IN_IFADDR_RUNLOCK();
-
 	/* ingress filters on outer source */
 	if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
 		struct sockaddr_in sin;
@@ -412,19 +216,13 @@
 		rt = in_rtalloc1((struct sockaddr *)&sin, 0,
 		    0UL, sc->gif_fibnum);
 		if (!rt || rt->rt_ifp != ifp) {
-#if 0
-			log(LOG_WARNING, "%s: packet from 0x%x dropped "
-			    "due to ingress filter\n", if_name(GIF2IFP(sc)),
-			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
 			if (rt)
 				RTFREE_LOCKED(rt);
-			return 0;
+			return (0);
 		}
 		RTFREE_LOCKED(rt);
 	}
-
-	return 32 * 2;
+	return (ret);
 }
 
 /*
@@ -432,7 +230,7 @@
  * matched the physical addr family.  see gif_encapcheck().
  */
 int
-gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
+in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	struct ip ip;
 	struct gif_softc *sc;
@@ -440,31 +238,21 @@
 
 	/* sanity check done in caller */
 	sc = (struct gif_softc *)arg;
+	GIF_RLOCK_ASSERT(sc);
 
-	/* LINTED const cast */
 	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 	ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
-	return gif_validate4(&ip, sc, ifp);
+	return (gif_validate4(&ip, sc, ifp));
 }
 
 int
 in_gif_attach(struct gif_softc *sc)
 {
-	sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
+
+	KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+	sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
 	    &in_gif_protosw, sc);
-	if (sc->encap_cookie4 == NULL)
-		return EEXIST;
-	return 0;
+	if (sc->gif_ecookie == NULL)
+		return (EEXIST);
+	return (0);
 }
-
-int
-in_gif_detach(struct gif_softc *sc)
-{
-	int error;
-
-	error = encap_detach(sc->encap_cookie4);
-	if (error == 0)
-		sc->encap_cookie4 = NULL;
-	return error;
-}

Added: trunk/sys/netinet/in_kdtrace.c
===================================================================
--- trunk/sys/netinet/in_kdtrace.c	                        (rev 0)
+++ trunk/sys/netinet/in_kdtrace.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,136 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Mark Johnston <markj at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/in_kdtrace.c 318438 2017-05-18 03:32:01Z smh $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_kdtrace.c 318438 2017-05-18 03:32:01Z smh $");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+SDT_PROVIDER_DEFINE(ip);
+SDT_PROVIDER_DEFINE(tcp);
+SDT_PROVIDER_DEFINE(udp);
+
+SDT_PROBE_DEFINE6_XLATE(ip, , , receive,
+    "void *", "pktinfo_t *",
+    "void *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct ifnet *", "ifinfo_t *",
+    "struct ip *", "ipv4info_t *",
+    "struct ip6_hdr *", "ipv6info_t *");
+
+SDT_PROBE_DEFINE6_XLATE(ip, , , send,
+    "void *", "pktinfo_t *",
+    "void *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct ifnet *", "ifinfo_t *",
+    "struct ip *", "ipv4info_t *",
+    "struct ip6_hdr *", "ipv6info_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__established,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__refused,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__established,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__refused,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__request,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , receive,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , send,
+    "void *", "pktinfo_t *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE6_XLATE(tcp, , , state__change,
+    "void *", "void *",
+    "struct tcpcb *", "csinfo_t *",
+    "void *", "void *",
+    "struct tcpcb *", "tcpsinfo_t *",
+    "void *", "void *",
+    "int", "tcplsinfo_t *");
+
+SDT_PROBE_DEFINE6_XLATE(tcp, , , receive__autoresize,
+    "void *", "void *",
+    "struct tcpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *",
+    "int", "int");
+
+SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
+    "void *", "pktinfo_t *",
+    "struct inpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct inpcb *", "udpsinfo_t *",
+    "struct udphdr *", "udpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(udp, , , send,
+    "void *", "pktinfo_t *",
+    "struct inpcb *", "csinfo_t *",
+    "uint8_t *", "ipinfo_t *",
+    "struct inpcb *", "udpsinfo_t *",
+    "struct udphdr *", "udpinfo_t *");


Property changes on: trunk/sys/netinet/in_kdtrace.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/in_kdtrace.h
===================================================================
--- trunk/sys/netinet/in_kdtrace.h	                        (rev 0)
+++ trunk/sys/netinet/in_kdtrace.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,61 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Mark Johnston <markj at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/in_kdtrace.h 317375 2017-04-24 16:31:28Z smh $
+ */
+
+#ifndef _SYS_IN_KDTRACE_H_
+#define	_SYS_IN_KDTRACE_H_
+
+#define	IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5)		\
+	SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+#define	UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4)			\
+	SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define	TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)			\
+	SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define	TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5)		\
+	SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+
+SDT_PROVIDER_DECLARE(ip);
+SDT_PROVIDER_DECLARE(tcp);
+SDT_PROVIDER_DECLARE(udp);
+
+SDT_PROBE_DECLARE(ip, , , receive);
+SDT_PROBE_DECLARE(ip, , , send);
+
+SDT_PROBE_DECLARE(tcp, , , accept__established);
+SDT_PROBE_DECLARE(tcp, , , accept__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__established);
+SDT_PROBE_DECLARE(tcp, , , connect__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__request);
+SDT_PROBE_DECLARE(tcp, , , receive);
+SDT_PROBE_DECLARE(tcp, , , send);
+SDT_PROBE_DECLARE(tcp, , , state__change);
+SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
+
+SDT_PROBE_DECLARE(udp, , , receive);
+SDT_PROBE_DECLARE(udp, , , send);
+
+#endif


Property changes on: trunk/sys/netinet/in_kdtrace.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/in_mcast.c
===================================================================
--- trunk/sys/netinet/in_mcast.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_mcast.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_mcast.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_mcast.c 321134 2017-07-18 16:58:52Z ngie $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -47,6 +47,7 @@
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/ktr.h>
+#include <sys/taskqueue.h>
 #include <sys/tree.h>
 
 #include <net/if.h>
@@ -139,12 +140,16 @@
 		    struct in_multi **);
 static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
 		    const int noalloc, struct ip_msource **pims);
+#ifdef KTR
 static int	inm_is_ifp_detached(const struct in_multi *);
+#endif
 static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
 static void	inm_purge(struct in_multi *);
 static void	inm_reap(struct in_multi *);
 static struct ip_moptions *
 		inp_findmoptions(struct inpcb *);
+static void	inp_freemoptions_internal(struct ip_moptions *);
+static void	inp_gcmoptions(void *, int);
 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
 static int	inp_join_group(struct inpcb *, struct sockopt *);
 static int	inp_leave_group(struct inpcb *, struct sockopt *);
@@ -180,6 +185,11 @@
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
     "Per-interface stack-wide source filters");
 
+static STAILQ_HEAD(, ip_moptions) imo_gc_list =
+    STAILQ_HEAD_INITIALIZER(imo_gc_list);
+static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
+
+#ifdef KTR
 /*
  * Inline function which wraps assertions for a valid ifp.
  * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -202,6 +212,7 @@
 
 	return (ifp == NULL);
 }
+#endif
 
 /*
  * Initialize an in_mfilter structure to a known state at t0, t1
@@ -1004,9 +1015,10 @@
 	/* Decrement ASM listener count on transition out of ASM mode. */
 	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
 		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
-		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
 			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
 			--inm->inm_st[1].iss_asm;
+		}
 	}
 
 	/* Increment ASM listener count on transition to ASM mode. */
@@ -1172,11 +1184,8 @@
 int
 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
 {
-	struct ifnet *ifp;
 	int error;
 
-	ifp = inm->inm_ifp;
-
 	IN_MULTI_LOCK();
 	error = in_leavegroup_locked(inm, imf);
 	IN_MULTI_UNLOCK();
@@ -1233,7 +1242,9 @@
 	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+	CURVNET_SET(inm->inm_ifp->if_vnet);
 	error = igmp_change_state(inm);
+	CURVNET_RESTORE();
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
@@ -1443,7 +1454,7 @@
 	error = inm_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-		goto out_imf_rollback;
+		goto out_in_multi_locked;
 	}
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -1451,6 +1462,8 @@
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
+out_in_multi_locked:
+
 	IN_MULTI_UNLOCK();
 
 out_imf_rollback:
@@ -1519,7 +1532,10 @@
 }
 
 /*
- * Discard the IP multicast options (and source filters).
+ * Discard the IP multicast options (and source filters).  To minimize
+ * the amount of work done while holding locks such as the INP's
+ * pcbinfo lock (which is used in the receive path), the free
+ * operation is performed asynchronously in a separate task.
  *
  * SMPng: NOTE: assumes INP write lock is held.
  */
@@ -1526,11 +1542,20 @@
 void
 inp_freemoptions(struct ip_moptions *imo)
 {
+
+	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+	IN_MULTI_LOCK();
+	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
+	IN_MULTI_UNLOCK();
+	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
+}
+
+static void
+inp_freemoptions_internal(struct ip_moptions *imo)
+{
 	struct in_mfilter	*imf;
 	size_t			 idx, nmships;
 
-	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
-
 	nmships = imo->imo_num_memberships;
 	for (idx = 0; idx < nmships; ++idx) {
 		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1547,6 +1572,22 @@
 	free(imo, M_IPMOPTS);
 }
 
+static void
+inp_gcmoptions(void *context, int pending)
+{
+	struct ip_moptions *imo;
+
+	IN_MULTI_LOCK();
+	while (!STAILQ_EMPTY(&imo_gc_list)) {
+		imo = STAILQ_FIRST(&imo_gc_list);
+		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
+		IN_MULTI_UNLOCK();
+		inp_freemoptions_internal(imo);
+		IN_MULTI_LOCK();
+	}
+	IN_MULTI_UNLOCK();
+}
+
 /*
  * Atomically get source filters on a socket for an IPv4 multicast group.
  * Called with INP lock held; returns with lock released.
@@ -1615,6 +1656,8 @@
 	 * has asked for, but we always tell userland how big the
 	 * buffer really needs to be.
 	 */
+	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
+		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
 	tss = NULL;
 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
@@ -2089,8 +2132,12 @@
 	if (is_new) {
 		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
 		    &inm);
-		if (error)
+		if (error) {
+                        CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 
+                            __func__);
+                        IN_MULTI_UNLOCK();
 			goto out_imo_free;
+                }
 		imo->imo_membership[idx] = inm;
 	} else {
 		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2098,7 +2145,7 @@
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 			    __func__);
-			goto out_imf_rollback;
+			goto out_in_multi_locked;
 		}
 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
 		error = igmp_change_state(inm);
@@ -2105,13 +2152,14 @@
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
 			    __func__);
-			goto out_imf_rollback;
+			goto out_in_multi_locked;
 		}
 	}
 
+out_in_multi_locked:
+
 	IN_MULTI_UNLOCK();
 
-out_imf_rollback:
 	INP_WLOCK_ASSERT(inp);
 	if (error) {
 		imf_rollback(imf);
@@ -2315,7 +2363,7 @@
 		if (error) {
 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
 			    __func__);
-			goto out_imf_rollback;
+			goto out_in_multi_locked;
 		}
 
 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2326,9 +2374,10 @@
 		}
 	}
 
+out_in_multi_locked:
+
 	IN_MULTI_UNLOCK();
 
-out_imf_rollback:
 	if (error)
 		imf_rollback(imf);
 	else
@@ -2562,7 +2611,7 @@
 	error = inm_merge(inm, imf);
 	if (error) {
 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
-		goto out_imf_rollback;
+		goto out_in_multi_locked;
 	}
 
 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2570,6 +2619,8 @@
 	if (error)
 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
 
+out_in_multi_locked:
+
 	IN_MULTI_UNLOCK();
 
 out_imf_rollback:

Modified: trunk/sys/netinet/in_pcb.c
===================================================================
--- trunk/sys/netinet/in_pcb.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcb.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_pcb.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_pcb.c 314667 2017-03-04 13:03:31Z avg $");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
@@ -145,11 +145,7 @@
 {
 	int error;
 
-#ifdef VIMAGE
-	error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
-#else
 	error = sysctl_handle_int(oidp, arg1, arg2, req);
-#endif
 	if (error == 0) {
 		RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 		RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
@@ -197,7 +193,7 @@
 	&VNET_NAME(ipport_randomtime), 0,
 	"Minimum time to keep sequental port "
 	"allocation before switching to a random one");
-#endif
+#endif /* INET */
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
@@ -220,6 +216,7 @@
 
 	INP_INFO_LOCK_INIT(pcbinfo, name);
 	INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");	/* XXXRW: argument? */
+	INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
 #ifdef VIMAGE
 	pcbinfo->ipi_vnet = curvnet;
 #endif
@@ -237,6 +234,8 @@
 	    NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
 	    inpcbzone_flags);
 	uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
+	uma_zone_set_warning(pcbinfo->ipi_zone,
+	    "kern.ipc.maxsockets limit reached");
 }
 
 /*
@@ -256,6 +255,7 @@
 	in_pcbgroup_destroy(pcbinfo);
 #endif
 	uma_zdestroy(pcbinfo->ipi_zone);
+	INP_LIST_LOCK_DESTROY(pcbinfo);
 	INP_HASH_LOCK_DESTROY(pcbinfo);
 	INP_INFO_LOCK_DESTROY(pcbinfo);
 }
@@ -270,7 +270,14 @@
 	struct inpcb *inp;
 	int error;
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_RLOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
+
 	error = 0;
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
@@ -302,6 +309,8 @@
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
+	INP_WLOCK(inp);
+	INP_LIST_WLOCK(pcbinfo);
 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
@@ -309,9 +318,9 @@
 	if (V_ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
-	INP_WLOCK(inp);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	refcount_init(&inp->inp_refcount, 1);	/* Reference from inpcbinfo */
+	INP_LIST_WUNLOCK(pcbinfo);
 #if defined(IPSEC) || defined(MAC)
 out:
 	if (error != 0) {
@@ -333,8 +342,7 @@
 
 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
-	anonport = inp->inp_lport == 0 && (nam == NULL ||
-	    ((struct sockaddr_in *)nam)->sin_port == 0);
+	anonport = nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0;
 	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, cred);
 	if (error)
@@ -390,13 +398,14 @@
 		lastport = &pcbinfo->ipi_lastport;
 	}
 	/*
-	 * For UDP, use random port allocation as long as the user
+	 * For UDP(-Lite), use random port allocation as long as the user
 	 * allows it.  For TCP (and as of yet unknown) connections,
 	 * use random port allocation only if the user allows it AND
 	 * ipport_tick() allows it.
 	 */
 	if (V_ipport_randomized &&
-		(!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
+		(!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
+		pcbinfo == &V_ulitecbinfo))
 		dorandom = 1;
 	else
 		dorandom = 0;
@@ -406,8 +415,8 @@
 	 */
 	if (first == last)
 		dorandom = 0;
-	/* Make sure to not include UDP packets in the count. */
-	if (pcbinfo != &V_udbinfo)
+	/* Make sure to not include UDP(-Lite) packets in the count. */
+	if (pcbinfo != &V_udbinfo || pcbinfo != &V_ulitecbinfo)
 		V_ipport_tcpallocs++;
 	/*
 	 * Instead of having two loops further down counting up or down
@@ -467,6 +476,23 @@
 
 	return (0);
 }
+
+/*
+ * Return cached socket options.
+ */
+short
+inp_so_options(const struct inpcb *inp)
+{
+   short so_options;
+
+   so_options = 0;
+
+   if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+	   so_options |= SO_REUSEPORT;
+   if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+	   so_options |= SO_REUSEADDR;
+   return (so_options);
+}
 #endif /* INET || INET6 */
 
 #ifdef INET
@@ -537,7 +563,7 @@
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
-			if (so->so_options & SO_REUSEADDR)
+			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
@@ -595,8 +621,7 @@
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
-			} else if (t && (reuseport == 0 ||
-			    (t->inp_flags2 & INP_REUSEPORT) == 0)) {
+			} else if (t && (reuseport & inp_so_options(t)) == 0) {
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
@@ -681,7 +706,7 @@
  * Do proper source address selection on an unbound socket in case
  * of connect. Take jails into account as well.
  */
-static int
+int
 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
     struct ucred *cred)
 {
@@ -1039,7 +1064,7 @@
 	inp->inp_fport = 0;
 	in_pcbrehash(inp);
 }
-#endif
+#endif /* INET */
 
 /*
  * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
@@ -1135,8 +1160,17 @@
 
 	INP_WLOCK_ASSERT(inp);
 
-	if (refcount_release(&inp->inp_refcount) == 0)
+	if (refcount_release(&inp->inp_refcount) == 0) {
+		/*
+		 * If the inpcb has been freed, let the caller know, even if
+		 * this isn't the last reference.
+		 */
+		if (inp->inp_flags2 & INP_FREED) {
+			INP_WUNLOCK(inp);
+			return (1);
+		}
 		return (0);
+	}
 
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
@@ -1172,7 +1206,13 @@
 
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_LOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
 	INP_WLOCK_ASSERT(inp);
 
 	/* XXXRW: Do as much as possible here. */
@@ -1179,9 +1219,11 @@
 #ifdef IPSEC
 	if (inp->inp_sp != NULL)
 		ipsec_delete_pcbpolicy(inp);
-#endif /* IPSEC */
+#endif
+	INP_LIST_WLOCK(pcbinfo);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	in_pcbremlists(inp);
+	INP_LIST_WUNLOCK(pcbinfo);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		ip6_freepcbopts(inp->in6p_outputopts);
@@ -1338,7 +1380,7 @@
 	struct ip_moptions *imo;
 	int i, gap;
 
-	INP_INFO_RLOCK(pcbinfo);
+	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		imo = inp->inp_moptions;
@@ -1368,7 +1410,7 @@
 		}
 		INP_WUNLOCK(inp);
 	}
-	INP_INFO_RUNLOCK(pcbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
@@ -1597,7 +1639,7 @@
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
-#endif /* INET6 */
+#endif
 					if (injail)
 						jail_wild = inp;
 					else
@@ -1612,7 +1654,7 @@
 #ifdef INET6
 		if (inp == NULL)
 			inp = local_wild_mapped;
-#endif /* defined(INET6) */
+#endif
 		if (inp != NULL)
 			goto found;
 	} /* if (lookupflags & INPLOOKUP_WILDCARD) */
@@ -1742,7 +1784,7 @@
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
-#endif /* INET6 */
+#endif
 					if (injail)
 						jail_wild = inp;
 					else
@@ -1758,7 +1800,7 @@
 #ifdef INET6
 		if (local_wild_mapped != NULL)
 			return (local_wild_mapped);
-#endif /* defined(INET6) */
+#endif
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
 	return (NULL);
@@ -1882,7 +1924,7 @@
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 	else
-#endif /* INET6 */
+#endif
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -1969,7 +2011,7 @@
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 	else
-#endif /* INET6 */
+#endif
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -2001,8 +2043,16 @@
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
-	INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+	if (pcbinfo == &V_tcbinfo) {
+		INP_INFO_RLOCK_ASSERT(pcbinfo);
+	} else {
+		INP_INFO_WLOCK_ASSERT(pcbinfo);
+	}
+#endif
+
 	INP_WLOCK_ASSERT(inp);
+	INP_LIST_WLOCK_ASSERT(pcbinfo);
 
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	if (inp->inp_flags & INP_INHASHLIST) {
@@ -2090,7 +2140,7 @@
 {
 
 	/* Start ipport_tick. */
-	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+	callout_init(&ipport_tick_callout, 1);
 	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
@@ -2147,13 +2197,13 @@
 {
 	struct inpcb *inp;
 
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_WLOCK(&V_tcbinfo);
 	LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		func(inp, arg);
 		INP_WUNLOCK(inp);
 	}
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 }
 
 struct socket *
@@ -2237,14 +2287,13 @@
 		/* IPv6. */
 		ip6_sprintf(laddr_str, &inc->inc6_laddr);
 		ip6_sprintf(faddr_str, &inc->inc6_faddr);
-	} else {
+	} else
 #endif
+	{
 		/* IPv4. */
 		inet_ntoa_r(inc->inc_laddr, laddr_str);
 		inet_ntoa_r(inc->inc_faddr, faddr_str);
-#ifdef INET6
 	}
-#endif
 	db_print_indent(indent);
 	db_printf("inc_laddr %s   inc_lport %u\n", laddr_str,
 	    ntohs(inc->inc_lport));
@@ -2461,4 +2510,4 @@
 
 	db_print_inpcb(inp, "inpcb", 0);
 }
-#endif
+#endif /* DDB */

Modified: trunk/sys/netinet/in_pcb.h
===================================================================
--- trunk/sys/netinet/in_pcb.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcb.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -33,7 +33,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/in_pcb.h 241435 2012-10-11 01:32:51Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/in_pcb.h 309108 2016-11-24 14:48:46Z jch $
  */
 
 #ifndef _NETINET_IN_PCB_H_
@@ -129,23 +129,35 @@
 struct	icmp6_filter;
 
 /*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets.  In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets.  In the case of TCP and UDP, further per-connection state is
  * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
  * are static after creation or protected by a per-inpcb rwlock, inp_lock.  A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on).  This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple.  Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below.  For these fields, all of the listed locks must be write-locked for
+ * any modifications.  However, these fields can be safely read while any one of
+ * the listed locks are read-locked.  This model can permit greater concurrency
+ * for read operations.  For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock.  This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
  *
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field.  Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
+ *
  * Key:
  * (c) - Constant after initialization
  * (g) - Protected by the pcbgroup lock
  * (i) - Protected by the inpcb lock
  * (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
  * (s) - Protected by another subsystem's locks
  * (x) - Undefined locking
  *
@@ -160,15 +172,21 @@
  * socket has been freed), or there may be close(2)-related races.
  *
  * The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO:  Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification, this model can be applied to other
+ * protocols (especially SCTP).
  */
 struct inpcb {
-	LIST_ENTRY(inpcb) inp_hash;	/* (i/p) hash list */
+	LIST_ENTRY(inpcb) inp_hash;	/* (h/i) hash list */
 	LIST_ENTRY(inpcb) inp_pcbgrouphash;	/* (g/i) hash list */
-	LIST_ENTRY(inpcb) inp_list;	/* (i/p) list for all PCBs for proto */
+	LIST_ENTRY(inpcb) inp_list;	/* (p/l) list for all PCBs for proto */
+	                                /* (p[w]) for list iteration */
+	                                /* (p[r]/l) for addition/removal */
 	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
 	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
 	struct	inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
-	LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+	LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
 	struct	socket *inp_socket;	/* (i) back pointer to socket */
 	struct	ucred	*inp_cred;	/* (c) cache of socket cred */
 	u_int32_t inp_flow;		/* (i) IPv6 flow information */
@@ -181,11 +199,12 @@
 	uint32_t inp_flowid;		/* (x) flow id / queue id */
 	u_int	inp_refcount;		/* (i) refcount */
 	void	*inp_pspare[5];		/* (x) route caching / general use */
-	u_int	inp_ispare[6];		/* (x) route caching / user cookie /
+	uint32_t inp_flowtype;		/* (x) M_HASHTYPE value */
+	u_int	inp_ispare[5];		/* (x) route caching / user cookie /
 					 *     general use */
 
 	/* Local and foreign ports, local and foreign addr. */
-	struct	in_conninfo inp_inc;	/* (i/p) list for PCB's local port */
+	struct	in_conninfo inp_inc;	/* (i) list for PCB's local port */
 
 	/* MAC and IPSEC policy information. */
 	struct	label *inp_label;	/* (i) MAC label */
@@ -210,8 +229,8 @@
 		int	inp6_cksum;
 		short	inp6_hops;
 	} inp_depend6;
-	LIST_ENTRY(inpcb) inp_portlist;	/* (i/p) */
-	struct	inpcbport *inp_phd;	/* (i/p) head of this list */
+	LIST_ENTRY(inpcb) inp_portlist;	/* (i/h) */
+	struct	inpcbport *inp_phd;	/* (i/h) head of this list */
 #define inp_zero_size offsetof(struct inpcb, inp_gencnt)
 	inp_gen_t	inp_gencnt;	/* (c) generation count */
 	struct llentry	*inp_lle;	/* cached L2 information */
@@ -275,16 +294,25 @@
  * Global data structure for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
  *
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables.  The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ *  - ipi_lock covering the global pcb list stability during loop iteration,
+ *  - ipi_hash_lock covering the hashed lookup tables,
+ *  - ipi_list_lock covering mutable global fields (such as the global
+ *    pcb list)
  *
- *    ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ * The lock order is:
  *
+ *    ipi_lock (before)
+ *        inpcb locks (before)
+ *            ipi_list locks (before)
+ *                {ipi_hash_lock, pcbgroup locks}
+ *
  * Locking key:
  *
  * (c) Constant or nearly constant after initialisation
  * (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
  * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
  * (p) Protected by one or more pcbgroup locks
  * (x) Synchronisation properties poorly defined
@@ -291,7 +319,7 @@
  */
 struct inpcbinfo {
 	/*
-	 * Global lock protecting global inpcb list, inpcb count, etc.
+	 * Global lock protecting full inpcb list traversal
 	 */
 	struct rwlock		 ipi_lock;
 
@@ -298,14 +326,14 @@
 	/*
 	 * Global list of inpcbs on the protocol.
 	 */
-	struct inpcbhead	*ipi_listhead;		/* (g) */
-	u_int			 ipi_count;		/* (g) */
+	struct inpcbhead	*ipi_listhead;		/* (g/l) */
+	u_int			 ipi_count;		/* (l) */
 
 	/*
 	 * Generation count -- incremented each time a connection is allocated
 	 * or freed.
 	 */
-	u_quad_t		 ipi_gencnt;		/* (g) */
+	u_quad_t		 ipi_gencnt;		/* (l) */
 
 	/*
 	 * Fields associated with port lookup and allocation.
@@ -363,6 +391,11 @@
 	 * general use 2
 	 */
 	void 			*ipi_pspare[2];
+
+	/*
+	 * Global lock protecting global inpcb list, inpcb count, etc.
+	 */
+	struct rwlock		 ipi_list_lock;
 };
 
 #ifdef _KERNEL
@@ -443,6 +476,7 @@
 	inp_inpcbtotcpcb(struct inpcb *inp);
 void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
 		uint32_t *faddr, uint16_t *fp);
+short	inp_so_options(const struct inpcb *inp);
 
 #endif /* _KERNEL */
 
@@ -454,6 +488,7 @@
 #define INP_INFO_TRY_RLOCK(ipi)	rw_try_rlock(&(ipi)->ipi_lock)
 #define INP_INFO_TRY_WLOCK(ipi)	rw_try_wlock(&(ipi)->ipi_lock)
 #define INP_INFO_TRY_UPGRADE(ipi)	rw_try_upgrade(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi)	rw_wowned(&(ipi)->ipi_lock)
 #define INP_INFO_RUNLOCK(ipi)	rw_runlock(&(ipi)->ipi_lock)
 #define INP_INFO_WUNLOCK(ipi)	rw_wunlock(&(ipi)->ipi_lock)
 #define	INP_INFO_LOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
@@ -461,6 +496,25 @@
 #define INP_INFO_WLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
 #define INP_INFO_UNLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
 
+#define INP_LIST_LOCK_INIT(ipi, d) \
+        rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi)  rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi)     rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi)     rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi)       rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi)   rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi)   rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+	rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
 #define	INP_HASH_LOCK_INIT(ipi, d) \
 	rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
 #define	INP_HASH_LOCK_DESTROY(ipi)	rw_destroy(&(ipi)->ipi_hash_lock)
@@ -524,8 +578,8 @@
 #define	INP_ONESBCAST		0x02000000 /* send all-ones broadcast */
 #define	INP_DROPPED		0x04000000 /* protocol drop flag */
 #define	INP_SOCKREF		0x08000000 /* strong socket reference */
-#define	INP_SW_FLOWID           0x10000000 /* software generated flow id */
-#define	INP_HW_FLOWID           0x20000000 /* hardware generated flow id */
+#define	INP_RESERVED_0          0x10000000 /* reserved field */
+#define	INP_RESERVED_1          0x20000000 /* reserved field */
 #define	IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
 #define	IN6P_MTU		0x80000000 /* receive path MTU */
 
@@ -544,6 +598,7 @@
 #define	INP_PCBGROUPWILD	0x00000004 /* in pcbgroup wildcard list */
 #define	INP_REUSEPORT		0x00000008 /* SO_REUSEPORT option is set */
 #define	INP_FREED		0x00000010 /* inp itself is not valid */
+#define	INP_REUSEADDR		0x00000020 /* SO_REUSEADDR option is set */
 
 /*
  * Flags passed to in_pcblookup*() functions.
@@ -635,6 +690,8 @@
 void	in_pcbfree(struct inpcb *);
 int	in_pcbinshash(struct inpcb *);
 int	in_pcbinshash_nopcbgroup(struct inpcb *);
+int	in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
+	    struct ucred *);
 struct inpcb *
 	in_pcblookup_local(struct inpcbinfo *,
 	    struct in_addr, u_short, int, struct ucred *);

Modified: trunk/sys/netinet/in_pcbgroup.c
===================================================================
--- trunk/sys/netinet/in_pcbgroup.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcbgroup.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
 
 #include <sys/cdefs.h>
 
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
 
 #include "opt_inet6.h"
 

Modified: trunk/sys/netinet/in_proto.c
===================================================================
--- trunk/sys/netinet/in_proto.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_proto.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_proto.c 230868 2012-02-01 15:57:49Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_proto.c 265946 2014-05-13 06:05:53Z kevlo $");
 
 #include "opt_ipx.h"
 #include "opt_mrouting.h"
@@ -185,6 +185,20 @@
 },
 #endif /* SCTP */
 {
+	.pr_type =		SOCK_DGRAM,
+	.pr_domain =		&inetdomain,
+	.pr_protocol =		IPPROTO_UDPLITE,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_input =		udp_input,
+	.pr_ctlinput =		udplite_ctlinput,
+	.pr_ctloutput =		udp_ctloutput,
+	.pr_init =		udplite_init,
+#ifdef VIMAGE
+	.pr_destroy =		udplite_destroy,
+#endif
+	.pr_usrreqs =		&udp_usrreqs
+},
+{
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_RAW,

Modified: trunk/sys/netinet/in_rmx.c
===================================================================
--- trunk/sys/netinet/in_rmx.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_rmx.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_rmx.c 242646 2012-11-06 01:18:53Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_rmx.c 295389 2016-02-08 00:07:01Z bz $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -94,8 +94,8 @@
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
-	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+	if (rt->rt_mtu == 0 && rt->rt_ifp != NULL)
+		rt->rt_mtu = rt->rt_ifp->if_mtu;
 
 	return (rn_addroute(v_arg, n_arg, head, treenodes));
 }
@@ -115,7 +115,7 @@
 		RT_LOCK(rt);
 		if (rt->rt_flags & RTPRF_OURS) {
 			rt->rt_flags &= ~RTPRF_OURS;
-			rt->rt_rmx.rmx_expire = 0;
+			rt->rt_expire = 0;
 		}
 		RT_UNLOCK(rt);
 	}
@@ -168,7 +168,7 @@
 	 */
 	if (V_rtq_reallyold != 0) {
 		rt->rt_flags |= RTPRF_OURS;
-		rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold;
+		rt->rt_expire = time_uptime + V_rtq_reallyold;
 	} else {
 		rtexpunge(rt);
 	}
@@ -200,7 +200,7 @@
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
-		if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
+		if (ap->draining || rt->rt_expire <= time_uptime) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
@@ -216,13 +216,9 @@
 			}
 		} else {
 			if (ap->updating &&
-			    (rt->rt_rmx.rmx_expire - time_uptime >
-			     V_rtq_reallyold)) {
-				rt->rt_rmx.rmx_expire =
-				    time_uptime + V_rtq_reallyold;
-			}
-			ap->nextstop = lmin(ap->nextstop,
-					    rt->rt_rmx.rmx_expire);
+			    (rt->rt_expire - time_uptime > V_rtq_reallyold))
+				rt->rt_expire = time_uptime + V_rtq_reallyold;
+			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
 		}
 	}
 
@@ -378,7 +374,7 @@
 {
 
 	callout_drain(&V_rtq_timer);
-	return (1);
+	return (rn_detachhead(head));
 }
 #endif
 

Modified: trunk/sys/netinet/in_systm.h
===================================================================
--- trunk/sys/netinet/in_systm.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_systm.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in_systm.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/in_systm.h 188578 2009-02-13 15:14:43Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/in_systm.h 188578 2009-02-13 15:14:43Z luigi $
  */
 
 #ifndef _NETINET_IN_SYSTM_H_

Modified: trunk/sys/netinet/in_var.h
===================================================================
--- trunk/sys/netinet/in_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,12 +28,13 @@
  * SUCH DAMAGE.
  *
  *	@(#)in_var.h	8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/netinet/in_var.h 242640 2012-11-06 00:49:52Z melifaro $
+ * $FreeBSD: stable/10/sys/netinet/in_var.h 309340 2016-11-30 22:20:23Z vangyzen $
  */
 
 #ifndef _NETINET_IN_VAR_H_
 #define _NETINET_IN_VAR_H_
 
+#include <sys/callout.h>
 #include <sys/queue.h>
 #include <sys/fnv_hash.h>
 #include <sys/tree.h>
@@ -70,6 +71,8 @@
 	struct	sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
 #define	ia_broadaddr	ia_dstaddr
 	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
+	struct	callout ia_garp_timer;	/* timer for retransmitting GARPs */
+	int	ia_garp_count;		/* count of retransmitted GARPs */
 };
 
 struct	in_aliasreq {
@@ -78,6 +81,7 @@
 	struct	sockaddr_in ifra_broadaddr;
 #define ifra_dstaddr ifra_broadaddr
 	struct	sockaddr_in ifra_mask;
+	int	ifra_vhid;
 };
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
@@ -85,6 +89,7 @@
  */
 #define IA_SIN(ia)    (&(((struct in_ifaddr *)(ia))->ia_addr))
 #define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+#define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask))
 
 #define IN_LNAOF(in, ifa) \
 	((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
@@ -446,6 +451,8 @@
 int	in_control(struct socket *, u_long, caddr_t, struct ifnet *,
 	    struct thread *);
 void	in_rtqdrain(void);
+int	in_addprefix(struct in_ifaddr *, int);
+int	in_scrubprefix(struct in_ifaddr *, u_int);
 void	ip_input(struct mbuf *);
 int	in_ifadown(struct ifaddr *ifa, int);
 void	in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int);

Modified: trunk/sys/netinet/ip.h
===================================================================
--- trunk/sys/netinet/ip.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip.h	8.2 (Berkeley) 6/1/94
- * $FreeBSD: stable/9/sys/netinet/ip.h 235805 2012-05-22 19:53:25Z delphij $
+ * $FreeBSD: stable/10/sys/netinet/ip.h 286105 2015-07-31 03:40:09Z kevlo $
  */
 
 #ifndef _NETINET_IP_H_
@@ -68,7 +68,7 @@
 	u_char	ip_p;			/* protocol */
 	u_short	ip_sum;			/* checksum */
 	struct	in_addr ip_src,ip_dst;	/* source and dest address */
-} __packed __aligned(4);
+} __packed __aligned(2);
 
 #define	IP_MAXPACKET	65535		/* maximum packet size */
 
@@ -81,19 +81,19 @@
 #define	IPTOS_MINCOST		0x02
 
 /*
- * Definitions for IP precedence (also in ip_tos) (hopefully unused).
+ * Definitions for IP precedence (also in ip_tos) (deprecated).
  */
-#define	IPTOS_PREC_NETCONTROL		0xe0
-#define	IPTOS_PREC_INTERNETCONTROL	0xc0
-#define	IPTOS_PREC_CRITIC_ECP		0xa0
-#define	IPTOS_PREC_FLASHOVERRIDE	0x80
-#define	IPTOS_PREC_FLASH		0x60
-#define	IPTOS_PREC_IMMEDIATE		0x40
-#define	IPTOS_PREC_PRIORITY		0x20
-#define	IPTOS_PREC_ROUTINE		0x00
+#define	IPTOS_PREC_NETCONTROL		IPTOS_DSCP_CS7
+#define	IPTOS_PREC_INTERNETCONTROL	IPTOS_DSCP_CS6
+#define	IPTOS_PREC_CRITIC_ECP		IPTOS_DSCP_CS5
+#define	IPTOS_PREC_FLASHOVERRIDE	IPTOS_DSCP_CS4
+#define	IPTOS_PREC_FLASH		IPTOS_DSCP_CS3
+#define	IPTOS_PREC_IMMEDIATE		IPTOS_DSCP_CS2
+#define	IPTOS_PREC_PRIORITY		IPTOS_DSCP_CS1
+#define	IPTOS_PREC_ROUTINE		IPTOS_DSCP_CS0
 
 /*
- * Definitions for DiffServ Codepoints as per RFC2474
+ * Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
  */
 #define	IPTOS_DSCP_CS0		0x00
 #define	IPTOS_DSCP_CS1		0x20
@@ -113,6 +113,7 @@
 #define	IPTOS_DSCP_AF42		0x90
 #define	IPTOS_DSCP_AF43		0x98
 #define	IPTOS_DSCP_CS5		0xa0
+#define	IPTOS_DSCP_VA		0xb0
 #define	IPTOS_DSCP_EF		0xb8
 #define	IPTOS_DSCP_CS6		0xc0
 #define	IPTOS_DSCP_CS7		0xe0

Modified: trunk/sys/netinet/ip6.h
===================================================================
--- trunk/sys/netinet/ip6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet/ip6.h 215956 2010-11-27 21:51:39Z brucec $	*/
+/*	$FreeBSD: stable/10/sys/netinet/ip6.h 249294 2013-04-09 07:11:22Z ae $	*/
 /*	$KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $	*/
 
 /*-

Modified: trunk/sys/netinet/ip_carp.c
===================================================================
--- trunk/sys/netinet/ip_carp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_carp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,7 +1,9 @@
 /* $MidnightBSD$ */
-/*
- * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
- * Copyright (c) 2003 Ryan McBride. All rights reserved.
+/*-
+ * Copyright (c) 2002 Michael Shalayeff.
+ * Copyright (c) 2003 Ryan McBride.
+ * Copyright (c) 2011 Gleb Smirnoff <glebius at FreeBSD.org>
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,44 +28,38 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_carp.c 233200 2012-03-19 20:49:16Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_carp.c 278075 2015-02-02 11:42:35Z loos $");
 
 #include "opt_bpf.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
-#include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
-#include <sys/time.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
-#include <sys/signalvar.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <sys/counter.h>
 
-#include <sys/socket.h>
-#include <sys/vnode.h>
-
-#include <machine/stdarg.h>
-
-#include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/fddi.h>
-#include <net/iso88025.h>
 #include <net/if.h>
-#include <net/if_clone.h>
 #include <net/if_dl.h>
+#include <net/if_llatbl.h>
 #include <net/if_types.h>
+#include <net/iso88025.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -72,12 +68,9 @@
 #include <netinet/in_var.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip.h>
-
 #include <machine/in_cksum.h>
 #endif
-
 #ifdef INET
-#include <netinet/in_systm.h>
 #include <netinet/ip_var.h>
 #include <netinet/if_ether.h>
 #endif
@@ -86,88 +79,72 @@
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6protosw.h>
+#include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
-#include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #include <crypto/sha1.h>
 
-#define	CARP_IFNAME	"carp"
-static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
-SYSCTL_DECL(_net_inet_carp);
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
 
 struct carp_softc {
-	struct ifnet	 	*sc_ifp;	/* Interface clue */
-	struct ifnet		*sc_carpdev;	/* Pointer to parent interface */
-	struct in_ifaddr 	*sc_ia;		/* primary iface address */
+	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
+	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
+	struct sockaddr_dl	sc_addr;	/* Our link level address. */
+	struct callout		sc_ad_tmo;	/* Advertising timeout. */
 #ifdef INET
-	struct ip_moptions 	 sc_imo;
+	struct callout		sc_md_tmo;	/* Master down timeout. */
 #endif
 #ifdef INET6
-	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
-	struct ip6_moptions 	 sc_im6o;
-#endif /* INET6 */
-	TAILQ_ENTRY(carp_softc)	 sc_list;
+	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
+#endif
+	struct mtx		sc_mtx;
 
+	int			sc_vhid;
+	int			sc_advskew;
+	int			sc_advbase;
+
+	int			sc_naddrs;
+	int			sc_naddrs6;
+	int			sc_ifasiz;
 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
-
-	int			 sc_flags_backup;
-	int			 sc_suppress;
-
-	int			 sc_sendad_errors;
+	int			sc_suppress;
+	int			sc_sendad_errors;
 #define	CARP_SENDAD_MAX_ERRORS	3
-	int			 sc_sendad_success;
+	int			sc_sendad_success;
 #define	CARP_SENDAD_MIN_SUCCESS 3
 
-	int			 sc_vhid;
-	int			 sc_advskew;
-	int			 sc_naddrs;
-	int			 sc_naddrs6;
-	int			 sc_advbase;	/* seconds */
-	int			 sc_init_counter;
-	u_int64_t		 sc_counter;
+	int			sc_init_counter;
+	uint64_t		sc_counter;
 
 	/* authentication */
-#define CARP_HMAC_PAD	64
+#define	CARP_HMAC_PAD	64
 	unsigned char sc_key[CARP_KEY_LEN];
 	unsigned char sc_pad[CARP_HMAC_PAD];
 	SHA1_CTX sc_sha1;
 
-	struct callout		 sc_ad_tmo;	/* advertisement timeout */
-	struct callout		 sc_md_tmo;	/* master down timeout */
-	struct callout 		 sc_md6_tmo;	/* master down timeout */
-	
-	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
+	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
+	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
 };
-#define	SC2IFP(sc)	((sc)->sc_ifp)
 
-int carp_suppress_preempt = 0;
-int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };	/* XXX for now */
-SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
-    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
-    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
-SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
-    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
-    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
-SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
-    &carp_suppress_preempt, 0, "Preemption is suppressed");
-
-struct carpstats carpstats;
-SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
-    &carpstats, carpstats,
-    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
-
 struct carp_if {
-	TAILQ_HEAD(, carp_softc) vhif_vrs;
-	int vhif_nvrs;
-
-	struct ifnet 	*vhif_ifp;
-	struct mtx	 vhif_mtx;
+#ifdef INET
+	int	cif_naddrs;
+#endif
+#ifdef INET6
+	int	cif_naddrs6;
+#endif
+	TAILQ_HEAD(, carp_softc) cif_vrs;
+#ifdef INET
+	struct ip_moptions 	 cif_imo;
+#endif
+#ifdef INET6
+	struct ip6_moptions 	 cif_im6o;
+#endif
+	struct ifnet	*cif_ifp;
+	struct mtx	cif_mtx;
 };
 
 #define	CARP_INET	0
@@ -174,93 +151,179 @@
 #define	CARP_INET6	1
 static int proto_reg[] = {-1, -1};
 
-/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
-#define	SC2CIF(sc)		((struct carp_if *)(sc)->sc_carpdev->if_carp)
+/*
+ * Brief design of carp(4).
+ *
+ * Any carp-capable ifnet may have a list of carp softcs hanging off
+ * its ifp->if_carp pointer. Each softc represents one unique virtual
+ * host id, or vhid. The softc has a back pointer to the ifnet. All
+ * softcs are joined in a global list, which has quite limited use.
+ *
+ * Any interface address that takes part in CARP negotiation has a
+ * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
+ * AF_INET or AF_INET6 address.
+ *
+ * Although, one can get the softc's backpointer to ifnet and traverse
+ * through its ifp->if_addrhead queue to find all interface addresses
+ * involved in CARP, we keep a growable array of ifaddr pointers. This
+ * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
+ * do calls into the network stack, thus avoiding LORs.
+ *
+ * Locking:
+ *
+ * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
+ * callout-driven events and ioctl()s.
+ *
+ * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
+ * traverse the global list we use the mutex carp_mtx.
+ *
+ * Known issues with locking:
+ *
+ * - There is no protection for races between two ioctl() requests,
+ *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
+ *   interface ioctl()s should be serialized right in net/if.c.
+ * - Sending ad, we put the pointer to the softc in an mtag, and no reference
+ *   counting is done on the softc.
+ * - On module unload we may race (?) with packet processing thread
+ *   dereferencing our function pointers.
+ */
 
-/* lock per carp_if queue */
-#define	CARP_LOCK_INIT(cif)	mtx_init(&(cif)->vhif_mtx, "carp_if", 	\
+/* Accept incoming CARP packets. */
+static VNET_DEFINE(int, carp_allow) = 1;
+#define	V_carp_allow	VNET(carp_allow)
+
+/* Preempt slower nodes. */
+static VNET_DEFINE(int, carp_preempt) = 0;
+#define	V_carp_preempt	VNET(carp_preempt)
+
+/* Log level. */
+static VNET_DEFINE(int, carp_log) = 1;
+#define	V_carp_log	VNET(carp_log)
+
+/* Global advskew demotion. */
+static VNET_DEFINE(int, carp_demotion) = 0;
+#define	V_carp_demotion	VNET(carp_demotion)
+
+/* Send error demotion factor. */
+static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+#define	V_carp_senderr_adj	VNET(carp_senderr_adj)
+
+/* Iface down demotion factor. */
+static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+#define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)
+
+static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW,
+    &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW,
+    &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW,
+    &VNET_NAME(carp_log), 0, "CARP log level");
+SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
+    0, 0, carp_demote_adj_sysctl, "I",
+    "Adjust demotion factor (skew of advskew)");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
+    &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
+    &VNET_NAME(carp_ifdown_adj), 0,
+    "Interface down demotion factor adjustment");
+
+VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
+VNET_PCPUSTAT_SYSINIT(carpstats);
+VNET_PCPUSTAT_SYSUNINIT(carpstats);
+
+#define	CARPSTATS_ADD(name, val)	\
+    counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
+	sizeof(uint64_t)], (val))
+#define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
+
+SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
+    carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+#define	CARP_LOCK_INIT(sc)	mtx_init(&(sc)->sc_mtx, "carp_softc",   \
 	NULL, MTX_DEF)
-#define	CARP_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->vhif_mtx)
-#define	CARP_LOCK_ASSERT(cif)	mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
-#define	CARP_LOCK(cif)		mtx_lock(&(cif)->vhif_mtx)
-#define	CARP_UNLOCK(cif)	mtx_unlock(&(cif)->vhif_mtx)
+#define	CARP_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->sc_mtx)
+#define	CARP_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define	CARP_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
+#define	CARP_UNLOCK(sc)		mtx_unlock(&(sc)->sc_mtx)
+#define	CIF_LOCK_INIT(cif)	mtx_init(&(cif)->cif_mtx, "carp_if",   \
+	NULL, MTX_DEF)
+#define	CIF_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->cif_mtx)
+#define	CIF_LOCK_ASSERT(cif)	mtx_assert(&(cif)->cif_mtx, MA_OWNED)
+#define	CIF_LOCK(cif)		mtx_lock(&(cif)->cif_mtx)
+#define	CIF_UNLOCK(cif)		mtx_unlock(&(cif)->cif_mtx)
+#define	CIF_FREE(cif)	do {				\
+		CIF_LOCK_ASSERT(cif);			\
+		if (TAILQ_EMPTY(&(cif)->cif_vrs))	\
+			carp_free_if(cif);		\
+		else					\
+			CIF_UNLOCK(cif);		\
+} while (0)
 
-#define	CARP_SCLOCK(sc)		mtx_lock(&SC2CIF(sc)->vhif_mtx)
-#define	CARP_SCUNLOCK(sc)	mtx_unlock(&SC2CIF(sc)->vhif_mtx)
-#define	CARP_SCLOCK_ASSERT(sc)	mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
-
 #define	CARP_LOG(...)	do {				\
-	if (carp_opts[CARPCTL_LOG] > 0)			\
-		log(LOG_INFO, __VA_ARGS__);		\
+	if (V_carp_log > 0)				\
+		log(LOG_INFO, "carp: " __VA_ARGS__);	\
 } while (0)
 
 #define	CARP_DEBUG(...)	do {				\
-	if (carp_opts[CARPCTL_LOG] > 1)			\
+	if (V_carp_log > 1)				\
 		log(LOG_DEBUG, __VA_ARGS__);		\
 } while (0)
 
-static void	carp_hmac_prepare(struct carp_softc *);
-static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
-		    unsigned char *);
-static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
-		    unsigned char *);
-static void	carp_setroute(struct carp_softc *, int);
+#define	IFNET_FOREACH_IFA(ifp, ifa)					\
+	IF_ADDR_LOCK_ASSERT(ifp);					\
+	TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)		\
+		if ((ifa)->ifa_carp != NULL)
+
+#define	CARP_FOREACH_IFA(sc, ifa)					\
+	CARP_LOCK_ASSERT(sc);						\
+	for (int _i = 0;						\
+		_i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&		\
+		((ifa) = sc->sc_ifas[_i]) != NULL;			\
+		++_i)
+
+#define	IFNET_FOREACH_CARP(ifp, sc)					\
+	CIF_LOCK_ASSERT(ifp->if_carp);					\
+	TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
+
+#define	DEMOTE_ADVSKEW(sc)					\
+    (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?	\
+    CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
+
 static void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int 	carp_clone_create(struct if_clone *, int, caddr_t);
-static void 	carp_clone_destroy(struct ifnet *);
-static void	carpdetach(struct carp_softc *, int);
-static int	carp_prepare_ad(struct mbuf *, struct carp_softc *,
-		    struct carp_header *);
-static void	carp_send_ad_all(void);
+static struct carp_softc
+		*carp_alloc(struct ifnet *);
+static void	carp_detach_locked(struct ifaddr *);
+static void	carp_destroy(struct carp_softc *);
+static struct carp_if
+		*carp_alloc_if(struct ifnet *);
+static void	carp_free_if(struct carp_if *);
+static void	carp_set_state(struct carp_softc *, int);
+static void	carp_sc_state(struct carp_softc *);
+static void	carp_setrun(struct carp_softc *, sa_family_t);
+static void	carp_master_down(void *);
+static void	carp_master_down_locked(struct carp_softc *);
 static void	carp_send_ad(void *);
 static void	carp_send_ad_locked(struct carp_softc *);
-#ifdef INET
-static void	carp_send_arp(struct carp_softc *);
-#endif
-static void	carp_master_down(void *);
-static void	carp_master_down_locked(struct carp_softc *);
-static int	carp_ioctl(struct ifnet *, u_long, caddr_t);
-static int	carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
-    		    struct route *);
-static void	carp_start(struct ifnet *);
-static void	carp_setrun(struct carp_softc *, sa_family_t);
-static void	carp_set_state(struct carp_softc *, int);
-#ifdef INET
-static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
-#endif
-enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+static void	carp_addroute(struct carp_softc *);
+static void	carp_ifa_addroute(struct ifaddr *);
+static void	carp_delroute(struct carp_softc *);
+static void	carp_ifa_delroute(struct ifaddr *);
+static void	carp_send_ad_all(void *, int);
+static void	carp_demote_adj(int, char *);
 
-#ifdef INET
-static void	carp_multicast_cleanup(struct carp_softc *, int dofree);
-static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
-static int	carp_del_addr(struct carp_softc *, struct sockaddr_in *);
-#endif
-static void	carp_carpdev_state_locked(struct carp_if *);
-static void	carp_sc_state_locked(struct carp_softc *);
-#ifdef INET6
-static void	carp_send_na(struct carp_softc *);
-static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static void	carp_multicast6_cleanup(struct carp_softc *, int dofree);
-#endif
-
-static LIST_HEAD(, carp_softc) carpif_list;
+static LIST_HEAD(, carp_softc) carp_list;
 static struct mtx carp_mtx;
-IFC_SIMPLE_DECLARE(carp, 0);
+static struct task carp_sendall_task =
+    TASK_INITIALIZER(0, carp_send_ad_all, NULL);
 
-static eventhandler_tag if_detach_event_tag;
-
-static __inline u_int16_t
-carp_cksum(struct mbuf *m, int len)
-{
-	return (in_cksum(m, len));
-}
-
 static void
 carp_hmac_prepare(struct carp_softc *sc)
 {
-	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
-	u_int8_t vhid = sc->sc_vhid & 0xff;
+	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+	uint8_t vhid = sc->sc_vhid & 0xff;
 	struct ifaddr *ifa;
 	int i, found;
 #ifdef INET
@@ -270,18 +333,15 @@
 	struct in6_addr last6, cur6, in6;
 #endif
 
-	if (sc->sc_carpdev)
-		CARP_SCLOCK(sc);
+	CARP_LOCK_ASSERT(sc);
 
-	/* XXX: possible race here */
-
-	/* compute ipad from key */
+	/* Compute ipad from key. */
 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36;
 
-	/* precompute first part of inner hash */
+	/* Precompute first part of inner hash. */
 	SHA1Init(&sc->sc_sha1);
 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
@@ -293,8 +353,7 @@
 		found = 0;
 		last = cur;
 		cur.s_addr = 0xffffffff;
-		IF_ADDR_RLOCK(SC2IFP(sc));
-		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		CARP_FOREACH_IFA(sc, ifa) {
 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
 			if (ifa->ifa_addr->sa_family == AF_INET &&
 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
@@ -303,7 +362,6 @@
 				found++;
 			}
 		}
-		IF_ADDR_RUNLOCK(SC2IFP(sc));
 		if (found)
 			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
 	} while (found);
@@ -314,8 +372,7 @@
 		found = 0;
 		last6 = cur6;
 		memset(&cur6, 0xff, sizeof(cur6));
-		IF_ADDR_RLOCK(SC2IFP(sc));
-		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		CARP_FOREACH_IFA(sc, ifa) {
 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
 			if (IN6_IS_SCOPE_EMBED(&in6))
 				in6.s6_addr16[1] = 0;
@@ -326,7 +383,6 @@
 				found++;
 			}
 		}
-		IF_ADDR_RUNLOCK(SC2IFP(sc));
 		if (found)
 			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
 	} while (found);
@@ -335,17 +391,16 @@
 	/* convert ipad to opad */
 	for (i = 0; i < sizeof(sc->sc_pad); i++)
 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
-
-	if (sc->sc_carpdev)
-		CARP_SCUNLOCK(sc);
 }
 
 static void
-carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
     unsigned char md[20])
 {
 	SHA1_CTX sha1ctx;
 
+	CARP_LOCK_ASSERT(sc);
+
 	/* fetch first half of inner hash */
 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
 
@@ -360,12 +415,12 @@
 }
 
 static int
-carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
     unsigned char md[20])
 {
 	unsigned char md2[20];
 
-	CARP_SCLOCK_ASSERT(sc);
+	CARP_LOCK_ASSERT(sc);
 
 	carp_hmac_generate(sc, counter, md2);
 
@@ -372,192 +427,7 @@
 	return (bcmp(md, md2, sizeof(md2)));
 }
 
-static void
-carp_setroute(struct carp_softc *sc, int cmd)
-{
-	struct ifaddr *ifa;
-	int s;
-
-	if (sc->sc_carpdev)
-		CARP_SCLOCK_ASSERT(sc);
-
-	s = splnet();
-	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-#ifdef INET
-		if (ifa->ifa_addr->sa_family == AF_INET &&
-		    sc->sc_carpdev != NULL) {
-			int count = carp_addrcount(
-			    (struct carp_if *)sc->sc_carpdev->if_carp,
-			    ifatoia(ifa), CARP_COUNT_MASTER);
-
-			if ((cmd == RTM_ADD && count == 1) ||
-			    (cmd == RTM_DELETE && count == 0))
-				rtinit(ifa, cmd, RTF_UP | RTF_HOST);
-		}
-#endif
-	}
-	splx(s);
-}
-
-static int
-carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
-
-	struct carp_softc *sc;
-	struct ifnet *ifp;
-
-	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
-	ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
-	if (ifp == NULL) {
-		free(sc, M_CARP);
-		return (ENOSPC);
-	}
-	
-	sc->sc_flags_backup = 0;
-	sc->sc_suppress = 0;
-	sc->sc_advbase = CARP_DFLTINTV;
-	sc->sc_vhid = -1;	/* required setting */
-	sc->sc_advskew = 0;
-	sc->sc_init_counter = 1;
-	sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
-#ifdef INET
-	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
-	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
-	    M_WAITOK);
-	sc->sc_imo.imo_mfilters = NULL;
-	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
-	sc->sc_imo.imo_multicast_vif = -1;
-#endif
-#ifdef INET6
-	sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
-	    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
-	    M_WAITOK);
-	sc->sc_im6o.im6o_mfilters = NULL;
-	sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
-	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
-#endif
-
-	callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
-	callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
-	callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
-	
-	ifp->if_softc = sc;
-	if_initname(ifp, CARP_IFNAME, unit);
-	ifp->if_mtu = ETHERMTU;
-	ifp->if_flags = IFF_LOOPBACK;
-	ifp->if_ioctl = carp_ioctl;
-	ifp->if_output = carp_looutput;
-	ifp->if_start = carp_start;
-	ifp->if_type = IFT_CARP;
-	ifp->if_snd.ifq_maxlen = ifqmaxlen;
-	ifp->if_hdrlen = 0;
-	if_attach(ifp);
-	bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
-	mtx_lock(&carp_mtx);
-	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
-	mtx_unlock(&carp_mtx);
-	return (0);
-}
-
-static void
-carp_clone_destroy(struct ifnet *ifp)
-{
-	struct carp_softc *sc = ifp->if_softc;
-
-	if (sc->sc_carpdev)
-		CARP_SCLOCK(sc);
-	carpdetach(sc, 1);	/* Returns unlocked. */
-
-	mtx_lock(&carp_mtx);
-	LIST_REMOVE(sc, sc_next);
-	mtx_unlock(&carp_mtx);
-	bpfdetach(ifp);
-	if_detach(ifp);
-	if_free_type(ifp, IFT_ETHER);
-#ifdef INET
-	free(sc->sc_imo.imo_membership, M_CARP);
-#endif
-#ifdef INET6
-	free(sc->sc_im6o.im6o_membership, M_CARP);
-#endif
-	free(sc, M_CARP);
-}
-
 /*
- * This function can be called on CARP interface destroy path,
- * and in case of the removal of the underlying interface as
- * well. We differentiate these two cases: in case of destruction
- * of the underlying interface, we do not cleanup our multicast
- * memberships, since they are already freed. But we purge pointers
- * to multicast structures, since they are no longer valid, to
- * avoid panic in future calls to carpdetach(). Also, we do not
- * release the lock on return, because the function will be
- * called once more, for another CARP instance on the same
- * interface.
- */
-static void
-carpdetach(struct carp_softc *sc, int unlock)
-{
-	struct carp_if *cif;
-
-	callout_stop(&sc->sc_ad_tmo);
-	callout_stop(&sc->sc_md_tmo);
-	callout_stop(&sc->sc_md6_tmo);
-
-	if (sc->sc_suppress)
-		carp_suppress_preempt--;
-	sc->sc_suppress = 0;
-
-	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
-		carp_suppress_preempt--;
-	sc->sc_sendad_errors = 0;
-
-	carp_set_state(sc, INIT);
-	SC2IFP(sc)->if_flags &= ~IFF_UP;
-	carp_setrun(sc, 0);
-#ifdef INET
-	carp_multicast_cleanup(sc, unlock);
-#endif
-#ifdef INET6
-	carp_multicast6_cleanup(sc, unlock);
-#endif
-
-	if (sc->sc_carpdev != NULL) {
-		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-		CARP_LOCK_ASSERT(cif);
-		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
-		if (!--cif->vhif_nvrs) {
-			ifpromisc(sc->sc_carpdev, 0);
-			sc->sc_carpdev->if_carp = NULL;
-			CARP_LOCK_DESTROY(cif);
-			free(cif, M_CARP);
-		} else if (unlock)
-			CARP_UNLOCK(cif);
-		sc->sc_carpdev = NULL;
-	}
-}
-
-/* Detach an interface from the carp. */
-static void
-carp_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
-	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
-	struct carp_softc *sc, *nextsc;
-
-	if (cif == NULL)
-		return;
-
-	/*
-	 * XXX: At the end of for() cycle the lock will be destroyed.
-	 */
-	CARP_LOCK(cif);
-	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
-		nextsc = TAILQ_NEXT(sc, sc_list);
-		carpdetach(sc, 0);
-	}
-}
-
-/*
  * process input packet.
  * we have rearranged checks order compared to the rfc,
  * but it seems more efficient this way or not possible otherwise.
@@ -572,25 +442,15 @@
 
 	CARPSTATS_INC(carps_ipackets);
 
-	if (!carp_opts[CARPCTL_ALLOW]) {
+	if (!V_carp_allow) {
 		m_freem(m);
 		return;
 	}
 
-	/* check if received on a valid carp interface */
-	if (m->m_pkthdr.rcvif->if_carp == NULL) {
-		CARPSTATS_INC(carps_badif);
-		CARP_DEBUG("carp_input: packet received on non-carp "
-		    "interface: %s\n",
-		    m->m_pkthdr.rcvif->if_xname);
-		m_freem(m);
-		return;
-	}
-
 	/* verify that the IP TTL is 255.  */
 	if (ip->ip_ttl != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
-		CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
+		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
 		    ip->ip_ttl,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
@@ -601,9 +461,8 @@
 
 	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
 		CARPSTATS_INC(carps_badlen);
-		CARP_DEBUG("carp_input: received len %zd < "
-		    "sizeof(struct carp_header) on %s\n",
-		    m->m_len - sizeof(struct ip),
+		CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
+		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
@@ -612,7 +471,7 @@
 	if (iplen + sizeof(*ch) < m->m_len) {
 		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
 			CARPSTATS_INC(carps_hdrops);
-			CARP_DEBUG("carp_input: pullup failed\n");
+			CARP_DEBUG("%s: pullup failed\n", __func__);
 			return;
 		}
 		ip = mtod(m, struct ip *);
@@ -626,7 +485,7 @@
 	len = iplen + sizeof(*ch);
 	if (len > m->m_pkthdr.len) {
 		CARPSTATS_INC(carps_badlen);
-		CARP_DEBUG("carp_input: packet too short %d on %s\n",
+		CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
 		    m->m_pkthdr.len,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
@@ -642,9 +501,9 @@
 
 	/* verify the CARP checksum */
 	m->m_data += iplen;
-	if (carp_cksum(m, len - iplen)) {
+	if (in_cksum(m, len - iplen)) {
 		CARPSTATS_INC(carps_badsum);
-		CARP_DEBUG("carp_input: checksum failed on %s\n",
+		CARP_DEBUG("%s: checksum failed on %s\n", __func__,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return;
@@ -666,7 +525,7 @@
 
 	CARPSTATS_INC(carps_ipackets6);
 
-	if (!carp_opts[CARPCTL_ALLOW]) {
+	if (!V_carp_allow) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -674,9 +533,8 @@
 	/* check if received on a valid carp interface */
 	if (m->m_pkthdr.rcvif->if_carp == NULL) {
 		CARPSTATS_INC(carps_badif);
-		CARP_DEBUG("carp6_input: packet received on non-carp "
-		    "interface: %s\n",
-		    m->m_pkthdr.rcvif->if_xname);
+		CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
+		    __func__, m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -684,9 +542,8 @@
 	/* verify that the IP TTL is 255 */
 	if (ip6->ip6_hlim != CARP_DFLTTL) {
 		CARPSTATS_INC(carps_badttl);
-		CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
-		    ip6->ip6_hlim,
-		    m->m_pkthdr.rcvif->if_xname);
+		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+		    ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -696,7 +553,7 @@
 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
 	if (ch == NULL) {
 		CARPSTATS_INC(carps_badlen);
-		CARP_DEBUG("carp6_input: packet size %u too small\n", len);
+		CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
 		return (IPPROTO_DONE);
 	}
 
@@ -703,9 +560,9 @@
 
 	/* verify the CARP checksum */
 	m->m_data += *offp;
-	if (carp_cksum(m, sizeof(*ch))) {
+	if (in_cksum(m, sizeof(*ch))) {
 		CARPSTATS_INC(carps_badsum);
-		CARP_DEBUG("carp6_input: checksum failed, on %s\n",
+		CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
 		    m->m_pkthdr.rcvif->if_xname);
 		m_freem(m);
 		return (IPPROTO_DONE);
@@ -721,62 +578,46 @@
 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	struct ifaddr *ifa;
 	struct carp_softc *sc;
-	u_int64_t tmp_counter;
+	uint64_t tmp_counter;
 	struct timeval sc_tv, ch_tv;
 
 	/* verify that the VHID is valid on the receiving interface */
-	CARP_LOCK(ifp->if_carp);
-	TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
-		if (sc->sc_vhid == ch->carp_vhid)
+	IF_ADDR_RLOCK(ifp);
+	IFNET_FOREACH_IFA(ifp, ifa)
+		if (ifa->ifa_addr->sa_family == af &&
+		    ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
+			ifa_ref(ifa);
 			break;
+		}
+	IF_ADDR_RUNLOCK(ifp);
 
-	if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
-	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+	if (ifa == NULL) {
 		CARPSTATS_INC(carps_badvhid);
-		CARP_UNLOCK(ifp->if_carp);
 		m_freem(m);
 		return;
 	}
 
-	getmicrotime(&SC2IFP(sc)->if_lastchange);
-	SC2IFP(sc)->if_ipackets++;
-	SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
-	if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
-		uint32_t af1 = af;
-#ifdef INET
-		struct ip *ip = mtod(m, struct ip *);
-
-		/* BPF wants net byte order */
-		if (af == AF_INET) {
-			ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
-			ip->ip_off = htons(ip->ip_off);
-		}
-#endif
-		bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
-	}
-
 	/* verify the CARP version. */
 	if (ch->carp_version != CARP_VERSION) {
 		CARPSTATS_INC(carps_badver);
-		SC2IFP(sc)->if_ierrors++;
-		CARP_UNLOCK(ifp->if_carp);
-		CARP_DEBUG("%s; invalid version %d\n",
-		    SC2IFP(sc)->if_xname,
+		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
 		    ch->carp_version);
+		ifa_free(ifa);
 		m_freem(m);
 		return;
 	}
 
-	/* verify the hash */
+	sc = ifa->ifa_carp;
+	CARP_LOCK(sc);
+	ifa_free(ifa);
+
 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
 		CARPSTATS_INC(carps_badauth);
-		SC2IFP(sc)->if_ierrors++;
-		CARP_UNLOCK(ifp->if_carp);
-		CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
-		m_freem(m);
-		return;
+		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
+		    sc->sc_vhid, ifp->if_xname);
+		goto out;
 	}
 
 	tmp_counter = ntohl(ch->carp_counter[0]);
@@ -789,10 +630,7 @@
 	sc->sc_counter = tmp_counter;
 
 	sc_tv.tv_sec = sc->sc_advbase;
-	if (carp_suppress_preempt && sc->sc_advskew <  240)
-		sc_tv.tv_usec = 240 * 1000000 / 256;
-	else
-		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
 	ch_tv.tv_sec = ch->carp_advbase;
 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
 
@@ -807,12 +645,13 @@
 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
 			callout_stop(&sc->sc_ad_tmo);
-			CARP_LOG("%s: MASTER -> BACKUP "
-			   "(more frequent advertisement received)\n",
-			   SC2IFP(sc)->if_xname);
+			CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
+			    "(more frequent advertisement received)\n",
+			    sc->sc_vhid,
+			    sc->sc_carpdev->if_xname);
 			carp_set_state(sc, BACKUP);
 			carp_setrun(sc, 0);
-			carp_setroute(sc, RTM_DELETE);
+			carp_delroute(sc);
 		}
 		break;
 	case BACKUP:
@@ -820,11 +659,11 @@
 		 * If we're pre-empting masters who advertise slower than us,
 		 * and this one claims to be slower, treat him as down.
 		 */
-		if (carp_opts[CARPCTL_PREEMPT] &&
-		    timevalcmp(&sc_tv, &ch_tv, <)) {
-			CARP_LOG("%s: BACKUP -> MASTER "
+		if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
+			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
 			    "(preempting a slower master)\n",
-			    SC2IFP(sc)->if_xname);
+			    sc->sc_vhid,
+			    sc->sc_carpdev->if_xname);
 			carp_master_down_locked(sc);
 			break;
 		}
@@ -836,9 +675,10 @@
 		 */
 		sc_tv.tv_sec = sc->sc_advbase * 3;
 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
-			CARP_LOG("%s: BACKUP -> MASTER "
+			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
 			    "(master timed out)\n",
-			    SC2IFP(sc)->if_xname);
+			    sc->sc_vhid,
+			    sc->sc_carpdev->if_xname);
 			carp_master_down_locked(sc);
 			break;
 		}
@@ -851,10 +691,9 @@
 		break;
 	}
 
-	CARP_UNLOCK(ifp->if_carp);
-
+out:
+	CARP_UNLOCK(sc);
 	m_freem(m);
-	return;
 }
 
 static int
@@ -861,7 +700,6 @@
 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
 {
 	struct m_tag *mtag;
-	struct ifnet *ifp = SC2IFP(sc);
 
 	if (sc->sc_init_counter) {
 		/* this could also be seconds since unix epoch */
@@ -877,95 +715,117 @@
 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
 
 	/* Tag packet for carp_output */
-	mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
-	if (mtag == NULL) {
+	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
+	    M_NOWAIT)) == NULL) {
 		m_freem(m);
-		SC2IFP(sc)->if_oerrors++;
+		CARPSTATS_INC(carps_onomem);
 		return (ENOMEM);
 	}
-	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+	bcopy(&sc, mtag + 1, sizeof(sc));
 	m_tag_prepend(m, mtag);
 
 	return (0);
 }
 
+/*
+ * To avoid LORs and possible recursions this function shouldn't
+ * be called directly, but scheduled via taskqueue.
+ */
 static void
-carp_send_ad_all(void)
+carp_send_ad_all(void *ctx __unused, int pending __unused)
 {
 	struct carp_softc *sc;
 
 	mtx_lock(&carp_mtx);
-	LIST_FOREACH(sc, &carpif_list, sc_next) {
-		if (sc->sc_carpdev == NULL)
-			continue;
-		CARP_SCLOCK(sc);
-		if ((SC2IFP(sc)->if_flags & IFF_UP) &&
-		    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
-		     sc->sc_state == MASTER)
+	LIST_FOREACH(sc, &carp_list, sc_next)
+		if (sc->sc_state == MASTER) {
+			CARP_LOCK(sc);
+			CURVNET_SET(sc->sc_carpdev->if_vnet);
 			carp_send_ad_locked(sc);
-		CARP_SCUNLOCK(sc);
-	}
+			CURVNET_RESTORE();
+			CARP_UNLOCK(sc);
+		}
 	mtx_unlock(&carp_mtx);
 }
 
+/* Send a periodic advertisement, executed in callout context. */
 static void
 carp_send_ad(void *v)
 {
 	struct carp_softc *sc = v;
 
-	CARP_SCLOCK(sc);
+	CARP_LOCK_ASSERT(sc);
+	CURVNET_SET(sc->sc_carpdev->if_vnet);
 	carp_send_ad_locked(sc);
-	CARP_SCUNLOCK(sc);
+	CURVNET_RESTORE();
+	CARP_UNLOCK(sc);
 }
 
 static void
+carp_send_ad_error(struct carp_softc *sc, int error)
+{
+
+	if (error) {
+		if (sc->sc_sendad_errors < INT_MAX)
+			sc->sc_sendad_errors++;
+		if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+			static const char fmt[] = "send error %d on %s";
+			char msg[sizeof(fmt) + IFNAMSIZ];
+
+			sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
+			carp_demote_adj(V_carp_senderr_adj, msg);
+		}
+		sc->sc_sendad_success = 0;
+	} else {
+		if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
+		    ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
+			static const char fmt[] = "send ok on %s";
+			char msg[sizeof(fmt) + IFNAMSIZ];
+
+			sprintf(msg, fmt, sc->sc_carpdev->if_xname);
+			carp_demote_adj(-V_carp_senderr_adj, msg);
+			sc->sc_sendad_errors = 0;
+		} else
+			sc->sc_sendad_errors = 0;
+	}
+}
+
+static void
 carp_send_ad_locked(struct carp_softc *sc)
 {
 	struct carp_header ch;
 	struct timeval tv;
+	struct sockaddr sa;
+	struct ifaddr *ifa;
 	struct carp_header *ch_ptr;
 	struct mbuf *m;
-	int len, advbase, advskew;
+	int len, advskew;
 
-	CARP_SCLOCK_ASSERT(sc);
+	CARP_LOCK_ASSERT(sc);
 
-	/* bow out if we've lost our UPness or RUNNINGuiness */
-	if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
-	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
-		advbase = 255;
-		advskew = 255;
-	} else {
-		advbase = sc->sc_advbase;
-		if (!carp_suppress_preempt || sc->sc_advskew > 240)
-			advskew = sc->sc_advskew;
-		else
-			advskew = 240;
-		tv.tv_sec = advbase;
-		tv.tv_usec = advskew * 1000000 / 256;
-	}
+	advskew = DEMOTE_ADVSKEW(sc);
+	tv.tv_sec = sc->sc_advbase;
+	tv.tv_usec = advskew * 1000000 / 256;
 
 	ch.carp_version = CARP_VERSION;
 	ch.carp_type = CARP_ADVERTISEMENT;
 	ch.carp_vhid = sc->sc_vhid;
-	ch.carp_advbase = advbase;
+	ch.carp_advbase = sc->sc_advbase;
 	ch.carp_advskew = advskew;
 	ch.carp_authlen = 7;	/* XXX DEFINE */
 	ch.carp_pad1 = 0;	/* must be zero */
 	ch.carp_cksum = 0;
 
+	/* XXXGL: OpenBSD picks first ifaddr with needed family. */
+
 #ifdef INET
-	if (sc->sc_ia) {
+	if (sc->sc_naddrs) {
 		struct ip *ip;
 
-		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
-			SC2IFP(sc)->if_oerrors++;
 			CARPSTATS_INC(carps_onomem);
-			/* XXX maybe less ? */
-			if (advbase != 255 || advskew != 255)
-				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
-				    carp_send_ad, sc);
-			return;
+			goto resched;
 		}
 		len = sizeof(*ip) + sizeof(ch);
 		m->m_pkthdr.len = len;
@@ -977,67 +837,47 @@
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(*ip) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
-		ip->ip_len = len;
+		ip->ip_len = htons(len);
 		ip->ip_id = ip_newid();
-		ip->ip_off = IP_DF;
+		ip->ip_off = htons(IP_DF);
 		ip->ip_ttl = CARP_DFLTTL;
 		ip->ip_p = IPPROTO_CARP;
 		ip->ip_sum = 0;
-		ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+
+		bzero(&sa, sizeof(sa));
+		sa.sa_family = AF_INET;
+		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+		if (ifa != NULL) {
+			ip->ip_src.s_addr =
+			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+			ifa_free(ifa);
+		} else
+			ip->ip_src.s_addr = 0;
 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
 
 		ch_ptr = (struct carp_header *)(&ip[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		if (carp_prepare_ad(m, sc, ch_ptr))
-			return;
+			goto resched;
 
 		m->m_data += sizeof(*ip);
-		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
 		m->m_data -= sizeof(*ip);
 
-		getmicrotime(&SC2IFP(sc)->if_lastchange);
-		SC2IFP(sc)->if_opackets++;
-		SC2IFP(sc)->if_obytes += len;
 		CARPSTATS_INC(carps_opackets);
 
-		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
-			SC2IFP(sc)->if_oerrors++;
-			if (sc->sc_sendad_errors < INT_MAX)
-				sc->sc_sendad_errors++;
-			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
-				carp_suppress_preempt++;
-				if (carp_suppress_preempt == 1) {
-					CARP_SCUNLOCK(sc);
-					carp_send_ad_all();
-					CARP_SCLOCK(sc);
-				}
-			}
-			sc->sc_sendad_success = 0;
-		} else {
-			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
-				if (++sc->sc_sendad_success >=
-				    CARP_SENDAD_MIN_SUCCESS) {
-					carp_suppress_preempt--;
-					sc->sc_sendad_errors = 0;
-				}
-			} else
-				sc->sc_sendad_errors = 0;
-		}
+		carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+		    &sc->sc_carpdev->if_carp->cif_imo, NULL));
 	}
 #endif /* INET */
 #ifdef INET6
-	if (sc->sc_ia6) {
+	if (sc->sc_naddrs6) {
 		struct ip6_hdr *ip6;
 
-		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
-			SC2IFP(sc)->if_oerrors++;
 			CARPSTATS_INC(carps_onomem);
-			/* XXX maybe less ? */
-			if (advbase != 255 || advskew != 255)
-				callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
-				    carp_send_ad, sc);
-			return;
+			goto resched;
 		}
 		len = sizeof(*ip6) + sizeof(ch);
 		m->m_pkthdr.len = len;
@@ -1050,66 +890,119 @@
 		ip6->ip6_vfc |= IPV6_VERSION;
 		ip6->ip6_hlim = CARP_DFLTTL;
 		ip6->ip6_nxt = IPPROTO_CARP;
-		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
-		    sizeof(struct in6_addr));
-		/* set the multicast destination */
+		bzero(&sa, sizeof(sa));
 
+		/* set the source address */
+		sa.sa_family = AF_INET6;
+		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+		if (ifa != NULL) {
+			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+			    sizeof(struct in6_addr));
+			ifa_free(ifa);
+		} else
+			/* This should never happen with IPv6. */
+			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+
+		/* Set the multicast destination. */
 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 		ip6->ip6_dst.s6_addr8[15] = 0x12;
 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
-			SC2IFP(sc)->if_oerrors++;
 			m_freem(m);
 			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
-			return;
+			goto resched;
 		}
 
 		ch_ptr = (struct carp_header *)(&ip6[1]);
 		bcopy(&ch, ch_ptr, sizeof(ch));
 		if (carp_prepare_ad(m, sc, ch_ptr))
-			return;
+			goto resched;
 
 		m->m_data += sizeof(*ip6);
-		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
 		m->m_data -= sizeof(*ip6);
 
-		getmicrotime(&SC2IFP(sc)->if_lastchange);
-		SC2IFP(sc)->if_opackets++;
-		SC2IFP(sc)->if_obytes += len;
 		CARPSTATS_INC(carps_opackets6);
 
-		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
-			SC2IFP(sc)->if_oerrors++;
-			if (sc->sc_sendad_errors < INT_MAX)
-				sc->sc_sendad_errors++;
-			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
-				carp_suppress_preempt++;
-				if (carp_suppress_preempt == 1) {
-					CARP_SCUNLOCK(sc);
-					carp_send_ad_all();
-					CARP_SCLOCK(sc);
-				}
-			}
-			sc->sc_sendad_success = 0;
-		} else {
-			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
-				if (++sc->sc_sendad_success >=
-				    CARP_SENDAD_MIN_SUCCESS) {
-					carp_suppress_preempt--;
-					sc->sc_sendad_errors = 0;
-				}
-			} else
-				sc->sc_sendad_errors = 0;
-		}
+		carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
+		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
 	}
 #endif /* INET6 */
 
-	if (advbase != 255 || advskew != 255)
-		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
-		    carp_send_ad, sc);
+resched:
+	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
+}
 
+static void
+carp_addroute(struct carp_softc *sc)
+{
+	struct ifaddr *ifa;
+
+	CARP_FOREACH_IFA(sc, ifa)
+		carp_ifa_addroute(ifa);
 }
 
+static void
+carp_ifa_addroute(struct ifaddr *ifa)
+{
+
+	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
+	case AF_INET:
+		in_addprefix(ifatoia(ifa), RTF_UP);
+		ifa_add_loopback_route(ifa,
+		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		ifa_add_loopback_route(ifa,
+		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+		in6_ifaddloop(ifa);
+		break;
+#endif
+	}
+}
+
+static void
+carp_delroute(struct carp_softc *sc)
+{
+	struct ifaddr *ifa;
+
+	CARP_FOREACH_IFA(sc, ifa)
+		carp_ifa_delroute(ifa);
+}
+
+static void
+carp_ifa_delroute(struct ifaddr *ifa)
+{
+
+	switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		ifa_del_loopback_route(ifa,
+		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+		in_scrubprefix(ifatoia(ifa), LLE_STATIC);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		ifa_del_loopback_route(ifa,
+		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+		in6_ifremloop(ifa);
+		break;
+#endif
+	}
+}
+
+int
+carp_master(struct ifaddr *ifa)
+{
+	struct carp_softc *sc = ifa->ifa_carp;
+
+	return (sc->sc_state == MASTER);
+}
+
+#ifdef INET
 /*
  * Broadcast a gratuitous ARP request containing
  * the virtual router MAC address for each IP address
@@ -1120,16 +1013,22 @@
 {
 	struct ifaddr *ifa;
 
-	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+	CARP_FOREACH_IFA(sc, ifa)
+		if (ifa->ifa_addr->sa_family == AF_INET)
+			arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
+}
 
-		if (ifa->ifa_addr->sa_family != AF_INET)
-			continue;
+int
+carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
+{
+	struct carp_softc *sc = ifa->ifa_carp;
 
-/*		arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
-		arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
+	if (sc->sc_state == MASTER) {
+		*enaddr = LLADDR(&sc->sc_addr);
+		return (1);
+	}
 
-		DELAY(1000);	/* XXX */
-	}
+	return (0);
 }
 #endif
 
@@ -1137,251 +1036,131 @@
 static void
 carp_send_na(struct carp_softc *sc)
 {
+	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 	struct ifaddr *ifa;
 	struct in6_addr *in6;
-	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 
-	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-
+	CARP_FOREACH_IFA(sc, ifa) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
-		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
+		in6 = IFA_IN6(ifa);
 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
 		DELAY(1000);	/* XXX */
 	}
 }
-#endif /* INET6 */
 
-#ifdef INET
-static int
-carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
-{
-	struct carp_softc *vh;
-	struct ifaddr *ifa;
-	int count = 0;
-
-	CARP_LOCK_ASSERT(cif);
-
-	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
-		if ((type == CARP_COUNT_RUNNING &&
-		    (SC2IFP(vh)->if_flags & IFF_UP) &&
-		    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
-		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
-			IF_ADDR_RLOCK(SC2IFP(vh));
-			TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
-			    ifa_list) {
-				if (ifa->ifa_addr->sa_family == AF_INET &&
-				    ia->ia_addr.sin_addr.s_addr ==
-				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
-					count++;
-			}
-			IF_ADDR_RUNLOCK(SC2IFP(vh));
-		}
-	}
-	return (count);
-}
-
-int
-carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
-    struct in_addr *isaddr, u_int8_t **enaddr)
-{
-	struct carp_if *cif;
-	struct carp_softc *vh;
-	int index, count = 0;
-	struct ifaddr *ifa;
-
-	cif = ifp->if_carp;
-	CARP_LOCK(cif);
-
-	if (carp_opts[CARPCTL_ARPBALANCE]) {
-		/*
-		 * XXX proof of concept implementation.
-		 * We use the source ip to decide which virtual host should
-		 * handle the request. If we're master of that virtual host,
-		 * then we respond, otherwise, just drop the arp packet on
-		 * the floor.
-		 */
-		count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
-		if (count == 0) {
-			/* should never reach this */
-			CARP_UNLOCK(cif);
-			return (0);
-		}
-
-		/* this should be a hash, like pf_hash() */
-		index = ntohl(isaddr->s_addr) % count;
-		count = 0;
-
-		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
-			if ((SC2IFP(vh)->if_flags & IFF_UP) &&
-			    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
-				IF_ADDR_RLOCK(SC2IFP(vh));
-				TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
-				    ifa_list) {
-					if (ifa->ifa_addr->sa_family ==
-					    AF_INET &&
-					    ia->ia_addr.sin_addr.s_addr ==
-					    ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
-						if (count == index) {
-							if (vh->sc_state ==
-							    MASTER) {
-								*enaddr = IF_LLADDR(vh->sc_ifp);
-								IF_ADDR_RUNLOCK(SC2IFP(vh));
-								CARP_UNLOCK(cif);
-								return (1);
-							} else {
-								IF_ADDR_RUNLOCK(SC2IFP(vh));
-								CARP_UNLOCK(cif);
-								return (0);
-							}
-						}
-						count++;
-					}
-				}
-				IF_ADDR_RUNLOCK(SC2IFP(vh));
-			}
-		}
-	} else {
-		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
-			if ((SC2IFP(vh)->if_flags & IFF_UP) &&
-			    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
-			    ia->ia_ifp == SC2IFP(vh) &&
-			    vh->sc_state == MASTER) {
-				*enaddr = IF_LLADDR(vh->sc_ifp);
-				CARP_UNLOCK(cif);
-				return (1);
-			}
-		}
-	}
-	CARP_UNLOCK(cif);
-	return (0);
-}
-#endif
-
-#ifdef INET6
+/*
+ * Returns ifa in case it's a carp address and it is MASTER, or if the address
+ * matches and is not a carp address.  Returns NULL otherwise.
+ */
 struct ifaddr *
 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 {
-	struct carp_if *cif;
-	struct carp_softc *vh;
 	struct ifaddr *ifa;
 
-	cif = ifp->if_carp;
-	CARP_LOCK(cif);
-	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
-		IF_ADDR_RLOCK(SC2IFP(vh));
-		TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
-			if (IN6_ARE_ADDR_EQUAL(taddr,
-			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- 			    (SC2IFP(vh)->if_flags & IFF_UP) &&
-			    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
-			    vh->sc_state == MASTER) {
-				ifa_ref(ifa);
-				IF_ADDR_RUNLOCK(SC2IFP(vh));
-			    	CARP_UNLOCK(cif);
-				return (ifa);
-			}
-		}
-		IF_ADDR_RUNLOCK(SC2IFP(vh));
+	ifa = NULL;
+	IF_ADDR_RLOCK(ifp);
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		if (ifa->ifa_addr->sa_family != AF_INET6)
+			continue;
+		if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
+			continue;
+		if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
+			ifa = NULL;
+		else
+			ifa_ref(ifa);
+		break;
 	}
-	CARP_UNLOCK(cif);
-	
-	return (NULL);
+	IF_ADDR_RUNLOCK(ifp);
+
+	return (ifa);
 }
 
 caddr_t
 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 {
-	struct m_tag *mtag;
-	struct carp_if *cif;
-	struct carp_softc *sc;
 	struct ifaddr *ifa;
 
-	cif = ifp->if_carp;
-	CARP_LOCK(cif);
-	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
-		IF_ADDR_RLOCK(SC2IFP(sc));
-		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-			if (IN6_ARE_ADDR_EQUAL(taddr,
-			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- 			    (SC2IFP(sc)->if_flags & IFF_UP) &&
-			    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
-				struct ifnet *ifp = SC2IFP(sc);
-				mtag = m_tag_get(PACKET_TAG_CARP,
-				    sizeof(struct ifnet *), M_NOWAIT);
-				if (mtag == NULL) {
-					/* better a bit than nothing */
-					IF_ADDR_RUNLOCK(SC2IFP(sc));
-					CARP_UNLOCK(cif);
-					return (IF_LLADDR(sc->sc_ifp));
-				}
-				bcopy(&ifp, (caddr_t)(mtag + 1),
-				    sizeof(struct ifnet *));
-				m_tag_prepend(m, mtag);
+	IF_ADDR_RLOCK(ifp);
+	IFNET_FOREACH_IFA(ifp, ifa)
+		if (ifa->ifa_addr->sa_family == AF_INET6 &&
+		    IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
+			struct carp_softc *sc = ifa->ifa_carp;
+			struct m_tag *mtag;
 
-				IF_ADDR_RUNLOCK(SC2IFP(sc));
-				CARP_UNLOCK(cif);
-				return (IF_LLADDR(sc->sc_ifp));
-			}
+			IF_ADDR_RUNLOCK(ifp);
+
+			mtag = m_tag_get(PACKET_TAG_CARP,
+			    sizeof(struct carp_softc *), M_NOWAIT);
+			if (mtag == NULL)
+				/* Better a bit than nothing. */
+				return (LLADDR(&sc->sc_addr));
+
+			bcopy(&sc, mtag + 1, sizeof(sc));
+			m_tag_prepend(m, mtag);
+
+			return (LLADDR(&sc->sc_addr));
 		}
-		IF_ADDR_RUNLOCK(SC2IFP(sc));
-	}
-	CARP_UNLOCK(cif);
+	IF_ADDR_RUNLOCK(ifp);
 
 	return (NULL);
 }
-#endif
+#endif /* INET6 */
 
-struct ifnet *
+int
 carp_forus(struct ifnet *ifp, u_char *dhost)
 {
-	struct carp_if *cif;
-	struct carp_softc *vh;
-	u_int8_t *ena = dhost;
+	struct carp_softc *sc;
+	uint8_t *ena = dhost;
 
 	if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
-		return (NULL);
+		return (0);
 
-	cif = ifp->if_carp;
-	CARP_LOCK(cif);
-	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
-		if ((SC2IFP(vh)->if_flags & IFF_UP) &&
-		    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
-		    vh->sc_state == MASTER &&
-		    !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
-		    	CARP_UNLOCK(cif);
-			return (SC2IFP(vh));
+	CIF_LOCK(ifp->if_carp);
+	IFNET_FOREACH_CARP(ifp, sc) {
+		CARP_LOCK(sc);
+		if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
+		    ETHER_ADDR_LEN)) {
+			CARP_UNLOCK(sc);
+			CIF_UNLOCK(ifp->if_carp);
+			return (1);
 		}
+		CARP_UNLOCK(sc);
+	}
+	CIF_UNLOCK(ifp->if_carp);
 
-    	CARP_UNLOCK(cif);
-	return (NULL);
+	return (0);
 }
 
+/* Master down timeout event, executed in callout context. */
 static void
 carp_master_down(void *v)
 {
 	struct carp_softc *sc = v;
 
-	CARP_SCLOCK(sc);
-	carp_master_down_locked(sc);
-	CARP_SCUNLOCK(sc);
+	CARP_LOCK_ASSERT(sc);
+
+	CURVNET_SET(sc->sc_carpdev->if_vnet);
+	if (sc->sc_state == BACKUP) {
+		CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n",
+		    sc->sc_vhid,
+		    sc->sc_carpdev->if_xname);
+		carp_master_down_locked(sc);
+	}
+	CURVNET_RESTORE();
+
+	CARP_UNLOCK(sc);
 }
 
 static void
 carp_master_down_locked(struct carp_softc *sc)
 {
-	if (sc->sc_carpdev)
-		CARP_SCLOCK_ASSERT(sc);
 
+	CARP_LOCK_ASSERT(sc);
+
 	switch (sc->sc_state) {
-	case INIT:
-		printf("%s: master_down event in INIT state\n",
-		    SC2IFP(sc)->if_xname);
-		break;
-	case MASTER:
-		break;
 	case BACKUP:
 		carp_set_state(sc, MASTER);
 		carp_send_ad_locked(sc);
@@ -1390,10 +1169,19 @@
 #endif
 #ifdef INET6
 		carp_send_na(sc);
-#endif /* INET6 */
+#endif
 		carp_setrun(sc, 0);
-		carp_setroute(sc, RTM_ADD);
+		carp_addroute(sc);
 		break;
+	case INIT:
+	case MASTER:
+#ifdef INVARIANTS
+		panic("carp: VHID %u@%s: master_down event in %s state\n",
+		    sc->sc_vhid,
+		    sc->sc_carpdev->if_xname,
+		    sc->sc_state ? "MASTER" : "INIT");
+#endif
+		break;
 	}
 }
 
@@ -1406,28 +1194,19 @@
 {
 	struct timeval tv;
 
-	if (sc->sc_carpdev == NULL) {
-		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
-		carp_set_state(sc, INIT);
-		return;
-	} else
-		CARP_SCLOCK_ASSERT(sc);
+	CARP_LOCK_ASSERT(sc);
 
-	if (SC2IFP(sc)->if_flags & IFF_UP &&
-	    sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
-	    sc->sc_carpdev->if_link_state == LINK_STATE_UP)
-		SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
-	else {
-		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
-		carp_setroute(sc, RTM_DELETE);
+	if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
+	    sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
+	    (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
 		return;
-	}
 
 	switch (sc->sc_state) {
 	case INIT:
-		CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
+		CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
+		    sc->sc_vhid,
+		    sc->sc_carpdev->if_xname);
 		carp_set_state(sc, BACKUP);
-		carp_setroute(sc, RTM_DELETE);
 		carp_setrun(sc, 0);
 		break;
 	case BACKUP:
@@ -1440,20 +1219,24 @@
 			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
-#endif /* INET */
+#endif
 #ifdef INET6
 		case AF_INET6:
 			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 			    carp_master_down, sc);
 			break;
-#endif /* INET6 */
+#endif
 		default:
+#ifdef INET
 			if (sc->sc_naddrs)
 				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
+#endif
+#ifdef INET6
 			if (sc->sc_naddrs6)
 				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 				    carp_master_down, sc);
+#endif
 			break;
 		}
 		break;
@@ -1466,546 +1249,476 @@
 	}
 }
 
-#ifdef INET
-static void
-carp_multicast_cleanup(struct carp_softc *sc, int dofree)
+/*
+ * Setup multicast structures.
+ */
+static int
+carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
 {
-	struct ip_moptions *imo = &sc->sc_imo;
-	u_int16_t n = imo->imo_num_memberships;
+	struct ifnet *ifp = cif->cif_ifp;
+	int error = 0;
 
-	/* Clean up our own multicast memberships */
-	while (n-- > 0) {
-		if (imo->imo_membership[n] != NULL) {
-			if (dofree)
-				in_delmulti(imo->imo_membership[n]);
-			imo->imo_membership[n] = NULL;
+	CIF_LOCK_ASSERT(cif);
+
+	switch (sa) {
+#ifdef INET
+	case AF_INET:
+	    {
+		struct ip_moptions *imo = &cif->cif_imo;
+		struct in_addr addr;
+
+		if (imo->imo_membership)
+			return (0);
+
+		imo->imo_membership = (struct in_multi **)malloc(
+		    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+		    M_NOWAIT);
+		if (imo->imo_membership == NULL)
+			return (ENOMEM);
+		imo->imo_mfilters = NULL;
+		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+		imo->imo_multicast_vif = -1;
+
+		addr.s_addr = htonl(INADDR_CARP_GROUP);
+		if ((error = in_joingroup(ifp, &addr, NULL,
+		    &imo->imo_membership[0])) != 0) {
+			free(imo->imo_membership, M_CARP);
+			break;
 		}
+		imo->imo_num_memberships++;
+		imo->imo_multicast_ifp = ifp;
+		imo->imo_multicast_ttl = CARP_DFLTTL;
+		imo->imo_multicast_loop = 0;
+		break;
+	   }
+#endif
+#ifdef INET6
+	case AF_INET6:
+	    {
+		struct ip6_moptions *im6o = &cif->cif_im6o;
+		struct in6_addr in6;
+		struct in6_multi *in6m;
+
+		if (im6o->im6o_membership)
+			return (0);
+
+		im6o->im6o_membership = (struct in6_multi **)malloc(
+		    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+		    M_ZERO | M_NOWAIT);
+		if (im6o->im6o_membership == NULL)
+			return (ENOMEM);
+		im6o->im6o_mfilters = NULL;
+		im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+		im6o->im6o_multicast_hlim = CARP_DFLTTL;
+		im6o->im6o_multicast_ifp = ifp;
+
+		/* Join IPv6 CARP multicast group. */
+		bzero(&in6, sizeof(in6));
+		in6.s6_addr16[0] = htons(0xff02);
+		in6.s6_addr8[15] = 0x12;
+		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+			free(im6o->im6o_membership, M_CARP);
+			break;
+		}
+		in6m = NULL;
+		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+			free(im6o->im6o_membership, M_CARP);
+			break;
+		}
+		im6o->im6o_membership[0] = in6m;
+		im6o->im6o_num_memberships++;
+
+		/* Join solicited multicast address. */
+		bzero(&in6, sizeof(in6));
+		in6.s6_addr16[0] = htons(0xff02);
+		in6.s6_addr32[1] = 0;
+		in6.s6_addr32[2] = htonl(1);
+		in6.s6_addr32[3] = 0;
+		in6.s6_addr8[12] = 0xff;
+		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+			in6_mc_leave(im6o->im6o_membership[0], NULL);
+			free(im6o->im6o_membership, M_CARP);
+			break;
+		}
+		in6m = NULL;
+		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+			in6_mc_leave(im6o->im6o_membership[0], NULL);
+			free(im6o->im6o_membership, M_CARP);
+			break;
+		}
+		im6o->im6o_membership[1] = in6m;
+		im6o->im6o_num_memberships++;
+		break;
+	    }
+#endif
 	}
-	KASSERT(imo->imo_mfilters == NULL,
-	   ("%s: imo_mfilters != NULL", __func__));
-	imo->imo_num_memberships = 0;
-	imo->imo_multicast_ifp = NULL;
+
+	return (error);
 }
-#endif
 
-#ifdef INET6
+/*
+ * Free multicast structures.
+ */
 static void
-carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
+carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
 {
-	struct ip6_moptions *im6o = &sc->sc_im6o;
-	u_int16_t n = im6o->im6o_num_memberships;
 
-	while (n-- > 0) {
-		if (im6o->im6o_membership[n] != NULL) {
-			if (dofree)
-				in6_mc_leave(im6o->im6o_membership[n], NULL);
-			im6o->im6o_membership[n] = NULL;
+	CIF_LOCK_ASSERT(cif);
+	switch (sa) {
+#ifdef INET
+	case AF_INET:
+		if (cif->cif_naddrs == 0) {
+			struct ip_moptions *imo = &cif->cif_imo;
+
+			in_leavegroup(imo->imo_membership[0], NULL);
+			KASSERT(imo->imo_mfilters == NULL,
+			    ("%s: imo_mfilters != NULL", __func__));
+			free(imo->imo_membership, M_CARP);
+			imo->imo_membership = NULL;
+
 		}
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		if (cif->cif_naddrs6 == 0) {
+			struct ip6_moptions *im6o = &cif->cif_im6o;
+
+			in6_mc_leave(im6o->im6o_membership[0], NULL);
+			in6_mc_leave(im6o->im6o_membership[1], NULL);
+			KASSERT(im6o->im6o_mfilters == NULL,
+			    ("%s: im6o_mfilters != NULL", __func__));
+			free(im6o->im6o_membership, M_CARP);
+			im6o->im6o_membership = NULL;
+		}
+		break;
+#endif
 	}
-	KASSERT(im6o->im6o_mfilters == NULL,
-	   ("%s: im6o_mfilters != NULL", __func__));
-	im6o->im6o_num_memberships = 0;
-	im6o->im6o_multicast_ifp = NULL;
 }
-#endif
 
-#ifdef INET
-static int
-carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
 {
-	struct ifnet *ifp;
-	struct carp_if *cif;
-	struct in_ifaddr *ia, *ia_if;
-	struct ip_moptions *imo = &sc->sc_imo;
-	struct in_addr addr;
-	u_long iaddr = htonl(sin->sin_addr.s_addr);
-	int own, error;
+	struct m_tag *mtag;
+	struct carp_softc *sc;
 
-	if (sin->sin_addr.s_addr == 0) {
-		if (!(SC2IFP(sc)->if_flags & IFF_UP))
-			carp_set_state(sc, INIT);
-		if (sc->sc_naddrs)
-			SC2IFP(sc)->if_flags |= IFF_UP;
-		if (sc->sc_carpdev)
-			CARP_SCLOCK(sc);
-		carp_setrun(sc, 0);
-		if (sc->sc_carpdev)
-			CARP_SCUNLOCK(sc);
+	if (!sa)
 		return (0);
-	}
 
-	/* we have to do it by hands to check we won't match on us */
-	ia_if = NULL; own = 0;
-	IN_IFADDR_RLOCK();
-	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
-		/* and, yeah, we need a multicast-capable iface too */
-		if (ia->ia_ifp != SC2IFP(sc) &&
-		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
-		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
-			if (!ia_if)
-				ia_if = ia;
-			if (sin->sin_addr.s_addr ==
-			    ia->ia_addr.sin_addr.s_addr)
-				own++;
-		}
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		break;
+#endif
+	default:
+		return (0);
 	}
 
-	if (!ia_if) {
-		IN_IFADDR_RUNLOCK();
-		return (EADDRNOTAVAIL);
-	}
+	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+	if (mtag == NULL)
+		return (0);
 
-	ia = ia_if;
-	ifa_ref(&ia->ia_ifa);
-	IN_IFADDR_RUNLOCK();
+	bcopy(mtag + 1, &sc, sizeof(sc));
 
-	ifp = ia->ia_ifp;
+	/* Set the source MAC address to the Virtual Router MAC Address. */
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+	case IFT_BRIDGE:
+	case IFT_L2VLAN: {
+			struct ether_header *eh;
 
-	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
-	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
-		ifa_free(&ia->ia_ifa);
-		return (EADDRNOTAVAIL);
-	}
+			eh = mtod(m, struct ether_header *);
+			eh->ether_shost[0] = 0;
+			eh->ether_shost[1] = 0;
+			eh->ether_shost[2] = 0x5e;
+			eh->ether_shost[3] = 0;
+			eh->ether_shost[4] = 1;
+			eh->ether_shost[5] = sc->sc_vhid;
+		}
+		break;
+	case IFT_FDDI: {
+			struct fddi_header *fh;
 
-	if (imo->imo_num_memberships == 0) {
-		addr.s_addr = htonl(INADDR_CARP_GROUP);
-		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
-		    NULL) {
-			ifa_free(&ia->ia_ifa);
-			return (ENOBUFS);
+			fh = mtod(m, struct fddi_header *);
+			fh->fddi_shost[0] = 0;
+			fh->fddi_shost[1] = 0;
+			fh->fddi_shost[2] = 0x5e;
+			fh->fddi_shost[3] = 0;
+			fh->fddi_shost[4] = 1;
+			fh->fddi_shost[5] = sc->sc_vhid;
 		}
-		imo->imo_num_memberships++;
-		imo->imo_multicast_ifp = ifp;
-		imo->imo_multicast_ttl = CARP_DFLTTL;
-		imo->imo_multicast_loop = 0;
+		break;
+	case IFT_ISO88025: {
+ 			struct iso88025_header *th;
+ 			th = mtod(m, struct iso88025_header *);
+			th->iso88025_shost[0] = 3;
+			th->iso88025_shost[1] = 0;
+			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+			th->iso88025_shost[4] = 0;
+			th->iso88025_shost[5] = 0;
+		}
+		break;
+	default:
+		printf("%s: carp is not supported for the %d interface type\n",
+		    ifp->if_xname, ifp->if_type);
+		return (EOPNOTSUPP);
 	}
 
-	if (!ifp->if_carp) {
+	return (0);
+}
 
-		cif = malloc(sizeof(*cif), M_CARP,
-		    M_WAITOK|M_ZERO);
-		if (!cif) {
-			error = ENOBUFS;
-			goto cleanup;
-		}
-		if ((error = ifpromisc(ifp, 1))) {
-			free(cif, M_CARP);
-			goto cleanup;
-		}
-		
-		CARP_LOCK_INIT(cif);
-		CARP_LOCK(cif);
-		cif->vhif_ifp = ifp;
-		TAILQ_INIT(&cif->vhif_vrs);
-		ifp->if_carp = cif;
+static struct carp_softc*
+carp_alloc(struct ifnet *ifp)
+{
+	struct carp_softc *sc;
+	struct carp_if *cif;
 
-	} else {
-		struct carp_softc *vr;
-
-		cif = (struct carp_if *)ifp->if_carp;
-		CARP_LOCK(cif);
-		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
-			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
-				CARP_UNLOCK(cif);
-				error = EEXIST;
-				goto cleanup;
-			}
+	if ((cif = ifp->if_carp) == NULL) {
+		cif = carp_alloc_if(ifp);
+		if (cif == NULL)
+			return (NULL);
 	}
-	sc->sc_ia = ia;
-	sc->sc_carpdev = ifp;
 
-	{ /* XXX prevent endless loop if already in queue */
-	struct carp_softc *vr, *after = NULL;
-	int myself = 0;
-	cif = (struct carp_if *)ifp->if_carp;
+	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
 
-	/* XXX: cif should not change, right? So we still hold the lock */
-	CARP_LOCK_ASSERT(cif);
+	sc->sc_advbase = CARP_DFLTINTV;
+	sc->sc_vhid = -1;	/* required setting */
+	sc->sc_init_counter = 1;
+	sc->sc_state = INIT;
 
-	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
-		if (vr == sc)
-			myself = 1;
-		if (vr->sc_vhid < sc->sc_vhid)
-			after = vr;
-	}
+	sc->sc_ifasiz = sizeof(struct ifaddr *);
+	sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
+	sc->sc_carpdev = ifp;
 
-	if (!myself) {
-		/* We're trying to keep things in order */
-		if (after == NULL) {
-			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
-		} else {
-			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
-		}
-		cif->vhif_nvrs++;
-	}
-	}
+	CARP_LOCK_INIT(sc);
+#ifdef INET
+	callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+#ifdef INET6
+	callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+	callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 
-	sc->sc_naddrs++;
-	SC2IFP(sc)->if_flags |= IFF_UP;
-	if (own)
-		sc->sc_advskew = 0;
-	carp_sc_state_locked(sc);
-	carp_setrun(sc, 0);
+	CIF_LOCK(cif);
+	TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
+	CIF_UNLOCK(cif);
 
-	CARP_UNLOCK(cif);
-	ifa_free(&ia->ia_ifa);	/* XXXRW: should hold reference for softc. */
+	mtx_lock(&carp_mtx);
+	LIST_INSERT_HEAD(&carp_list, sc, sc_next);
+	mtx_unlock(&carp_mtx);
 
-	return (0);
-
-cleanup:
-	in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
-	ifa_free(&ia->ia_ifa);
-	return (error);
+	return (sc);
 }
 
 static int
-carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+carp_grow_ifas(struct carp_softc *sc)
 {
-	int error = 0;
+	struct ifaddr **new;
 
-	if (!--sc->sc_naddrs) {
-		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-		struct ip_moptions *imo = &sc->sc_imo;
+	CARP_LOCK_ASSERT(sc);
 
-		CARP_LOCK(cif);
-		callout_stop(&sc->sc_ad_tmo);
-		SC2IFP(sc)->if_flags &= ~IFF_UP;
-		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
-		sc->sc_vhid = -1;
-		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
-		imo->imo_multicast_ifp = NULL;
-		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
-		if (!--cif->vhif_nvrs) {
-			sc->sc_carpdev->if_carp = NULL;
-			CARP_LOCK_DESTROY(cif);
-			free(cif, M_CARP);
-		} else {
-			CARP_UNLOCK(cif);
-		}
-	}
+	new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
+	if (new == NULL)
+		return (ENOMEM);
+	bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
+	free(sc->sc_ifas, M_CARP);
+	sc->sc_ifas = new;
+	sc->sc_ifasiz *= 2;
 
-	return (error);
+	return (0);
 }
-#endif
 
-#ifdef INET6
-static int
-carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_destroy(struct carp_softc *sc)
 {
-	struct ifnet *ifp;
-	struct carp_if *cif;
-	struct in6_ifaddr *ia, *ia_if;
-	struct ip6_moptions *im6o = &sc->sc_im6o;
-	struct in6_addr in6;
-	int own, error;
+	struct ifnet *ifp = sc->sc_carpdev;
+	struct carp_if *cif = ifp->if_carp;
 
-	error = 0;
+	CIF_LOCK_ASSERT(cif);
 
-	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-		if (!(SC2IFP(sc)->if_flags & IFF_UP))
-			carp_set_state(sc, INIT);
-		if (sc->sc_naddrs6)
-			SC2IFP(sc)->if_flags |= IFF_UP;
-		if (sc->sc_carpdev)
-			CARP_SCLOCK(sc);
-		carp_setrun(sc, 0);
-		if (sc->sc_carpdev)
-			CARP_SCUNLOCK(sc);
-		return (0);
-	}
+	TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
 
-	/* we have to do it by hands to check we won't match on us */
-	ia_if = NULL; own = 0;
-	IN6_IFADDR_RLOCK();
-	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
-		int i;
+	mtx_lock(&carp_mtx);
+	LIST_REMOVE(sc, sc_next);
+	mtx_unlock(&carp_mtx);
 
-		for (i = 0; i < 4; i++) {
-			if ((sin6->sin6_addr.s6_addr32[i] &
-			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
-			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
-			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
-				break;
-		}
-		/* and, yeah, we need a multicast-capable iface too */
-		if (ia->ia_ifp != SC2IFP(sc) &&
-		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
-		    (i == 4)) {
-			if (!ia_if)
-				ia_if = ia;
-			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
-			    &ia->ia_addr.sin6_addr))
-				own++;
-		}
-	}
+	CARP_LOCK(sc);
+	if (sc->sc_suppress)
+		carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
+	callout_drain(&sc->sc_ad_tmo);
+#ifdef INET
+	callout_drain(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
+	callout_drain(&sc->sc_md6_tmo);
+#endif
+	CARP_LOCK_DESTROY(sc);
 
-	if (!ia_if) {
-		IN6_IFADDR_RUNLOCK();
-		return (EADDRNOTAVAIL);
-	}
-	ia = ia_if;
-	ifa_ref(&ia->ia_ifa);
-	IN6_IFADDR_RUNLOCK();
-	ifp = ia->ia_ifp;
+	free(sc->sc_ifas, M_CARP);
+	free(sc, M_CARP);
+}
 
-	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
-	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
-		ifa_free(&ia->ia_ifa);
-		return (EADDRNOTAVAIL);
-	}
+static struct carp_if*
+carp_alloc_if(struct ifnet *ifp)
+{
+	struct carp_if *cif;
 
-	if (!sc->sc_naddrs6) {
-		struct in6_multi *in6m;
+	cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
 
-		im6o->im6o_multicast_ifp = ifp;
+	if (ifpromisc(ifp, 1) != 0)
+		goto cleanup;
 
-		/* join CARP multicast address */
-		bzero(&in6, sizeof(in6));
-		in6.s6_addr16[0] = htons(0xff02);
-		in6.s6_addr8[15] = 0x12;
-		if (in6_setscope(&in6, ifp, NULL) != 0)
-			goto cleanup;
-		in6m = NULL;
-		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
-		if (error)
-			goto cleanup;
-		im6o->im6o_membership[0] = in6m;
-		im6o->im6o_num_memberships++;
+	CIF_LOCK_INIT(cif);
+	cif->cif_ifp = ifp;
+	TAILQ_INIT(&cif->cif_vrs);
 
-		/* join solicited multicast address */
-		bzero(&in6, sizeof(in6));
-		in6.s6_addr16[0] = htons(0xff02);
-		in6.s6_addr32[1] = 0;
-		in6.s6_addr32[2] = htonl(1);
-		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
-		in6.s6_addr8[12] = 0xff;
-		if (in6_setscope(&in6, ifp, NULL) != 0)
-			goto cleanup;
-		in6m = NULL;
-		error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
-		if (error)
-			goto cleanup;
-		im6o->im6o_membership[1] = in6m;
-		im6o->im6o_num_memberships++;
-	}
+	IF_ADDR_WLOCK(ifp);
+	ifp->if_carp = cif;
+	if_ref(ifp);
+	IF_ADDR_WUNLOCK(ifp);
 
-	if (!ifp->if_carp) {
-		cif = malloc(sizeof(*cif), M_CARP,
-		    M_WAITOK|M_ZERO);
-		if (!cif) {
-			error = ENOBUFS;
-			goto cleanup;
-		}
-		if ((error = ifpromisc(ifp, 1))) {
-			free(cif, M_CARP);
-			goto cleanup;
-		}
+	return (cif);
 
-		CARP_LOCK_INIT(cif);
-		CARP_LOCK(cif);
-		cif->vhif_ifp = ifp;
-		TAILQ_INIT(&cif->vhif_vrs);
-		ifp->if_carp = cif;
+cleanup:
+	free(cif, M_CARP);
 
-	} else {
-		struct carp_softc *vr;
+	return (NULL);
+}
 
-		cif = (struct carp_if *)ifp->if_carp;
-		CARP_LOCK(cif);
-		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
-			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
-				CARP_UNLOCK(cif);
-				error = EINVAL;
-				goto cleanup;
-			}
-	}
-	sc->sc_ia6 = ia;
-	sc->sc_carpdev = ifp;
+static void
+carp_free_if(struct carp_if *cif)
+{
+	struct ifnet *ifp = cif->cif_ifp;
 
-	{ /* XXX prevent endless loop if already in queue */
-	struct carp_softc *vr, *after = NULL;
-	int myself = 0;
-	cif = (struct carp_if *)ifp->if_carp;
-	CARP_LOCK_ASSERT(cif);
+	CIF_LOCK_ASSERT(cif);
+	KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
+	    __func__));
 
-	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
-		if (vr == sc)
-			myself = 1;
-		if (vr->sc_vhid < sc->sc_vhid)
-			after = vr;
-	}
+	IF_ADDR_WLOCK(ifp);
+	ifp->if_carp = NULL;
+	IF_ADDR_WUNLOCK(ifp);
 
-	if (!myself) {
-		/* We're trying to keep things in order */
-		if (after == NULL) {
-			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
-		} else {
-			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
-		}
-		cif->vhif_nvrs++;
-	}
-	}
+	CIF_LOCK_DESTROY(cif);
 
-	sc->sc_naddrs6++;
-	SC2IFP(sc)->if_flags |= IFF_UP;
-	if (own)
-		sc->sc_advskew = 0;
-	carp_sc_state_locked(sc);
-	carp_setrun(sc, 0);
+	ifpromisc(ifp, 0);
+	if_rele(ifp);
 
-	CARP_UNLOCK(cif);
-	ifa_free(&ia->ia_ifa);	/* XXXRW: should hold reference for softc. */
-
-	return (0);
-
-cleanup:
-	if (!sc->sc_naddrs6)
-		carp_multicast6_cleanup(sc, 1);
-	ifa_free(&ia->ia_ifa);
-	return (error);
+	free(cif, M_CARP);
 }
 
-static int
-carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
 {
-	int error = 0;
 
-	if (!--sc->sc_naddrs6) {
-		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-
-		CARP_LOCK(cif);
-		callout_stop(&sc->sc_ad_tmo);
-		SC2IFP(sc)->if_flags &= ~IFF_UP;
-		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
-		sc->sc_vhid = -1;
-		carp_multicast6_cleanup(sc, 1);
-		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
-		if (!--cif->vhif_nvrs) {
-			CARP_LOCK_DESTROY(cif);
-			sc->sc_carpdev->if_carp = NULL;
-			free(cif, M_CARP);
-		} else
-			CARP_UNLOCK(cif);
-	}
-
-	return (error);
+	CARP_LOCK(sc);
+	carpr->carpr_state = sc->sc_state;
+	carpr->carpr_vhid = sc->sc_vhid;
+	carpr->carpr_advbase = sc->sc_advbase;
+	carpr->carpr_advskew = sc->sc_advskew;
+	if (priv)
+		bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
+	else
+		bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
+	CARP_UNLOCK(sc);
 }
-#endif /* INET6 */
 
-static int
-carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+int
+carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
 {
-	struct carp_softc *sc = ifp->if_softc, *vr;
 	struct carpreq carpr;
-	struct ifaddr *ifa;
-	struct ifreq *ifr;
-	struct ifaliasreq *ifra;
-	int locked = 0, error = 0;
+	struct ifnet *ifp;
+	struct carp_softc *sc = NULL;
+	int error = 0, locked = 0;
 
-	ifa = (struct ifaddr *)addr;
-	ifra = (struct ifaliasreq *)addr;
-	ifr = (struct ifreq *)addr;
+	if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+		return (error);
 
-	switch (cmd) {
-	case SIOCSIFADDR:
-		switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
-		case AF_INET:
-			SC2IFP(sc)->if_flags |= IFF_UP;
-			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
-			    sizeof(struct sockaddr));
-			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
-			break;
-#endif /* INET */
-#ifdef INET6
-		case AF_INET6:
-			SC2IFP(sc)->if_flags |= IFF_UP;
-			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
-			break;
-#endif /* INET6 */
-		default:
-			error = EAFNOSUPPORT;
-			break;
-		}
+	ifp = ifunit_ref(ifr->ifr_name);
+	if (ifp == NULL)
+		return (ENXIO);
+
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+	case IFT_L2VLAN:
+	case IFT_BRIDGE:
+	case IFT_FDDI:
+	case IFT_ISO88025:
 		break;
+	default:
+		error = EOPNOTSUPP;
+		goto out;
+	}
 
-	case SIOCAIFADDR:
-		switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
-		case AF_INET:
-			SC2IFP(sc)->if_flags |= IFF_UP;
-			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
-			    sizeof(struct sockaddr));
-			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+		error = EADDRNOTAVAIL;
+		goto out;
+	}
+
+	switch (cmd) {
+	case SIOCSVH:
+		if ((error = priv_check(td, PRIV_NETINET_CARP)))
 			break;
-#endif /* INET */
-#ifdef INET6
-		case AF_INET6:
-			SC2IFP(sc)->if_flags |= IFF_UP;
-			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+		if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
+		    carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
+			error = EINVAL;
 			break;
-#endif /* INET6 */
-		default:
-			error = EAFNOSUPPORT;
-			break;
 		}
-		break;
 
-	case SIOCDIFADDR:
-		switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
-		case AF_INET:
-			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
-			break;
-#endif /* INET */
-#ifdef INET6
-		case AF_INET6:
-			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
-			break;
-#endif /* INET6 */
-		default:
-			error = EAFNOSUPPORT;
-			break;
+		if (ifp->if_carp) {
+			CIF_LOCK(ifp->if_carp);
+			IFNET_FOREACH_CARP(ifp, sc)
+				if (sc->sc_vhid == carpr.carpr_vhid)
+					break;
+			CIF_UNLOCK(ifp->if_carp);
 		}
-		break;
+		if (sc == NULL) {
+			sc = carp_alloc(ifp);
+			if (sc == NULL) {
+				error = EINVAL; /* XXX: ifpromisc failed */
+				break;
+			}
 
-	case SIOCSIFFLAGS:
-		if (sc->sc_carpdev) {
-			locked = 1;
-			CARP_SCLOCK(sc);
+			CARP_LOCK(sc);
+			sc->sc_vhid = carpr.carpr_vhid;
+			LLADDR(&sc->sc_addr)[0] = 0;
+			LLADDR(&sc->sc_addr)[1] = 0;
+			LLADDR(&sc->sc_addr)[2] = 0x5e;
+			LLADDR(&sc->sc_addr)[3] = 0;
+			LLADDR(&sc->sc_addr)[4] = 1;
+			LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
+		} else
+			CARP_LOCK(sc);
+		locked = 1;
+		if (carpr.carpr_advbase > 0) {
+			if (carpr.carpr_advbase > 255 ||
+			    carpr.carpr_advbase < CARP_DFLTINTV) {
+				error = EINVAL;
+				break;
+			}
+			sc->sc_advbase = carpr.carpr_advbase;
 		}
-		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
- 			callout_stop(&sc->sc_ad_tmo);
- 			callout_stop(&sc->sc_md_tmo);
- 			callout_stop(&sc->sc_md6_tmo);
-			if (sc->sc_state == MASTER)
-				carp_send_ad_locked(sc);
-			carp_set_state(sc, INIT);
-			carp_setrun(sc, 0);
-		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
-			SC2IFP(sc)->if_flags |= IFF_UP;
-			carp_setrun(sc, 0);
-		}
-		break;
-
-	case SIOCSVH:
-		error = priv_check(curthread, PRIV_NETINET_CARP);
-		if (error)
+		if (carpr.carpr_advskew >= 255) {
+			error = EINVAL;
 			break;
-		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
-			break;
-		error = 1;
-		if (sc->sc_carpdev) {
-			locked = 1;
-			CARP_SCLOCK(sc);
 		}
-		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+		sc->sc_advskew = carpr.carpr_advskew;
+		if (carpr.carpr_key[0] != '\0') {
+			bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+			carp_hmac_prepare(sc);
+		}
+		if (sc->sc_state != INIT &&
+		    carpr.carpr_state != sc->sc_state) {
 			switch (carpr.carpr_state) {
 			case BACKUP:
 				callout_stop(&sc->sc_ad_tmo);
 				carp_set_state(sc, BACKUP);
 				carp_setrun(sc, 0);
-				carp_setroute(sc, RTM_DELETE);
+				carp_delroute(sc);
 				break;
 			case MASTER:
 				carp_master_down_locked(sc);
@@ -2014,296 +1727,325 @@
 				break;
 			}
 		}
-		if (carpr.carpr_vhid > 0) {
-			if (carpr.carpr_vhid > 255) {
-				error = EINVAL;
-				break;
-			}
-			if (sc->sc_carpdev) {
-				struct carp_if *cif;
-				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
-					if (vr != sc &&
-					    vr->sc_vhid == carpr.carpr_vhid) {
-						error = EEXIST;
-						break;
-					}
-				if (error == EEXIST)
+		break;
+
+	case SIOCGVH:
+	    {
+		int priveleged;
+
+		if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
+			error = EINVAL;
+			break;
+		}
+		if (carpr.carpr_count < 1) {
+			error = EMSGSIZE;
+			break;
+		}
+		if (ifp->if_carp == NULL) {
+			error = ENOENT;
+			break;
+		}
+
+		priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
+		if (carpr.carpr_vhid != 0) {
+			CIF_LOCK(ifp->if_carp);
+			IFNET_FOREACH_CARP(ifp, sc)
+				if (sc->sc_vhid == carpr.carpr_vhid)
 					break;
-			}
-			sc->sc_vhid = carpr.carpr_vhid;
-			IF_LLADDR(sc->sc_ifp)[0] = 0;
-			IF_LLADDR(sc->sc_ifp)[1] = 0;
-			IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
-			IF_LLADDR(sc->sc_ifp)[3] = 0;
-			IF_LLADDR(sc->sc_ifp)[4] = 1;
-			IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
-			error--;
-		}
-		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
-			if (carpr.carpr_advskew >= 255) {
-				error = EINVAL;
+			CIF_UNLOCK(ifp->if_carp);
+			if (sc == NULL) {
+				error = ENOENT;
 				break;
 			}
-			if (carpr.carpr_advbase > 255) {
-				error = EINVAL;
+			carp_carprcp(&carpr, sc, priveleged);
+			error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+		} else  {
+			int i, count;
+
+			count = 0;
+			CIF_LOCK(ifp->if_carp);
+			IFNET_FOREACH_CARP(ifp, sc)
+				count++;
+
+			if (count > carpr.carpr_count) {
+				CIF_UNLOCK(ifp->if_carp);
+				error = EMSGSIZE;
 				break;
 			}
-			sc->sc_advbase = carpr.carpr_advbase;
-			sc->sc_advskew = carpr.carpr_advskew;
-			error--;
+
+			i = 0;
+			IFNET_FOREACH_CARP(ifp, sc) {
+				carp_carprcp(&carpr, sc, priveleged);
+				carpr.carpr_count = count;
+				error = copyout(&carpr, ifr->ifr_data +
+				    (i * sizeof(carpr)), sizeof(carpr));
+				if (error) {
+					CIF_UNLOCK(ifp->if_carp);
+					break;
+				}
+				i++;
+			}
+			CIF_UNLOCK(ifp->if_carp);
 		}
-		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
-		if (error > 0)
-			error = EINVAL;
-		else {
-			error = 0;
-			carp_setrun(sc, 0);
-		}
 		break;
-
-	case SIOCGVH:
-		/* XXX: lockless read */
-		bzero(&carpr, sizeof(carpr));
-		carpr.carpr_state = sc->sc_state;
-		carpr.carpr_vhid = sc->sc_vhid;
-		carpr.carpr_advbase = sc->sc_advbase;
-		carpr.carpr_advskew = sc->sc_advskew;
-		error = priv_check(curthread, PRIV_NETINET_CARP);
-		if (error == 0)
-			bcopy(sc->sc_key, carpr.carpr_key,
-			    sizeof(carpr.carpr_key));
-		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
-		break;
-
+	    }
 	default:
 		error = EINVAL;
 	}
 
+out:
 	if (locked)
-		CARP_SCUNLOCK(sc);
+		CARP_UNLOCK(sc);
+	if_rele(ifp);
 
-	carp_hmac_prepare(sc);
-
 	return (error);
 }
 
-/*
- * XXX: this is looutput. We should eventually use it from there.
- */
 static int
-carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
-    struct route *ro)
+carp_get_vhid(struct ifaddr *ifa)
 {
-	u_int32_t af;
-	struct rtentry *rt = NULL;
 
-	M_ASSERTPKTHDR(m); /* check if we have the packet header */
+	if (ifa == NULL || ifa->ifa_carp == NULL)
+		return (0);
 
-	if (ro != NULL)
-		rt = ro->ro_rt;
-	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
-		m_freem(m);
-		return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
-			rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
-	}
-
-	ifp->if_opackets++;
-	ifp->if_obytes += m->m_pkthdr.len;
-
-	/* BPF writes need to be handled specially. */
-	if (dst->sa_family == AF_UNSPEC) {
-		bcopy(dst->sa_data, &af, sizeof(af));
-		dst->sa_family = af;
-	}
-
-#if 1	/* XXX */
-	switch (dst->sa_family) {
-	case AF_INET:
-	case AF_INET6:
-	case AF_IPX:
-	case AF_APPLETALK:
-		break;
-	default:
-		printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
-		m_freem(m);
-		return (EAFNOSUPPORT);
-	}
-#endif
-	return(if_simloop(ifp, m, dst->sa_family, 0));
+	return (ifa->ifa_carp->sc_vhid);
 }
 
-/*
- * Start output on carp interface. This function should never be called.
- */
-static void
-carp_start(struct ifnet *ifp)
-{
-#ifdef DEBUG
-	printf("%s: start called\n", ifp->if_xname);
-#endif
-}
-
 int
-carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
-    struct rtentry *rt)
+carp_attach(struct ifaddr *ifa, int vhid)
 {
-	struct m_tag *mtag;
+	struct ifnet *ifp = ifa->ifa_ifp;
+	struct carp_if *cif = ifp->if_carp;
 	struct carp_softc *sc;
-	struct ifnet *carp_ifp;
+	int index, error;
 
-	if (!sa)
-		return (0);
+	if (ifp->if_carp == NULL)
+		return (ENOPROTOOPT);
 
-	switch (sa->sa_family) {
+	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET:
-		break;
-#endif /* INET */
+#endif
 #ifdef INET6
 	case AF_INET6:
+#endif
 		break;
-#endif /* INET6 */
 	default:
-		return (0);
+		return (EPROTOTYPE);
 	}
 
-	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
-	if (mtag == NULL)
-		return (0);
+	CIF_LOCK(cif);
+	IFNET_FOREACH_CARP(ifp, sc)
+		if (sc->sc_vhid == vhid)
+			break;
+	if (sc == NULL) {
+		CIF_UNLOCK(cif);
+		return (ENOENT);
+	}
 
-	bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
-	sc = carp_ifp->if_softc;
+	if (ifa->ifa_carp) {
+		if (ifa->ifa_carp->sc_vhid != vhid)
+			carp_detach_locked(ifa);
+		else {
+			CIF_UNLOCK(cif);
+			return (0);
+		}
+	}
 
-	/* Set the source MAC address to Virtual Router MAC Address */
-	switch (ifp->if_type) {
-	case IFT_ETHER:
-	case IFT_L2VLAN: {
-			struct ether_header *eh;
+	error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
+	if (error) {
+		CIF_FREE(cif);
+		return (error);
+	}
 
-			eh = mtod(m, struct ether_header *);
-			eh->ether_shost[0] = 0;
-			eh->ether_shost[1] = 0;
-			eh->ether_shost[2] = 0x5e;
-			eh->ether_shost[3] = 0;
-			eh->ether_shost[4] = 1;
-			eh->ether_shost[5] = sc->sc_vhid;
+	CARP_LOCK(sc);
+	index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
+	if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
+		if ((error = carp_grow_ifas(sc)) != 0) {
+			carp_multicast_cleanup(cif,
+			    ifa->ifa_addr->sa_family);
+			CARP_UNLOCK(sc);
+			CIF_FREE(cif);
+			return (error);
 		}
-		break;
-	case IFT_FDDI: {
-			struct fddi_header *fh;
 
-			fh = mtod(m, struct fddi_header *);
-			fh->fddi_shost[0] = 0;
-			fh->fddi_shost[1] = 0;
-			fh->fddi_shost[2] = 0x5e;
-			fh->fddi_shost[3] = 0;
-			fh->fddi_shost[4] = 1;
-			fh->fddi_shost[5] = sc->sc_vhid;
-		}
+	switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		cif->cif_naddrs++;
+		sc->sc_naddrs++;
 		break;
-	case IFT_ISO88025: {
- 			struct iso88025_header *th;
- 			th = mtod(m, struct iso88025_header *);
-			th->iso88025_shost[0] = 3;
-			th->iso88025_shost[1] = 0;
-			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
-			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
-			th->iso88025_shost[4] = 0;
-			th->iso88025_shost[5] = 0;
-		}
+#endif
+#ifdef INET6
+	case AF_INET6:
+		cif->cif_naddrs6++;
+		sc->sc_naddrs6++;
 		break;
-	default:
-		printf("%s: carp is not supported for this interface type\n",
-		    ifp->if_xname);
-		return (EOPNOTSUPP);
+#endif
 	}
 
+	ifa_ref(ifa);
+	sc->sc_ifas[index - 1] = ifa;
+	ifa->ifa_carp = sc;
+
+	carp_hmac_prepare(sc);
+	carp_sc_state(sc);
+
+	CARP_UNLOCK(sc);
+	CIF_UNLOCK(cif);
+
 	return (0);
 }
 
+void
+carp_detach(struct ifaddr *ifa)
+{
+	struct ifnet *ifp = ifa->ifa_ifp;
+	struct carp_if *cif = ifp->if_carp;
+
+	CIF_LOCK(cif);
+	carp_detach_locked(ifa);
+	CIF_FREE(cif);
+}
+
 static void
-carp_set_state(struct carp_softc *sc, int state)
+carp_detach_locked(struct ifaddr *ifa)
 {
-	int link_state;
+	struct ifnet *ifp = ifa->ifa_ifp;
+	struct carp_if *cif = ifp->if_carp;
+	struct carp_softc *sc = ifa->ifa_carp;
+	int i, index;
 
-	if (sc->sc_carpdev)
-		CARP_SCLOCK_ASSERT(sc);
+	KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
 
-	if (sc->sc_state == state)
-		return;
+	CIF_LOCK_ASSERT(cif);
+	CARP_LOCK(sc);
 
-	sc->sc_state = state;
-	switch (state) {
-	case BACKUP:
-		link_state = LINK_STATE_DOWN;
+	/* Shift array. */
+	index = sc->sc_naddrs + sc->sc_naddrs6;
+	for (i = 0; i < index; i++)
+		if (sc->sc_ifas[i] == ifa)
+			break;
+	KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
+	for (; i < index - 1; i++)
+		sc->sc_ifas[i] = sc->sc_ifas[i+1];
+	sc->sc_ifas[index - 1] = NULL;
+
+	switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		cif->cif_naddrs--;
+		sc->sc_naddrs--;
 		break;
-	case MASTER:
-		link_state = LINK_STATE_UP;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		cif->cif_naddrs6--;
+		sc->sc_naddrs6--;
 		break;
-	default:
-		link_state = LINK_STATE_UNKNOWN;
-		break;
+#endif
 	}
-	if_link_state_change(SC2IFP(sc), link_state);
+
+	carp_ifa_delroute(ifa);
+	carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
+
+	ifa->ifa_carp = NULL;
+	ifa_free(ifa);
+
+	carp_hmac_prepare(sc);
+	carp_sc_state(sc);
+
+	if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
+		CARP_UNLOCK(sc);
+		carp_destroy(sc);
+	} else
+		CARP_UNLOCK(sc);
 }
 
-void
-carp_carpdev_state(struct ifnet *ifp)
+static void
+carp_set_state(struct carp_softc *sc, int state)
 {
-	struct carp_if *cif;
 
-	cif = ifp->if_carp;
-	CARP_LOCK(cif);
-	carp_carpdev_state_locked(cif);
-	CARP_UNLOCK(cif);
+	CARP_LOCK_ASSERT(sc);
+
+	if (sc->sc_state != state) {
+		const char *carp_states[] = { CARP_STATES };
+		char subsys[IFNAMSIZ+5];
+
+		sc->sc_state = state;
+
+		snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
+		    sc->sc_carpdev->if_xname);
+		devctl_notify("CARP", subsys, carp_states[state], NULL);
+	}
 }
 
 static void
-carp_carpdev_state_locked(struct carp_if *cif)
+carp_linkstate(struct ifnet *ifp)
 {
 	struct carp_softc *sc;
 
-	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
-		carp_sc_state_locked(sc);
+	CIF_LOCK(ifp->if_carp);
+	IFNET_FOREACH_CARP(ifp, sc) {
+		CARP_LOCK(sc);
+		carp_sc_state(sc);
+		CARP_UNLOCK(sc);
+	}
+	CIF_UNLOCK(ifp->if_carp);
 }
 
 static void
-carp_sc_state_locked(struct carp_softc *sc)
+carp_sc_state(struct carp_softc *sc)
 {
-	CARP_SCLOCK_ASSERT(sc);
 
+	CARP_LOCK_ASSERT(sc);
+
 	if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 	    !(sc->sc_carpdev->if_flags & IFF_UP)) {
-		sc->sc_flags_backup = SC2IFP(sc)->if_flags;
-		SC2IFP(sc)->if_flags &= ~IFF_UP;
-		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 		callout_stop(&sc->sc_ad_tmo);
+#ifdef INET
 		callout_stop(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
 		callout_stop(&sc->sc_md6_tmo);
+#endif
 		carp_set_state(sc, INIT);
 		carp_setrun(sc, 0);
-		if (!sc->sc_suppress) {
-			carp_suppress_preempt++;
-			if (carp_suppress_preempt == 1) {
-				CARP_SCUNLOCK(sc);
-				carp_send_ad_all();
-				CARP_SCLOCK(sc);
-			}
-		}
+		if (!sc->sc_suppress)
+			carp_demote_adj(V_carp_ifdown_adj, "interface down");
 		sc->sc_suppress = 1;
 	} else {
-		SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
 		carp_set_state(sc, INIT);
 		carp_setrun(sc, 0);
 		if (sc->sc_suppress)
-			carp_suppress_preempt--;
+			carp_demote_adj(-V_carp_ifdown_adj, "interface up");
 		sc->sc_suppress = 0;
 	}
+}
 
-	return;
+static void
+carp_demote_adj(int adj, char *reason)
+{
+	atomic_add_int(&V_carp_demotion, adj);
+	CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
+	taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
 }
 
+static int
+carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	int new, error;
+
+	new = V_carp_demotion;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error || !req->newptr)
+		return (error);
+
+	carp_demote_adj(new, "sysctl");
+
+	return (0);
+}
+
 #ifdef INET
 extern  struct domain inetdomain;
 static struct protosw in_carp_protosw = {
@@ -2336,10 +2078,6 @@
 carp_mod_cleanup(void)
 {
 
-	if (if_detach_event_tag == NULL)
-		return;
-	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
-	if_clone_detach(&carp_cloner);
 #ifdef INET
 	if (proto_reg[CARP_INET] == 0) {
 		(void)ipproto_unregister(IPPROTO_CARP);
@@ -2357,9 +2095,17 @@
 	carp_iamatch6_p = NULL;
 	carp_macmatch6_p = NULL;
 #endif
+	carp_ioctl_p = NULL;
+	carp_attach_p = NULL;
+	carp_detach_p = NULL;
+	carp_get_vhid_p = NULL;
 	carp_linkstate_p = NULL;
 	carp_forus_p = NULL;
 	carp_output_p = NULL;
+	carp_demote_adj_p = NULL;
+	carp_master_p = NULL;
+	mtx_unlock(&carp_mtx);
+	taskqueue_drain(taskqueue_swi, &carp_sendall_task);
 	mtx_destroy(&carp_mtx);
 }
 
@@ -2368,22 +2114,23 @@
 {
 	int err;
 
-	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
-		carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
-	if (if_detach_event_tag == NULL)
-		return (ENOMEM);
 	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
-	LIST_INIT(&carpif_list);
-	if_clone_attach(&carp_cloner);
-	carp_linkstate_p = carp_carpdev_state;
+	LIST_INIT(&carp_list);
+	carp_get_vhid_p = carp_get_vhid;
 	carp_forus_p = carp_forus;
 	carp_output_p = carp_output;
+	carp_linkstate_p = carp_linkstate;
+	carp_ioctl_p = carp_ioctl;
+	carp_attach_p = carp_attach;
+	carp_detach_p = carp_detach;
+	carp_demote_adj_p = carp_demote_adj;
+	carp_master_p = carp_master;
 #ifdef INET6
 	carp_iamatch6_p = carp_iamatch6;
 	carp_macmatch6_p = carp_macmatch6;
 	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 	    (struct protosw *)&in6_carp_protosw);
-	if (proto_reg[CARP_INET6] != 0) {
+	if (proto_reg[CARP_INET6]) {
 		printf("carp: error %d attaching to PF_INET6\n",
 		    proto_reg[CARP_INET6]);
 		carp_mod_cleanup();
@@ -2399,7 +2146,7 @@
 #ifdef INET
 	carp_iamatch_p = carp_iamatch;
 	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
-	if (proto_reg[CARP_INET] != 0) {
+	if (proto_reg[CARP_INET]) {
 		printf("carp: error %d attaching to PF_INET\n",
 		    proto_reg[CARP_INET]);
 		carp_mod_cleanup();
@@ -2412,7 +2159,7 @@
 		return (err);
 	}
 #endif
-	return 0;
+	return (0);
 }
 
 static int
@@ -2423,17 +2170,13 @@
 		return carp_mod_load();
 		/* NOTREACHED */
 	case MOD_UNLOAD:
-		/*
-		 * XXX: For now, disallow module unloading by default due to
-		 * a race condition where a thread may dereference one of the
-		 * function pointer hooks after the module has been
-		 * unloaded, during processing of a packet, causing a panic.
-		 */
-#ifdef CARPMOD_CAN_UNLOAD
-		carp_mod_cleanup();
-#else
-		return (EBUSY);
-#endif
+		mtx_lock(&carp_mtx);
+		if (LIST_EMPTY(&carp_list))
+			carp_mod_cleanup();
+		else {
+			mtx_unlock(&carp_mtx);
+			return (EBUSY);
+		}
 		break;
 
 	default:

Modified: trunk/sys/netinet/ip_carp.h
===================================================================
--- trunk/sys/netinet/ip_carp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_carp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet/ip_carp.h 211193 2010-08-11 20:18:19Z will $	*/
+/*	$FreeBSD: stable/10/sys/netinet/ip_carp.h 253087 2013-07-09 10:02:51Z ae $	*/
 /*	$OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $	*/
 
 /*
@@ -118,20 +118,18 @@
 	uint64_t	carps_preempt;		/* if enabled, preemptions */
 };
 
-#ifdef _KERNEL
-#define	CARPSTATS_ADD(name, val)	carpstats.name += (val)
-#define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
-#endif
-
 /*
  * Configuration structure for SIOCSVH SIOCGVH
  */
 struct carpreq {
+	int		carpr_count;
+	int		carpr_vhid;
+#define	CARP_MAXVHID	255
 	int		carpr_state;
 #define	CARP_STATES	"INIT", "BACKUP", "MASTER"
 #define	CARP_MAXSTATE	2
-	int		carpr_vhid;
 	int		carpr_advskew;
+#define	CARP_MAXSKEW	240
 	int		carpr_advbase;
 	unsigned char	carpr_key[CARP_KEY_LEN];
 };
@@ -138,49 +136,39 @@
 #define	SIOCSVH	_IOWR('i', 245, struct ifreq)
 #define	SIOCGVH	_IOWR('i', 246, struct ifreq)
 
-/*
- * Names for CARP sysctl objects
- */
-#define	CARPCTL_ALLOW		1	/* accept incoming CARP packets */
-#define	CARPCTL_PREEMPT		2	/* high-pri backup preemption mode */
-#define	CARPCTL_LOG		3	/* log bad packets */
-#define	CARPCTL_STATS		4	/* statistics (read-only) */
-#define	CARPCTL_ARPBALANCE	5	/* balance arp responses */
-#define	CARPCTL_MAXID		6
-
-#define	CARPCTL_NAMES { \
-	{ 0, 0 }, \
-	{ "allow", CTLTYPE_INT }, \
-	{ "preempt", CTLTYPE_INT }, \
-	{ "log", CTLTYPE_INT }, \
-	{ "stats", CTLTYPE_STRUCT }, \
-	{ "arpbalance", CTLTYPE_INT }, \
-}
-
 #ifdef _KERNEL
-void		 carp_carpdev_state(struct ifnet *);
-void		 carp_input (struct mbuf *, int);
-int		 carp6_input (struct mbuf **, int *, int);
-int		 carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
-		     struct rtentry *);
-int		 carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
-		     u_int8_t **);
+int		carp_ioctl(struct ifreq *, u_long, struct thread *);
+int		carp_attach(struct ifaddr *, int);
+void		carp_detach(struct ifaddr *);
+void		carp_carpdev_state(struct ifnet *);
+void		carp_input (struct mbuf *, int);
+int		carp6_input (struct mbuf **, int *, int);
+int		carp_output (struct ifnet *, struct mbuf *,
+		    const struct sockaddr *);
+int		carp_master(struct ifaddr *);
+int		carp_iamatch(struct ifaddr *, uint8_t **);
 struct ifaddr	*carp_iamatch6(struct ifnet *, struct in6_addr *);
 caddr_t		carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
-struct	ifnet	*carp_forus (struct ifnet *, u_char *);
+int		carp_forus(struct ifnet *, u_char *);
 
 /* These are external networking stack hooks for CARP */
 /* net/if.c */
+extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+extern int (*carp_attach_p)(struct ifaddr *, int);
+extern void (*carp_detach_p)(struct ifaddr *);
 extern void (*carp_linkstate_p)(struct ifnet *);
+extern void (*carp_demote_adj_p)(int, char *);
+extern int (*carp_master_p)(struct ifaddr *);
 /* net/if_bridge.c net/if_ethersubr.c */
-extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
+extern int (*carp_forus_p)(struct ifnet *, u_char *);
 /* net/if_ethersubr.c */
 extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
-    struct sockaddr *, struct rtentry *);
+    const struct sockaddr *);
+/* net/rtsock.c */
+extern int (*carp_get_vhid_p)(struct ifaddr *);
 #ifdef INET
 /* netinet/if_ether.c */
-extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
-    struct in_addr *, u_int8_t **);
+extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
 #endif
 #ifdef INET6
 /* netinet6/nd6_nbr.c */

Modified: trunk/sys/netinet/ip_divert.c
===================================================================
--- trunk/sys/netinet/ip_divert.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_divert.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,16 +29,14 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_divert.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_divert.c 241913 2012-10-22 21:09:03Z glebius $");
 
-#if !defined(KLD_MODULE)
 #include "opt_inet.h"
+#include "opt_inet6.h"
 #include "opt_sctp.h"
 #ifndef INET
-#error "IPDIVERT requires INET."
+#error "IPDIVERT requires INET"
 #endif
-#endif
-#include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -211,17 +209,13 @@
 
 	/* Delayed checksums are currently not compatible with divert. */
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-		ip->ip_len = ntohs(ip->ip_len);
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-		ip->ip_len = htons(ip->ip_len);
 	}
 #ifdef SCTP
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
-		ip->ip_len = ntohs(ip->ip_len);
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
-		ip->ip_len = htons(ip->ip_len);
 	}
 #endif
 	bzero(&divsrc, sizeof(divsrc));
@@ -393,10 +387,6 @@
 				INP_RUNLOCK(inp);
 				goto cantsend;
 			}
-
-			/* Convert fields to host order for ip_output() */
-			ip->ip_len = ntohs(ip->ip_len);
-			ip->ip_off = ntohs(ip->ip_off);
 			break;
 #ifdef INET6
 		case IPV6_VERSION >> 4:
@@ -409,8 +399,6 @@
 				INP_RUNLOCK(inp);
 				goto cantsend;
 			}
-
-			ip6->ip6_plen = ntohs(ip6->ip6_plen);
 			break;
 		    }
 #endif

Modified: trunk/sys/netinet/ip_divert.h
===================================================================
--- trunk/sys/netinet/ip_divert.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_divert.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  *
- * $FreeBSD: stable/9/sys/netinet/ip_divert.h 201527 2010-01-04 19:01:22Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/ip_divert.h 201527 2010-01-04 19:01:22Z luigi $
  */
 
 #ifndef _NETINET_IP_DIVERT_H_

Modified: trunk/sys/netinet/ip_dummynet.h
===================================================================
--- trunk/sys/netinet/ip_dummynet.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_dummynet.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -25,12 +25,12 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/ip_dummynet.h 206845 2010-04-19 16:17:30Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/ip_dummynet.h 301772 2016-06-10 00:00:25Z truckman $
  */
 
 #ifndef _IP_DUMMYNET_H
 #define _IP_DUMMYNET_H
-
+#define NEW_AQM
 /*
  * Definition of the kernel-userland API for dummynet.
  *
@@ -86,7 +86,13 @@
 	/* special commands for emulation of sysctl variables */
 	DN_SYSCTL_GET,
 	DN_SYSCTL_SET,
-
+#ifdef NEW_AQM
+	/* subtypes used for setting/getting extra parameters.
+	 * these subtypes used with IP_DUMMYNET3 command (get)
+	 * and DN_TEXT (set). */
+	DN_AQM_PARAMS, /* AQM extra params */
+	DN_SCH_PARAMS, /* scheduler extra params */
+#endif
 	DN_LAST,
 };
 
@@ -105,6 +111,10 @@
 	DN_HAS_PROFILE	= 0x0010,	/* a link has a profile */
 	DN_IS_RED	= 0x0020,
 	DN_IS_GENTLE_RED= 0x0040,
+	DN_IS_ECN	= 0x0080,
+	#ifdef NEW_AQM
+	DN_IS_AQM = 0x0100,     /* AQMs: e.g Codel & PIE */
+	#endif
 	DN_PIPE_CMD	= 0x1000,	/* pipe config... */
 };
 
@@ -172,8 +182,8 @@
 	struct ipfw_flow_id fid;
 	uint64_t	tot_pkts; /* statistics counters  */
 	uint64_t	tot_bytes;
-	uint32_t	length; /* Queue lenght, in packets */
-	uint32_t	len_bytes; /* Queue lenght, in bytes */
+	uint32_t	length; /* Queue length, in packets */
+	uint32_t	len_bytes; /* Queue length, in bytes */
 	uint32_t	drops;
 };
 
@@ -210,8 +220,20 @@
 	int	samples[ED_MAX_SAMPLES_NO];	/* may be shorter */
 };
 
+#ifdef NEW_AQM
+/* Extra parameters for AQM and scheduler.
+ * This struct is used to pass and retrieve parameters (configurations)
+ * to/from AQM and Scheduler.
+ */
+struct dn_extra_parms {
+	struct dn_id oid;
+	char name[16];
+	uint32_t nr;
+#define DN_MAX_EXTRA_PARM	10
+	int64_t par[DN_MAX_EXTRA_PARM];
+};
+#endif
 
-
 /*
  * Overall structure of dummynet
 

Modified: trunk/sys/netinet/ip_ecn.c
===================================================================
--- trunk/sys/netinet/ip_ecn.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ecn.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_ecn.c 172467 2007-10-07 20:44:24Z silby $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_ecn.c 172467 2007-10-07 20:44:24Z silby $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"

Modified: trunk/sys/netinet/ip_ecn.h
===================================================================
--- trunk/sys/netinet/ip_ecn.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ecn.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet/ip_ecn.h 139823 2005-01-07 01:45:51Z imp $	*/
+/*	$FreeBSD: stable/10/sys/netinet/ip_ecn.h 139823 2005-01-07 01:45:51Z imp $	*/
 /*	$KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $	*/
 
 /*-

Modified: trunk/sys/netinet/ip_encap.c
===================================================================
--- trunk/sys/netinet/ip_encap.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_encap.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -58,7 +58,7 @@
 /* XXX is M_NETADDR correct? */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_encap.c 201145 2009-12-28 22:56:30Z antoine $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_encap.c 201145 2009-12-28 22:56:30Z antoine $");
 
 #include "opt_mrouting.h"
 #include "opt_inet.h"

Modified: trunk/sys/netinet/ip_encap.h
===================================================================
--- trunk/sys/netinet/ip_encap.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_encap.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet/ip_encap.h 139823 2005-01-07 01:45:51Z imp $	*/
+/*	$FreeBSD: stable/10/sys/netinet/ip_encap.h 139823 2005-01-07 01:45:51Z imp $	*/
 /*	$KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $	*/
 
 /*-

Modified: trunk/sys/netinet/ip_fastfwd.c
===================================================================
--- trunk/sys/netinet/ip_fastfwd.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_fastfwd.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -75,10 +75,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_fastfwd.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_fastfwd.c 272868 2014-10-09 23:45:26Z hrs $");
 
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -86,6 +87,7 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
@@ -98,6 +100,7 @@
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
@@ -165,7 +168,7 @@
 	struct sockaddr_in *dst = NULL;
 	struct ifnet *ifp;
 	struct in_addr odest, dest;
-	u_short sum, ip_len;
+	uint16_t sum, ip_len, ip_off;
 	int error = 0;
 	int hlen, mtu;
 	struct m_tag *fwd_tag = NULL;
@@ -295,9 +298,9 @@
 	 * Only IP packets without options
 	 */
 	if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
-		if (ip_doopts == 1)
+		if (V_ip_doopts == 1)
 			return m;
-		else if (ip_doopts == 2) {
+		else if (V_ip_doopts == 2) {
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
 				0, 0);
 			return NULL;	/* mbuf already free'd */
@@ -339,12 +342,6 @@
 	 * Step 3: incoming packet firewall processing
 	 */
 
-	/*
-	 * Convert to host representation
-	 */
-	ip->ip_len = ntohs(ip->ip_len);
-	ip->ip_off = ntohs(ip->ip_off);
-
 	odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
@@ -463,8 +460,6 @@
 forwardlocal:
 			/*
 			 * Return packet for processing by ip_input().
-			 * Keep host byte order as expected at ip_input's
-			 * "ours"-label.
 			 */
 			m->m_flags |= M_FASTFWD_OURS;
 			if (ro.ro_rt)
@@ -490,13 +485,14 @@
 	/*
 	 * Step 6: send off the packet
 	 */
+	ip_len = ntohs(ip->ip_len);
+	ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Check if route is dampned (when ARP is unable to resolve)
 	 */
 	if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
-	    (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
-	    time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
+	    (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		goto consumed;
 	}
@@ -505,7 +501,7 @@
 	/*
 	 * Check if there is enough space in the interface queue
 	 */
-	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+	if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen) {
 		IPSTAT_INC(ips_odropped);
 		/* would send source quench here but that is depreciated */
@@ -524,21 +520,21 @@
 	/*
 	 * Check if packet fits MTU or if hardware will fragment for us
 	 */
-	if (ro.ro_rt->rt_rmx.rmx_mtu)
-		mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+	if (ro.ro_rt->rt_mtu)
+		mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
 	else
 		mtu = ifp->if_mtu;
 
-	if (ip->ip_len <= mtu ||
-	    (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
+	if (ip_len <= mtu ||
+	    (ifp->if_hwassist & CSUM_FRAGMENT && (ip_off & IP_DF) == 0)) {
 		/*
-		 * Restore packet header fields to original values
+		 * Avoid confusing lower layers.
 		 */
-		ip->ip_len = htons(ip->ip_len);
-		ip->ip_off = htons(ip->ip_off);
+		m_clrprotoflags(m);
 		/*
 		 * Send off the packet via outgoing interface
 		 */
+		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, &ro);
 	} else {
@@ -545,7 +541,7 @@
 		/*
 		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
 		 */
-		if (ip->ip_off & IP_DF) {
+		if (ip_off & IP_DF) {
 			IPSTAT_INC(ips_cantfrag);
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
 				0, mtu);
@@ -555,14 +551,8 @@
 			 * We have to fragment the packet
 			 */
 			m->m_pkthdr.csum_flags |= CSUM_IP;
-			/*
-			 * ip_fragment expects ip_len and ip_off in host byte
-			 * order but returns all packets in network byte order
-			 */
-			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
-					(~ifp->if_hwassist & CSUM_DELAY_IP))) {
+			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist))
 				goto drop;
-			}
 			KASSERT(m != NULL, ("null mbuf and no error"));
 			/*
 			 * Send off the fragments via outgoing interface
@@ -571,7 +561,12 @@
 			do {
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
+				/*
+				 * Avoid confusing lower layers.
+				 */
+				m_clrprotoflags(m);
 
+				IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 				error = (*ifp->if_output)(ifp, m,
 					(struct sockaddr *)dst, &ro);
 				if (error)
@@ -591,7 +586,7 @@
 	if (error != 0)
 		IPSTAT_INC(ips_odropped);
 	else {
-		ro.ro_rt->rt_rmx.rmx_pksent++;
+		counter_u64_add(ro.ro_rt->rt_pksent, 1);
 		IPSTAT_INC(ips_forward);
 		IPSTAT_INC(ips_fastforward);
 	}

Modified: trunk/sys/netinet/ip_fw.h
===================================================================
--- trunk/sys/netinet/ip_fw.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_fw.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/ip_fw.h 234597 2012-04-23 07:15:15Z melifaro $
+ * $FreeBSD: stable/10/sys/netinet/ip_fw.h 287963 2015-09-18 17:29:24Z melifaro $
  */
 
 #ifndef _IPFW2_H
@@ -615,6 +615,7 @@
 	uint8_t		type;		/* entry type			*/
 	uint8_t		masklen;	/* mask length			*/
 	uint16_t	tbl;		/* table number			*/
+	uint16_t	flags;		/* record flags			*/
 	uint32_t	value;		/* value			*/
 	union {
 		/* Longest field needs to be aligned by 4-byte boundary	*/
@@ -622,6 +623,7 @@
 		char	iface[IF_NAMESIZE];	/* interface name	*/
 	} k;
 } ipfw_table_xentry;
+#define	IPFW_TCF_INET	0x01		/* CIDR flags: IPv4 record	*/
 
 typedef struct	_ipfw_table {
 	u_int32_t	size;		/* size of entries in bytes	*/

Modified: trunk/sys/netinet/ip_gre.c
===================================================================
--- trunk/sys/netinet/ip_gre.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_gre.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,8 +1,7 @@
 /* $MidnightBSD$ */
-/*	$NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
-
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -30,19 +29,14 @@
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
  */
 
-/*
- * deencapsulate tunneled packets and send them on
- * output half is in net/if_gre.[ch]
- * This currently handles IPPROTO_GRE, IPPROTO_MOBILE
- */
-
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_gre.c 204522 2010-03-01 17:05:46Z joel $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_gre.c 284072 2015-06-06 13:26:13Z ae $");
 
 #include "opt_inet.h"
-#include "opt_atalk.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
@@ -54,275 +48,136 @@
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
-#include <sys/syslog.h>
-#include <net/bpf.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/sysctl.h>
 #include <net/ethernet.h>
 #include <net/if.h>
-#include <net/netisr.h>
-#include <net/route.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
 #include <net/raw_cb.h>
 
-#ifdef INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
+#include <netinet/ip_encap.h>
 #include <netinet/ip_var.h>
-#include <netinet/ip_gre.h>
 #include <machine/in_cksum.h>
-#else
-#error ip_gre input without IP?
-#endif
 
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
+#ifdef INET6
+#include <netinet/ip6.h>
 #endif
 
 /* Needs IP headers. */
 #include <net/if_gre.h>
-
 #include <machine/stdarg.h>
 
-#if 1
-void gre_inet_ntoa(struct in_addr in);	/* XXX */
-#endif
+extern struct domain inetdomain;
+static void gre_input10(struct mbuf *, int);
+static const struct protosw in_gre_protosw = {
+	.pr_type =		SOCK_RAW,
+	.pr_domain =		&inetdomain,
+	.pr_protocol =		IPPROTO_GRE,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_input =		gre_input10,
+	.pr_output =		(pr_output_t *)rip_output,
+	.pr_ctlinput =		rip_ctlinput,
+	.pr_ctloutput =		rip_ctloutput,
+	.pr_usrreqs =		&rip_usrreqs
+};
 
-static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
+#define	GRE_TTL			30
+VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
+#define	V_ip_gre_ttl		VNET(ip_gre_ttl)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_RW,
+	&VNET_NAME(ip_gre_ttl), 0, "");
 
-static struct mbuf *gre_input2(struct mbuf *, int, u_char);
-
-/*
- * De-encapsulate a packet and feed it back through ip input (this
- * routine is called whenever IP gets a packet with proto type
- * IPPROTO_GRE and a local destination address).
- * This really is simple
- */
-void
-gre_input(struct mbuf *m, int off)
+static void
+gre_input10(struct mbuf *m, int off)
 {
 	int proto;
 
 	proto = (mtod(m, struct ip *))->ip_p;
-
-	m = gre_input2(m, off, proto);
-
-	/*
-	 * If no matching tunnel that is up is found. We inject
-	 * the mbuf to raw ip socket to see if anyone picks it up.
-	 */
-	if (m != NULL)
-		rip_input(m, off);
+	gre_input(&m, &off, proto);
 }
 
-/*
- * Decapsulate. Does the real work and is called from gre_input()
- * (above). Returns an mbuf back if packet is not yet processed,
- * and NULL if it needs no further processing. proto is the protocol
- * number of the "calling" foo_input() routine.
- */
-static struct mbuf *
-gre_input2(struct mbuf *m ,int hlen, u_char proto)
+static int
+in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
-	struct greip *gip;
-	int isr;
+	GRE_RLOCK_TRACKER;
 	struct gre_softc *sc;
-	u_int16_t flags;
-	u_int32_t af;
+	struct ip *ip;
 
-	if ((sc = gre_lookup(m, proto)) == NULL) {
-		/* No matching tunnel or tunnel is down. */
-		return (m);
-	}
+	sc = (struct gre_softc *)arg;
+	if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+		return (0);
 
-	if (m->m_len < sizeof(*gip)) {
-		m = m_pullup(m, sizeof(*gip));
-		if (m == NULL)
-			return (NULL);
-	}
-	gip = mtod(m, struct greip *);
+	M_ASSERTPKTHDR(m);
+	/*
+	 * We expect that payload contains at least IPv4
+	 * or IPv6 packet.
+	 */
+	if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
+		return (0);
 
-	GRE2IFP(sc)->if_ipackets++;
-	GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+	GRE_RLOCK(sc);
+	if (sc->gre_family == 0)
+		goto bad;
 
-	switch (proto) {
-	case IPPROTO_GRE:
-		hlen += sizeof(struct gre_h);
+	KASSERT(sc->gre_family == AF_INET,
+	    ("wrong gre_family: %d", sc->gre_family));
 
-		/* process GRE flags as packet can be of variable len */
-		flags = ntohs(gip->gi_flags);
+	ip = mtod(m, struct ip *);
+	if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
+	    sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
+		goto bad;
 
-		/* Checksum & Offset are present */
-		if ((flags & GRE_CP) | (flags & GRE_RP))
-			hlen += 4;
-		/* We don't support routing fields (variable length) */
-		if (flags & GRE_RP)
-			return (m);
-		if (flags & GRE_KP)
-			hlen += 4;
-		if (flags & GRE_SP)
-			hlen += 4;
-
-		switch (ntohs(gip->gi_ptype)) { /* ethertypes */
-		case WCCP_PROTOCOL_TYPE:
-			if (sc->wccp_ver == WCCP_V2)
-				hlen += 4;
-			/* FALLTHROUGH */
-		case ETHERTYPE_IP:	/* shouldn't need a schednetisr(), */
-			isr = NETISR_IP;/* as we are in ip_input */
-			af = AF_INET;
-			break;
-#ifdef INET6
-		case ETHERTYPE_IPV6:
-			isr = NETISR_IPV6;
-			af = AF_INET6;
-			break;
-#endif
-#ifdef NETATALK
-		case ETHERTYPE_ATALK:
-			isr = NETISR_ATALK1;
-			af = AF_APPLETALK;
-			break;
-#endif
-		default:
-			/* Others not yet supported. */
-			return (m);
-		}
-		break;
-	default:
-		/* Others not yet supported. */
-		return (m);
-	}
-
-	if (hlen > m->m_pkthdr.len) {
-		m_freem(m);
-		return (NULL);
-	}
-	/* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
-	m_adj(m, hlen);
-
-	if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
-		bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
-	}
-
-	m->m_pkthdr.rcvif = GRE2IFP(sc);
-
-	netisr_queue(isr, m);
-
-	/* Packet is done, no further processing needed. */
-	return (NULL);
+	GRE_RUNLOCK(sc);
+	return (32 * 2);
+bad:
+	GRE_RUNLOCK(sc);
+	return (0);
 }
 
-/*
- * input routine for IPPRPOTO_MOBILE
- * This is a little bit diffrent from the other modes, as the
- * encapsulating header was not prepended, but instead inserted
- * between IP header and payload
- */
-
-void
-gre_mobile_input(struct mbuf *m, int hlen)
+int
+in_gre_output(struct mbuf *m, int af, int hlen)
 {
-	struct ip *ip;
-	struct mobip_h *mip;
-	struct gre_softc *sc;
-	int msiz;
+	struct greip *gi;
 
-	if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) {
-		/* No matching tunnel or tunnel is down. */
-		m_freem(m);
-		return;
+	gi = mtod(m, struct greip *);
+	switch (af) {
+	case AF_INET:
+		/*
+		 * gre_transmit() has used M_PREPEND() that doesn't guarantee
+		 * m_data is contiguous more than hlen bytes. Use m_copydata()
+		 * here to avoid m_pullup().
+		 */
+		m_copydata(m, hlen + offsetof(struct ip, ip_tos),
+		    sizeof(u_char), &gi->gi_ip.ip_tos);
+		m_copydata(m, hlen + offsetof(struct ip, ip_id),
+		    sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
+		break;
+#ifdef INET6
+	case AF_INET6:
+		gi->gi_ip.ip_tos = 0; /* XXX */
+		gi->gi_ip.ip_id = ip_newid();
+		break;
+#endif
 	}
-
-	if (m->m_len < sizeof(*mip)) {
-		m = m_pullup(m, sizeof(*mip));
-		if (m == NULL)
-			return;
-	}
-	ip = mtod(m, struct ip *);
-	mip = mtod(m, struct mobip_h *);
-
-	GRE2IFP(sc)->if_ipackets++;
-	GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
-	if (ntohs(mip->mh.proto) & MOB_H_SBIT) {
-		msiz = MOB_H_SIZ_L;
-		mip->mi.ip_src.s_addr = mip->mh.osrc;
-	} else
-		msiz = MOB_H_SIZ_S;
-
-	if (m->m_len < (ip->ip_hl << 2) + msiz) {
-		m = m_pullup(m, (ip->ip_hl << 2) + msiz);
-		if (m == NULL)
-			return;
-		ip = mtod(m, struct ip *);
-		mip = mtod(m, struct mobip_h *);
-	}
-
-	mip->mi.ip_dst.s_addr = mip->mh.odst;
-	mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8);
-
-	if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) {
-		m_freem(m);
-		return;
-	}
-
-	bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) +
-	    (ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2));
-	m->m_len -= msiz;
-	m->m_pkthdr.len -= msiz;
-
-	/*
-	 * On FreeBSD, rip_input() supplies us with ip->ip_len
-	 * already converted into host byteorder and also decreases
-	 * it by the lengh of IP header, however, ip_input() expects
-	 * that this field is in the original format (network byteorder
-	 * and full size of IP packet), so that adjust accordingly.
-	 */
-	ip->ip_len = htons(ip->ip_len + sizeof(struct ip) - msiz);
-
-	ip->ip_sum = 0;
-	ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
-
-	if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
-		u_int32_t af = AF_INET;
-		bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
-	}
-
-	m->m_pkthdr.rcvif = GRE2IFP(sc);
-
-	netisr_queue(NETISR_IP, m);
+	gi->gi_ip.ip_ttl = V_ip_gre_ttl;
+	gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
+	return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
 }
 
-/*
- * Find the gre interface associated with our src/dst/proto set.
- *
- * XXXRW: Need some sort of drain/refcount mechanism so that the softc
- * reference remains valid after it's returned from gre_lookup().  Right
- * now, I'm thinking it should be reference-counted with a gre_dropref()
- * when the caller is done with the softc.  This is complicated by how
- * to handle destroying the gre softc; probably using a gre_drain() in
- * in_gre.c during destroy.
- */
-static struct gre_softc *
-gre_lookup(struct mbuf *m, u_int8_t proto)
+int
+in_gre_attach(struct gre_softc *sc)
 {
-	struct ip *ip = mtod(m, struct ip *);
-	struct gre_softc *sc;
 
-	mtx_lock(&gre_mtx);
-	for (sc = LIST_FIRST(&gre_softc_list); sc != NULL;
-	     sc = LIST_NEXT(sc, sc_list)) {
-		if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
-		    (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
-		    (sc->g_proto == proto) &&
-		    ((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) {
-			mtx_unlock(&gre_mtx);
-			return (sc);
-		}
-	}
-	mtx_unlock(&gre_mtx);
-
-	return (NULL);
+	KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
+	sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
+	    in_gre_encapcheck, &in_gre_protosw, sc);
+	if (sc->gre_ecookie == NULL)
+		return (EEXIST);
+	return (0);
 }

Modified: trunk/sys/netinet/ip_icmp.c
===================================================================
--- trunk/sys/netinet/ip_icmp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_icmp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,10 +31,9 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_icmp.c 242640 2012-11-06 00:49:52Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_icmp.c 283901 2015-06-02 03:14:42Z ae $");
 
 #include "opt_inet.h"
-#include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -65,10 +64,6 @@
 #include <netinet/icmp_var.h>
 
 #ifdef INET
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
 
 #include <machine/in_cksum.h>
 
@@ -90,13 +85,18 @@
 #define	V_icmplim_output		VNET(icmplim_output)
 SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
 	&VNET_NAME(icmplim_output), 0,
-	"Enable rate limiting of ICMP responses");
+	"Enable logging of ICMP response rate limiting");
 
 #ifdef INET
-VNET_DEFINE(struct icmpstat, icmpstat);
-SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
-	&VNET_NAME(icmpstat), icmpstat, "");
+VNET_PCPUSTAT_DEFINE(struct icmpstat, icmpstat);
+VNET_PCPUSTAT_SYSINIT(icmpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat,
+    icmpstat, "ICMP statistics (struct icmpstat, netinet/icmp_var.h)");
 
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmpstat);
+#endif /* VIMAGE */
+
 static VNET_DEFINE(int, icmpmaskrepl) = 0;
 #define	V_icmpmaskrepl			VNET(icmpmaskrepl)
 SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
@@ -144,6 +144,10 @@
 	&VNET_NAME(icmpbmcastecho), 0,
 	"");
 
+static VNET_DEFINE(int, icmptstamprepl) = 1;
+#define	V_icmptstamprepl		VNET(icmptstamprepl)
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
+	&VNET_NAME(icmptstamprepl), 0, "Respond to ICMP Timestamp packets");
 
 #ifdef ICMPPRINTFS
 int	icmpprintfs = 0;
@@ -198,7 +202,7 @@
 kmod_icmpstat_inc(int statnum)
 {
 
-	(*((u_long *)&V_icmpstat + statnum))++;
+	counter_u64_add(VNET(icmpstat)[statnum], 1);
 }
 
 /*
@@ -230,7 +234,7 @@
 	 */
 	if (n->m_flags & M_DECRYPTED)
 		goto freeit;
-	if (oip->ip_off & ~(IP_MF|IP_DF))
+	if (oip->ip_off & htons(~(IP_MF|IP_DF)))
 		goto freeit;
 	if (n->m_flags & (M_BCAST|M_MCAST))
 		goto freeit;
@@ -264,7 +268,7 @@
 		tcphlen = th->th_off << 2;
 		if (tcphlen < sizeof(struct tcphdr))
 			goto freeit;
-		if (oip->ip_len < oiphlen + tcphlen)
+		if (ntohs(oip->ip_len) < oiphlen + tcphlen)
 			goto freeit;
 		if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
 			goto stdreply;
@@ -271,9 +275,10 @@
 		if (n->m_len < oiphlen + tcphlen && 
 		    ((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
 			goto freeit;
-		icmpelen = max(tcphlen, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+		icmpelen = max(tcphlen, min(V_icmp_quotelen,
+		    ntohs(oip->ip_len) - oiphlen));
 	} else
-stdreply:	icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+stdreply:	icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen));
 
 	icmplen = min(oiphlen + icmpelen, nlen);
 	if (icmplen < sizeof(struct ip))
@@ -280,9 +285,9 @@
 		goto freeit;
 
 	if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
-		m = m_gethdr(M_DONTWAIT, MT_DATA);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 	else
-		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		goto freeit;
 #ifdef MAC
@@ -323,8 +328,6 @@
 	 */
 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
 	nip = &icp->icmp_ip;
-	nip->ip_len = htons(nip->ip_len);
-	nip->ip_off = htons(nip->ip_off);
 
 	/*
 	 * Set up ICMP message mbuf and copy old IP header (without options
@@ -339,11 +342,12 @@
 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
 	nip = mtod(m, struct ip *);
 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
-	nip->ip_len = m->m_len;
+	nip->ip_len = htons(m->m_len);
 	nip->ip_v = IPVERSION;
 	nip->ip_hl = 5;
 	nip->ip_p = IPPROTO_ICMP;
 	nip->ip_tos = 0;
+	nip->ip_off = 0;
 	icmp_reflect(m);
 
 freeit:
@@ -361,7 +365,7 @@
 	struct ip *ip = mtod(m, struct ip *);
 	struct sockaddr_in icmpsrc, icmpdst, icmpgw;
 	int hlen = off;
-	int icmplen = ip->ip_len;
+	int icmplen = ntohs(ip->ip_len) - off;
 	int i, code;
 	void (*ctlfunc)(int, struct sockaddr *, void *);
 	int fibnum;
@@ -502,7 +506,6 @@
 			ICMPSTAT_INC(icps_badlen);
 			goto freeit;
 		}
-		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
 		/* Discard ICMP's in response to multicast packets */
 		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
 			goto badcode;
@@ -538,6 +541,8 @@
 			goto reflect;
 
 	case ICMP_TSTAMP:
+		if (V_icmptstamprepl == 0)
+			break;
 		if (!V_icmpbmcastecho
 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
 			ICMPSTAT_INC(icps_bmcasttstamp);
@@ -595,7 +600,6 @@
 		}
 		ifa_free(&ia->ia_ifa);
 reflect:
-		ip->ip_len += hlen;	/* since ip_input deducts this */
 		ICMPSTAT_INC(icps_reflect);
 		ICMPSTAT_INC(icps_outhist[icp->icmp_type]);
 		icmp_reflect(m);
@@ -656,9 +660,6 @@
 			  (struct sockaddr *)&icmpgw, fibnum);
 		}
 		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
-#ifdef IPSEC
-		key_sa_routechange((struct sockaddr *)&icmpsrc);
-#endif
 		break;
 
 	/*
@@ -705,8 +706,6 @@
 		goto done;	/* Ip_output() will check for broadcast */
 	}
 
-	m_addr_changed(m);
-
 	t = ip->ip_dst;
 	ip->ip_dst = ip->ip_src;
 
@@ -815,7 +814,7 @@
 		 */
 		cp = (u_char *) (ip + 1);
 		if ((opts = ip_srcroute(m)) == 0 &&
-		    (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
+		    (opts = m_gethdr(M_NOWAIT, MT_DATA))) {
 			opts->m_len = sizeof(struct in_addr);
 			mtod(opts, struct in_addr *)->s_addr = 0;
 		}
@@ -863,19 +862,7 @@
 			    printf("%d\n", opts->m_len);
 #endif
 		}
-		/*
-		 * Now strip out original options by copying rest of first
-		 * mbuf's data back, and adjust the IP length.
-		 */
-		ip->ip_len -= optlen;
-		ip->ip_v = IPVERSION;
-		ip->ip_hl = 5;
-		m->m_len -= optlen;
-		if (m->m_flags & M_PKTHDR)
-			m->m_pkthdr.len -= optlen;
-		optlen += sizeof(struct ip);
-		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
-			 (unsigned)(m->m_len - sizeof(struct ip)));
+		ip_stripoptions(m);
 	}
 	m_tag_delete_nonpersistent(m);
 	m->m_flags &= ~(M_BCAST|M_MCAST);
@@ -901,7 +888,7 @@
 	m->m_len -= hlen;
 	icp = mtod(m, struct icmp *);
 	icp->icmp_cksum = 0;
-	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
+	icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
 	m->m_data -= hlen;
 	m->m_len += hlen;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;

Modified: trunk/sys/netinet/ip_icmp.h
===================================================================
--- trunk/sys/netinet/ip_icmp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_icmp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/ip_icmp.h 207369 2010-04-29 11:52:42Z bz $
+ * $FreeBSD: stable/10/sys/netinet/ip_icmp.h 207369 2010-04-29 11:52:42Z bz $
  */
 
 #ifndef _NETINET_IP_ICMP_H_

Modified: trunk/sys/netinet/ip_id.c
===================================================================
--- trunk/sys/netinet/ip_id.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_id.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_id.c 185571 2008-12-02 21:37:28Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_id.c 250300 2013-05-06 16:42:18Z andre $");
 
 /*
  * IP ID generation is a fascinating topic.

Modified: trunk/sys/netinet/ip_input.c
===================================================================
--- trunk/sys/netinet/ip_input.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_input.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_input.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_input.c 300518 2016-05-23 16:20:50Z loos $");
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
@@ -50,6 +51,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
+#include <sys/sdt.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
@@ -61,9 +63,9 @@
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/vnet.h>
-#include <net/flowtable.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
@@ -154,11 +156,6 @@
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
-VNET_DEFINE(struct ipstat, ipstat);
-SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
-    &VNET_NAME(ipstat), ipstat,
-    "IP statistics (struct ipstat, netinet/ip_var.h)");
-
 static VNET_DEFINE(uma_zone_t, ipq_zone);
 static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
 static struct mtx ipqlock;
@@ -201,30 +198,29 @@
     "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 
-#ifdef FLOWTABLE
-static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip_ft);
-#define	V_ip_output_flowtable_size	VNET(ip_output_flowtable_size)
+static void	ip_freef(struct ipqhead *, struct ipq *);
 
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
-    &VNET_NAME(ip_output_flowtable_size), 2048,
-    "number of entries in the per-cpu output flow caches");
-#endif
+/*
+ * IP statistics are stored in the "array" of counter(9)s.
+ */
+VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
+VNET_PCPUSTAT_SYSINIT(ipstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
+    "IP statistics (struct ipstat, netinet/ip_var.h)");
 
-static void	ip_freef(struct ipqhead *, struct ipq *);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ipstat);
+#endif /* VIMAGE */
 
 /*
  * Kernel module interface for updating ipstat.  The argument is an index
- * into ipstat treated as an array of u_long.  While this encodes the general
- * layout of ipstat into the caller, it doesn't encode its location, so that
- * future changes to add, for example, per-CPU stats support won't cause
- * binary compatibility problems for kernel modules.
+ * into ipstat treated as an array.
  */
 void
 kmod_ipstat_inc(int statnum)
 {
 
-	(*((u_long *)&V_ipstat + statnum))++;
+	counter_u64_add(VNET(ipstat)[statnum], 1);
 }
 
 void
@@ -231,7 +227,7 @@
 kmod_ipstat_dec(int statnum)
 {
 
-	(*((u_long *)&V_ipstat + statnum))--;
+	counter_u64_add(VNET(ipstat)[statnum], -1);
 }
 
 static int
@@ -303,24 +299,6 @@
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
-#ifdef FLOWTABLE
-	if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
-		&V_ip_output_flowtable_size)) {
-		if (V_ip_output_flowtable_size < 256)
-			V_ip_output_flowtable_size = 256;
-		if (!powerof2(V_ip_output_flowtable_size)) {
-			printf("flowtable must be power of 2 size\n");
-			V_ip_output_flowtable_size = 2048;
-		}
-	} else {
-		/*
-		 * round up to the next power of 2
-		 */
-		V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
-	}
-	V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
-#endif
-
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
@@ -357,7 +335,12 @@
 void
 ip_destroy(void)
 {
+	int i;
 
+	if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
+		printf("%s: WARNING: unable to unregister pfil hook, "
+		    "error %d\n", __func__, i);
+
 	/* Cleanup in_ifaddr hash table; should be empty. */
 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
 
@@ -381,7 +364,7 @@
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
-	u_short sum;
+	uint16_t sum, ip_len;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
@@ -388,14 +371,11 @@
 	M_ASSERTPKTHDR(m);
 
 	if (m->m_flags & M_FASTFWD_OURS) {
-		/*
-		 * Firewall or NAT changed destination to local.
-		 * We expect ip_len and ip_off to be in host byte order.
-		 */
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
+		ip_len = ntohs(ip->ip_len);
 		goto ours;
 	}
 
@@ -429,6 +409,8 @@
 		ip = mtod(m, struct ip *);
 	}
 
+	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
+
 	/* 127/8 must not appear on wire - RFC1122 */
 	ifp = m->m_pkthdr.rcvif;
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
@@ -459,15 +441,11 @@
 		return;
 #endif
 
-	/*
-	 * Convert fields to host representation.
-	 */
-	ip->ip_len = ntohs(ip->ip_len);
-	if (ip->ip_len < hlen) {
+	ip_len = ntohs(ip->ip_len);
+	if (ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
-	ip->ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Check that the amount of data in the buffers
@@ -475,17 +453,17 @@
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
-	if (m->m_pkthdr.len < ip->ip_len) {
+	if (m->m_pkthdr.len < ip_len) {
 tooshort:
 		IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
-	if (m->m_pkthdr.len > ip->ip_len) {
+	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
-			m->m_len = ip->ip_len;
-			m->m_pkthdr.len = ip->ip_len;
+			m->m_len = ip_len;
+			m->m_pkthdr.len = ip_len;
 		} else
-			m_adj(m, ip->ip_len - m->m_pkthdr.len);
+			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 #ifdef IPSEC
 	/*
@@ -522,8 +500,7 @@
 		goto ours;
 	}
 	if (m->m_flags & M_IP_NEXTHOP) {
-		dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
-		if (dchg != 0) {
+		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
 			/*
 			 * Directly ship the packet on.  This allows
 			 * forwarding packets originally destined to us
@@ -534,6 +511,7 @@
 		}
 	}
 passin:
+
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
@@ -728,7 +706,8 @@
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
-	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+		/* XXXGL: shouldn't we save & set m_flags? */
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
@@ -737,12 +716,6 @@
 		hlen = ip->ip_hl << 2;
 	}
 
-	/*
-	 * Further protocols expect the packet length to be w/o the
-	 * IP header.
-	 */
-	ip->ip_len -= hlen;
-
 #ifdef IPSEC
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
@@ -827,6 +800,8 @@
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
+#define	M_IP_FRAG	M_PROTO9
+
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
@@ -910,28 +885,27 @@
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
-	ip->ip_len -= hlen;
-	if (ip->ip_off & IP_MF) {
+	ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
+	if (ip->ip_off & htons(IP_MF)) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
-		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
+		if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
 			IPSTAT_INC(ips_toosmall); /* XXX */
 			goto dropfrag;
 		}
-		m->m_flags |= M_FRAG;
+		m->m_flags |= M_IP_FRAG;
 	} else
-		m->m_flags &= ~M_FRAG;
-	ip->ip_off <<= 3;
+		m->m_flags &= ~M_IP_FRAG;
+	ip->ip_off = htons(ntohs(ip->ip_off) << 3);
 
-
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	IPSTAT_INC(ips_fragments);
-	m->m_pkthdr.header = ip;
+	m->m_pkthdr.PH_loc.ptr = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
@@ -974,7 +948,7 @@
 #endif
 	}
 
-#define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
+#define GETIP(m)	((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
@@ -996,7 +970,7 @@
 	 * Find a segment which begins after this one does.
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
-		if (GETIP(q)->ip_off > ip->ip_off)
+		if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
 			break;
 
 	/*
@@ -1009,14 +983,15 @@
 	 * segment, then it's checksum is invalidated.
 	 */
 	if (p) {
-		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
+		i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
+		    ntohs(ip->ip_off);
 		if (i > 0) {
-			if (i >= ip->ip_len)
+			if (i >= ntohs(ip->ip_len))
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
-			ip->ip_off += i;
-			ip->ip_len -= i;
+			ip->ip_off = htons(ntohs(ip->ip_off) + i);
+			ip->ip_len = htons(ntohs(ip->ip_len) - i);
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
@@ -1029,12 +1004,13 @@
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
-	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
-	     q = nq) {
-		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
-		if (i < GETIP(q)->ip_len) {
-			GETIP(q)->ip_len -= i;
-			GETIP(q)->ip_off += i;
+	for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
+	    ntohs(GETIP(q)->ip_off); q = nq) {
+		i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
+		    ntohs(GETIP(q)->ip_off);
+		if (i < ntohs(GETIP(q)->ip_len)) {
+			GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
+			GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
@@ -1058,7 +1034,7 @@
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
-		if (GETIP(q)->ip_off != next) {
+		if (ntohs(GETIP(q)->ip_off) != next) {
 			if (fp->ipq_nfrags > V_maxfragsperpacket) {
 				IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 				ip_freef(head, fp);
@@ -1065,10 +1041,10 @@
 			}
 			goto done;
 		}
-		next += GETIP(q)->ip_len;
+		next += ntohs(GETIP(q)->ip_len);
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
-	if (p->m_flags & M_FRAG) {
+	if (p->m_flags & M_IP_FRAG) {
 		if (fp->ipq_nfrags > V_maxfragsperpacket) {
 			IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 			ip_freef(head, fp);
@@ -1109,8 +1085,9 @@
 	 * (and not in for{} loop), though it implies we are not going to
 	 * reassemble more than 64k fragments.
 	 */
-	m->m_pkthdr.csum_data =
-	    (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
+	while (m->m_pkthdr.csum_data & 0xffff0000)
+		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+		    (m->m_pkthdr.csum_data >> 16);
 #ifdef MAC
 	mac_ipq_reassemble(fp, m);
 	mac_ipq_destroy(fp);
@@ -1121,7 +1098,7 @@
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
-	ip->ip_len = (ip->ip_hl << 2) + next;
+	ip->ip_len = htons((ip->ip_hl << 2) + next);
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
@@ -1368,6 +1345,7 @@
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia;
 	struct mbuf *mcopy;
+	struct sockaddr_in *sin;
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
@@ -1389,7 +1367,23 @@
 	}
 #endif
 
-	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
+	bzero(&ro, sizeof(ro));
+	sin = (struct sockaddr_in *)&ro.ro_dst;
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_addr = ip->ip_dst;
+#ifdef RADIX_MPATH
+	rtalloc_mpath_fib(&ro,
+	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+	    M_GETFIB(m));
+#else
+	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
+#endif
+	if (ro.ro_rt != NULL) {
+		ia = ifatoia(ro.ro_rt->rt_ifa);
+		ifa_ref(&ia->ia_ifa);
+	} else
+		ia = NULL;
 #ifndef IPSEC
 	/*
 	 * 'ia' may be NULL if there is no route for this destination.
@@ -1398,6 +1392,7 @@
 	 */
 	if (!srcrt && ia == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+		RO_RTFREE(&ro);
 		return;
 	}
 #endif
@@ -1418,8 +1413,8 @@
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
-	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
-	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
+	mcopy = m_gethdr(M_NOWAIT, m->m_type);
+	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
@@ -1430,7 +1425,7 @@
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
-		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
+		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
@@ -1454,16 +1449,8 @@
 	dest.s_addr = 0;
 	if (!srcrt && V_ipsendredirects &&
 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
-		struct sockaddr_in *sin;
 		struct rtentry *rt;
 
-		bzero(&ro, sizeof(ro));
-		sin = (struct sockaddr_in *)&ro.ro_dst;
-		sin->sin_family = AF_INET;
-		sin->sin_len = sizeof(*sin);
-		sin->sin_addr = ip->ip_dst;
-		in_rtalloc_ign(&ro, 0, M_GETFIB(m));
-
 		rt = ro.ro_rt;
 
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
@@ -1482,20 +1469,12 @@
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
-		if (rt)
-			RTFREE(rt);
 	}
 
-	/*
-	 * Try to cache the route MTU from ip_output so we can consider it for
-	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
-	 */
-	bzero(&ro, sizeof(ro));
-
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	if (error == EMSGSIZE && ro.ro_rt)
-		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+		mtu = ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
 
 	if (error)
@@ -1558,7 +1537,7 @@
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
-				mtu = ip_next_mtu(ip->ip_len, 0);
+				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
@@ -1604,8 +1583,8 @@
 
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
-			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
-			SCM_BINTIME, SOL_SOCKET);
+			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
+			    SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
@@ -1613,20 +1592,20 @@
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
-			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
-				SCM_TIMESTAMP, SOL_SOCKET);
+			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
+			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
-		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
-		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
+		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
@@ -1638,7 +1617,7 @@
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
-		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
@@ -1645,7 +1624,7 @@
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
-		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
+		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
@@ -1660,32 +1639,32 @@
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
-		if (((ifp = m->m_pkthdr.rcvif)) 
-		&& ( ifp->if_index && (ifp->if_index <= V_if_index))) {
+		if ((ifp = m->m_pkthdr.rcvif) &&
+		    ifp->if_index && ifp->if_index <= V_if_index) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
-			if ((sdp->sdl_family != AF_LINK)
-			|| (sdp->sdl_len > sizeof(sdlbuf))) {
+			if (sdp->sdl_family != AF_LINK ||
+			    sdp->sdl_len > sizeof(sdlbuf)) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:	
-			sdl2->sdl_len
-				= offsetof(struct sockaddr_dl, sdl_data[0]);
+			sdl2->sdl_len =
+			    offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
-		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
-			IP_RECVIF, IPPROTO_IP);
+		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
+		    IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTOS) {
-		*mp = sbcreatecontrol((caddr_t) &ip->ip_tos,
+		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;

Modified: trunk/sys/netinet/ip_ipsec.c
===================================================================
--- trunk/sys/netinet/ip_ipsec.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ipsec.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_ipsec.c 222845 2011-06-08 03:02:11Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_ipsec.c 291355 2015-11-26 02:24:45Z gnn $");
 
 #include "opt_ipsec.h"
 #include "opt_sctp.h"
@@ -46,7 +46,6 @@
 #include <sys/sysctl.h>
 
 #include <net/if.h>
-#include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
@@ -93,7 +92,7 @@
 int
 ip_ipsec_filtertunnel(struct mbuf *m)
 {
-#if defined(IPSEC)
+#ifdef IPSEC
 
 	/*
 	 * Bypass packet filtering for packets previously handled by IPsec.
@@ -118,10 +117,9 @@
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secpolicy *sp;
-	int s, error;
+	int error;
 
 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
-	s = splnet();
 	if (mtag != NULL) {
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -130,7 +128,6 @@
 					   IP_FORWARDING, &error);   
 	}
 	if (sp == NULL) {	/* NB: can happen if error */
-		splx(s);
 		/*XXX error stat???*/
 		DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
 		return 1;
@@ -141,7 +138,6 @@
 	 */
 	error = ipsec_in_reject(sp, m);
 	KEY_FREESP(&sp);
-	splx(s);
 	if (error) {
 		IPSTAT_INC(ips_cantforward);
 		return 1;
@@ -165,7 +161,7 @@
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secpolicy *sp;
-	int s, error;
+	int error;
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
@@ -179,7 +175,6 @@
 		 * packet is returned to the ip input queue for delivery.
 		 */ 
 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
-		s = splnet();
 		if (mtag != NULL) {
 			tdbi = (struct tdb_ident *)(mtag + 1);
 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -199,7 +194,6 @@
 			DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
 			return 1;
 		}
-		splx(s);
 		if (error)
 			return 1;
 	}
@@ -221,37 +215,7 @@
 	 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
 	 * XXX quickhack!!!
 	 */
-	struct secpolicy *sp = NULL;
-	int ipsecerror;
-	int ipsechdr;
-	struct route *ro;
-	sp = ipsec_getpolicybyaddr(m,
-				   IPSEC_DIR_OUTBOUND,
-				   IP_FORWARDING,
-				   &ipsecerror);
-	if (sp != NULL) {
-		/* count IPsec header size */
-		ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL);
-
-		/*
-		 * find the correct route for outer IPv4
-		 * header, compute tunnel MTU.
-		 */
-		if (sp->req != NULL &&
-		    sp->req->sav != NULL &&
-		    sp->req->sav->sah != NULL) {
-			ro = &sp->req->sav->sah->route_cache.sa_route;
-			if (ro->ro_rt && ro->ro_rt->rt_ifp) {
-				mtu =
-				    ro->ro_rt->rt_rmx.rmx_mtu ?
-				    ro->ro_rt->rt_rmx.rmx_mtu :
-				    ro->ro_rt->rt_ifp->if_mtu;
-				mtu -= ipsechdr;
-			}
-		}
-		KEY_FREESP(&sp);
-	}
-	return mtu;
+	return (mtu - ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL));
 }
 
 /*
@@ -265,10 +229,12 @@
 {
 #ifdef IPSEC
 	struct secpolicy *sp = NULL;
-	struct ip *ip = mtod(*m, struct ip *);
 	struct tdb_ident *tdbi;
 	struct m_tag *mtag;
-	int s;
+
+	if (!key_havesp(IPSEC_DIR_OUTBOUND))
+		return 0;
+
 	/*
 	 * Check the security policy (SP) for the packet and, if
 	 * required, do IPsec-related processing.  There are two
@@ -279,7 +245,6 @@
 	 * the lookup and related policy checking.
 	 */
 	mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
-	s = splnet();
 	if (mtag != NULL) {
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
@@ -328,7 +293,6 @@
 				 *     done: below.
 				 */
 				KEY_FREESP(&sp), sp = NULL;
-				splx(s);
 				goto done;
 			}
 		}
@@ -343,12 +307,12 @@
 		}
 #ifdef SCTP
 		if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) {
+			struct ip *ip = mtod(*m, struct ip *);
+
 			sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2));
 			(*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 		}
 #endif
-		ip->ip_len = htons(ip->ip_len);
-		ip->ip_off = htons(ip->ip_off);
 
 		/* NB: callee frees mbuf */
 		*error = ipsec4_process_packet(*m, sp->req, *flags, 0);
@@ -359,8 +323,6 @@
 			 * IPsec processing and return without error.
 			 */
 			*error = 0;
-			ip->ip_len = ntohs(ip->ip_len);
-			ip->ip_off = ntohs(ip->ip_off);
 			goto done;
 		}
 		/*
@@ -372,10 +334,8 @@
 		 */
 		if (*error == ENOENT)
 			*error = 0;
-		splx(s);
 		goto reinjected;
 	} else {	/* sp == NULL */
-		splx(s);
 
 		if (*error != 0) {
 			/*

Modified: trunk/sys/netinet/ip_ipsec.h
===================================================================
--- trunk/sys/netinet/ip_ipsec.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ipsec.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/ip_ipsec.h 199102 2009-11-09 19:53:34Z trasz $
+ * $FreeBSD: stable/10/sys/netinet/ip_ipsec.h 199102 2009-11-09 19:53:34Z trasz $
  */
 
 #ifndef _NETINET_IP_IPSEC_H_

Modified: trunk/sys/netinet/ip_mroute.c
===================================================================
--- trunk/sys/netinet/ip_mroute.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_mroute.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_mroute.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_mroute.c 314667 2017-03-04 13:03:31Z avg $");
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
@@ -94,6 +94,7 @@
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
+#include <sys/counter.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
@@ -146,11 +147,11 @@
 static int ip_mrouter_cnt;	/* # of vnets with active mrouters */
 static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
 
-static VNET_DEFINE(struct mrtstat, mrtstat);
-#define	V_mrtstat		VNET(mrtstat)
-SYSCTL_VNET_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
-    &VNET_NAME(mrtstat), mrtstat,
-    "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
+VNET_PCPUSTAT_SYSINIT(mrtstat);
+VNET_PCPUSTAT_SYSUNINIT(mrtstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
+    mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
     "netinet/ip_mroute.h)");
 
 static VNET_DEFINE(u_long, mfchash);
@@ -226,13 +227,13 @@
 
 #define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
 
-static VNET_DEFINE(struct pimstat, pimstat);
-#define	V_pimstat		VNET(pimstat)
+static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
+VNET_PCPUSTAT_SYSINIT(pimstat);
+VNET_PCPUSTAT_SYSUNINIT(pimstat);
 
 SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
-SYSCTL_VNET_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
-    &VNET_NAME(pimstat), pimstat,
-    "PIM Statistics (struct pimstat, netinet/pim_var.h)");
+SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
+    pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
 
 static u_long	pim_squelch_wholepkt = 0;
 SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
@@ -609,7 +610,7 @@
 if_detached_event(void *arg __unused, struct ifnet *ifp)
 {
     vifi_t vifi;
-    int i;
+    u_long i;
 
     MROUTER_LOCK();
 
@@ -634,8 +635,8 @@
 		continue;
 	for (i = 0; i < mfchashsize; i++) {
 		struct mfc *rt, *nrt;
-		for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
-			nrt = LIST_NEXT(rt, mfc_hash);
+
+		LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 			if (rt->mfc_parent == vifi) {
 				expire_mfc(rt);
 			}
@@ -704,10 +705,9 @@
 static int
 X_ip_mrouter_done(void)
 {
+    struct ifnet *ifp;
+    u_long i;
     vifi_t vifi;
-    int i;
-    struct ifnet *ifp;
-    struct ifreq ifr;
 
     MROUTER_LOCK();
 
@@ -732,11 +732,6 @@
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
 		!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
-	    struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
-
-	    so->sin_len = sizeof(struct sockaddr_in);
-	    so->sin_family = AF_INET;
-	    so->sin_addr.s_addr = INADDR_ANY;
 	    ifp = V_viftable[vifi].v_ifp;
 	    if_allmulti(ifp, 0);
 	}
@@ -759,8 +754,8 @@
      */
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
-	for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
-		nrt = LIST_NEXT(rt, mfc_hash);
+
+	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		expire_mfc(rt);
 	}
     }
@@ -803,7 +798,7 @@
 int
 set_api_config(uint32_t *apival)
 {
-    int i;
+    u_long i;
 
     /*
      * We can set the API capabilities only if it is the first operation
@@ -825,6 +820,7 @@
 
     for (i = 0; i < mfchashsize; i++) {
 	if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
+	    MFC_UNLOCK();
 	    *apival = 0;
 	    return EPERM;
 	}
@@ -1307,7 +1303,7 @@
 	    return ENOBUFS;
 	}
 
-	mb0 = m_copypacket(m, M_DONTWAIT);
+	mb0 = m_copypacket(m, M_NOWAIT);
 	if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
 	    mb0 = m_pullup(mb0, hlen);
 	if (mb0 == NULL) {
@@ -1438,7 +1434,7 @@
 static void
 expire_upcalls(void *arg)
 {
-    int i;
+    u_long i;
 
     CURVNET_SET((struct vnet *) arg);
 
@@ -1450,9 +1446,7 @@
 	if (V_nexpire[i] == 0)
 	    continue;
 
-	for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
-		nrt = LIST_NEXT(rt, mfc_hash);
-
+	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		if (TAILQ_EMPTY(&rt->mfc_stall))
 			continue;
 
@@ -1494,7 +1488,7 @@
 {
     struct ip  *ip = mtod(m, struct ip *);
     vifi_t vifi;
-    int plen = ip->ip_len;
+    int plen = ntohs(ip->ip_len);
 
     VIF_LOCK_ASSERT();
 
@@ -1670,7 +1664,7 @@
      * the IP header is actually copied, not just referenced,
      * so that ip_output() only scribbles on the copy.
      */
-    mb_copy = m_copypacket(m, M_DONTWAIT);
+    mb_copy = m_copypacket(m, M_NOWAIT);
     if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
 	mb_copy = m_pullup(mb_copy, hlen);
     if (mb_copy == NULL)
@@ -2084,13 +2078,12 @@
      * Allocate a new mbuf, initialize it with the header and
      * the payload for the pending calls.
      */
-    MGETHDR(m, M_DONTWAIT, MT_DATA);
+    m = m_gethdr(M_NOWAIT, MT_DATA);
     if (m == NULL) {
 	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
 	return;
     }
 
-    m->m_len = m->m_pkthdr.len = 0;
     m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
     m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
 
@@ -2385,7 +2378,7 @@
      * Copy the old packet & pullup its IP header into the
      * new mbuf so we can modify it.
      */
-    mb_copy = m_copypacket(m, M_DONTWAIT);
+    mb_copy = m_copypacket(m, M_NOWAIT);
     if (mb_copy == NULL)
 	return NULL;
     mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
@@ -2399,15 +2392,14 @@
     /* Compute the MTU after the PIM Register encapsulation */
     mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
 
-    if (ip->ip_len <= mtu) {
+    if (ntohs(ip->ip_len) <= mtu) {
 	/* Turn the IP header into a valid one */
-	ip->ip_len = htons(ip->ip_len);
-	ip->ip_off = htons(ip->ip_off);
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
     } else {
 	/* Fragment the packet */
-	if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) {
+	mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
+	if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
 	    m_freem(mb_copy);
 	    return NULL;
 	}
@@ -2432,7 +2424,7 @@
     /*
      * Add a new mbuf with an upcall header
      */
-    MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+    mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
@@ -2490,7 +2482,7 @@
     /*
      * Add a new mbuf with the encapsulating header
      */
-    MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+    mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
@@ -2507,7 +2499,8 @@
     ip_outer = mtod(mb_first, struct ip *);
     *ip_outer = pim_encap_iphdr;
     ip_outer->ip_id = ip_newid();
-    ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
+    ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
+	sizeof(pim_encap_pimhdr));
     ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
     ip_outer->ip_dst = rt->mfc_rp;
     /*
@@ -2515,8 +2508,8 @@
      * IP_DF bit.
      */
     ip_outer->ip_tos = ip->ip_tos;
-    if (ntohs(ip->ip_off) & IP_DF)
-	ip_outer->ip_off |= IP_DF;
+    if (ip->ip_off & htons(IP_DF))
+	ip_outer->ip_off |= htons(IP_DF);
     pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
 					 + sizeof(pim_encap_iphdr));
     *pimhdr = pim_encap_pimhdr;
@@ -2564,14 +2557,13 @@
  * is passed to if_simloop().
  */
 void
-pim_input(struct mbuf *m, int off)
+pim_input(struct mbuf *m, int iphlen)
 {
     struct ip *ip = mtod(m, struct ip *);
     struct pim *pim;
     int minlen;
-    int datalen = ip->ip_len;
+    int datalen = ntohs(ip->ip_len) - iphlen;
     int ip_tos;
-    int iphlen = off;
 
     /* Keep statistics */
     PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2601,8 +2593,7 @@
      * Get the IP and PIM headers in contiguous memory, and
      * possibly the PIM REGISTER header.
      */
-    if ((m->m_flags & M_EXT || m->m_len < minlen) &&
-	(m = m_pullup(m, minlen)) == 0) {
+    if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) {
 	CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
 	return;
     }
@@ -2817,9 +2808,9 @@
 
 	MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
 	bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
-	callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE);
-	callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE);
-	callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE);
+	callout_init(&V_expire_upcalls_ch, 1);
+	callout_init(&V_bw_upcalls_ch, 1);
+	callout_init(&V_bw_meter_ch, 1);
 }
 
 VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
@@ -2847,7 +2838,8 @@
 	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 
 	    if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
 	if (if_detach_event_tag == NULL) {
-		printf("ip_mroute: unable to ifnet_deperture_even handler\n");
+		printf("ip_mroute: unable to register "
+		    "ifnet_departure_event handler\n");
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}

Modified: trunk/sys/netinet/ip_mroute.h
===================================================================
--- trunk/sys/netinet/ip_mroute.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_mroute.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_mroute.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/ip_mroute.h 240305 2012-09-10 11:38:02Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/ip_mroute.h 253084 2013-07-09 09:50:15Z ae $
  */
 
 #ifndef _NETINET_IP_MROUTE_H_
@@ -207,23 +207,24 @@
  * The kernel's multicast routing statistics.
  */
 struct mrtstat {
-    u_long	mrts_mfc_lookups;	/* # forw. cache hash table hits   */
-    u_long	mrts_mfc_misses;	/* # forw. cache hash table misses */
-    u_long	mrts_upcalls;		/* # calls to multicast routing daemon */
-    u_long	mrts_no_route;		/* no route for packet's origin    */
-    u_long	mrts_bad_tunnel;	/* malformed tunnel options        */
-    u_long	mrts_cant_tunnel;	/* no room for tunnel options      */
-    u_long	mrts_wrong_if;		/* arrived on wrong interface	   */
-    u_long	mrts_upq_ovflw;		/* upcall Q overflow		   */
-    u_long	mrts_cache_cleanups;	/* # entries with no upcalls	   */
-    u_long	mrts_drop_sel;		/* pkts dropped selectively        */
-    u_long	mrts_q_overflow;	/* pkts dropped - Q overflow       */
-    u_long	mrts_pkt2large;		/* pkts dropped - size > BKT SIZE  */
-    u_long	mrts_upq_sockfull;	/* upcalls dropped - socket full */
+	uint64_t mrts_mfc_lookups;    /* # forw. cache hash table hits   */
+	uint64_t mrts_mfc_misses;     /* # forw. cache hash table misses */
+	uint64_t mrts_upcalls;	      /* # calls to multicast routing daemon */
+	uint64_t mrts_no_route;	      /* no route for packet's origin    */
+	uint64_t mrts_bad_tunnel;     /* malformed tunnel options        */
+	uint64_t mrts_cant_tunnel;    /* no room for tunnel options      */
+	uint64_t mrts_wrong_if;	      /* arrived on wrong interface	 */
+	uint64_t mrts_upq_ovflw;      /* upcall Q overflow		 */
+	uint64_t mrts_cache_cleanups; /* # entries with no upcalls	 */
+	uint64_t mrts_drop_sel;	      /* pkts dropped selectively        */
+	uint64_t mrts_q_overflow;     /* pkts dropped - Q overflow       */
+	uint64_t mrts_pkt2large;      /* pkts dropped - size > BKT SIZE  */
+	uint64_t mrts_upq_sockfull;   /* upcalls dropped - socket full   */
 };
 
 #ifdef _KERNEL
-#define	MRTSTAT_ADD(name, val)	V_mrtstat.name += (val)
+#define	MRTSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct mrtstat, mrtstat, name, (val))
 #define	MRTSTAT_INC(name)	MRTSTAT_ADD(name, 1)
 #endif
 

Modified: trunk/sys/netinet/ip_options.c
===================================================================
--- trunk/sys/netinet/ip_options.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_options.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_options.c 213832 2010-10-14 12:32:49Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_options.c 272869 2014-10-09 23:46:17Z hrs $");
 
 #include "opt_ipstealth.h"
 
@@ -66,18 +66,21 @@
 
 #include <sys/socketvar.h>
 
-static int	ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
-    &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+static VNET_DEFINE(int, ip_dosourceroute);
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
+    "Enable forwarding source routed IP packets");
+#define	V_ip_dosourceroute	VNET(ip_dosourceroute)
 
-static int	ip_acceptsourceroute = 0;
+static VNET_DEFINE(int,	ip_acceptsourceroute);
 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 
-    CTLFLAG_RW, &ip_acceptsourceroute, 0, 
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0, 
     "Enable accepting source routed IP packets");
+#define	V_ip_acceptsourceroute	VNET(ip_acceptsourceroute)
 
-int		ip_doopts = 1;	/* 0 = ignore, 1 = process, 2 = reject */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
-    &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+VNET_DEFINE(int, ip_doopts) = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(ip_doopts), 0, "Enable IP options processing ([LS]SRR, RR, TS)");
 
 static void	save_rte(struct mbuf *m, u_char *, struct in_addr);
 
@@ -105,9 +108,9 @@
 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
 
 	/* Ignore or reject packets with IP options. */
-	if (ip_doopts == 0)
+	if (V_ip_doopts == 0)
 		return 0;
-	else if (ip_doopts == 2) {
+	else if (V_ip_doopts == 2) {
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_FILTER_PROHIB;
 		goto bad;
@@ -168,7 +171,7 @@
 					code = ICMP_UNREACH_SRCFAIL;
 					goto bad;
 				}
-				if (!ip_dosourceroute)
+				if (!V_ip_dosourceroute)
 					goto nosourcerouting;
 				/*
 				 * Loose routing, and not at next destination
@@ -181,7 +184,7 @@
 				/*
 				 * End of source route.  Should be for us.
 				 */
-				if (!ip_acceptsourceroute)
+				if (!V_ip_acceptsourceroute)
 					goto nosourcerouting;
 				save_rte(m, cp, ip->ip_src);
 				break;
@@ -190,7 +193,7 @@
 			if (V_ipstealth)
 				goto dropit;
 #endif
-			if (!ip_dosourceroute) {
+			if (!V_ip_dosourceroute) {
 				if (V_ipforwarding) {
 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
 					/*
@@ -412,7 +415,7 @@
 
 	if (opts->ip_nhops == 0)
 		return (NULL);
-	m = m_get(M_DONTWAIT, MT_DATA);
+	m = m_get(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
@@ -454,29 +457,23 @@
 }
 
 /*
- * Strip out IP options, at higher level protocol in the kernel.  Second
- * argument is buffer to which options will be moved, and return value is
- * their length.
- *
- * XXX should be deleted; last arg currently ignored.
+ * Strip out IP options, at higher level protocol in the kernel.
  */
 void
-ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+ip_stripoptions(struct mbuf *m)
 {
-	int i;
 	struct ip *ip = mtod(m, struct ip *);
-	caddr_t opts;
 	int olen;
 
-	olen = (ip->ip_hl << 2) - sizeof (struct ip);
-	opts = (caddr_t)(ip + 1);
-	i = m->m_len - (sizeof (struct ip) + olen);
-	bcopy(opts + olen, opts, (unsigned)i);
+	olen = (ip->ip_hl << 2) - sizeof(struct ip);
 	m->m_len -= olen;
 	if (m->m_flags & M_PKTHDR)
 		m->m_pkthdr.len -= olen;
-	ip->ip_v = IPVERSION;
+	ip->ip_len = htons(ntohs(ip->ip_len) - olen);
 	ip->ip_hl = sizeof(struct ip) >> 2;
+
+	bcopy((char *)ip + sizeof(struct ip) + olen, (ip + 1),
+	    (size_t )(m->m_len - sizeof(struct ip)));
 }
 
 /*
@@ -495,7 +492,7 @@
 	unsigned optlen;
 
 	optlen = opt->m_len - sizeof(p->ipopt_dst);
-	if (optlen + ip->ip_len > IP_MAXPACKET) {
+	if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) {
 		*phlen = 0;
 		return (m);		/* XXX should fail */
 	}
@@ -502,12 +499,12 @@
 	if (p->ipopt_dst.s_addr)
 		ip->ip_dst = p->ipopt_dst;
 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
-		MGETHDR(n, M_DONTWAIT, MT_DATA);
+		n = m_gethdr(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			*phlen = 0;
 			return (m);
 		}
-		M_MOVE_PKTHDR(n, m);
+		m_move_pkthdr(n, m);
 		n->m_pkthdr.rcvif = NULL;
 		n->m_pkthdr.len += optlen;
 		m->m_len -= sizeof(struct ip);
@@ -528,7 +525,7 @@
 	*phlen = sizeof(struct ip) + optlen;
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = *phlen >> 2;
-	ip->ip_len += optlen;
+	ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 	return (m);
 }
 

Modified: trunk/sys/netinet/ip_options.h
===================================================================
--- trunk/sys/netinet/ip_options.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_options.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/ip_options.h 189343 2009-03-04 02:51:22Z bms $
+ * $FreeBSD: stable/10/sys/netinet/ip_options.h 272868 2014-10-09 23:45:26Z hrs $
  */
 
 #ifndef _NETINET_IP_OPTIONS_H_
@@ -48,7 +48,8 @@
 	struct	ipoptrt ip_srcrt;
 };
 
-extern	int	ip_doopts;		/* process or ignore IP options */
+VNET_DECLARE(int, ip_doopts);		/* process or ignore IP options */
+#define	V_ip_doopts	VNET(ip_doopts)
 
 int		 ip_checkrouteralert(struct mbuf *);
 int		 ip_dooptions(struct mbuf *, int);
@@ -55,7 +56,7 @@
 struct mbuf	*ip_insertoptions(struct mbuf *, struct mbuf *, int *);
 int		 ip_optcopy(struct ip *, struct ip *);
 int		 ip_pcbopts(struct inpcb *, int, struct mbuf *);
-void		 ip_stripoptions(struct mbuf *, struct mbuf *);
+void		 ip_stripoptions(struct mbuf *);
 struct mbuf	*ip_srcroute(struct mbuf *);
 
 #endif /* !_NETINET_IP_OPTIONS_H_ */

Modified: trunk/sys/netinet/ip_output.c
===================================================================
--- trunk/sys/netinet/ip_output.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_output.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,13 +31,15 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_output.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_output.c 284496 2015-06-17 07:21:43Z hselasky $");
 
+#include "opt_inet.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
-#include "opt_route.h"
+#include "opt_kdtrace.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mpath.h"
+#include "opt_route.h"
 #include "opt_sctp.h"
 
 #include <sys/param.h>
@@ -48,6 +50,7 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
@@ -65,6 +68,7 @@
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
@@ -103,7 +107,6 @@
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
- * ip_len and ip_off are in host format.
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route ro is present and has ro_rt initialized, route lookup would be
@@ -125,12 +128,15 @@
 	int n;	/* scratchpad */
 	int error = 0;
 	struct sockaddr_in *dst;
+	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
-	int isbroadcast, sw_csum;
+	int isbroadcast;
+	uint16_t ip_len, ip_off;
 	struct route iproute;
 	struct rtentry *rte;	/* cache for ro->ro_rt */
 	struct in_addr odst;
 	struct m_tag *fwd_tag = NULL;
+	int have_ia_ref;
 #ifdef IPSEC
 	int no_route_but_check_spd = 0;
 #endif
@@ -139,9 +145,9 @@
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
-		if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
+		if (inp->inp_flowtype != M_HASHTYPE_NONE) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
-			m->m_flags |= M_FLOWID;
+			M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 		}
 	}
 
@@ -151,19 +157,8 @@
 	}
 
 #ifdef FLOWTABLE
-	if (ro->ro_rt == NULL) {
-		struct flentry *fle;
-			
-		/*
-		 * The flow table returns route entries valid for up to 30
-		 * seconds; we rely on the remainder of ip_output() taking no
-		 * longer than that long for the stability of ro_rt. The
-		 * flow ID assignment must have happened before this point.
-		 */
-		fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
-		if (fle != NULL)
-			flow_to_route(fle, ro);
-	}
+	if (ro->ro_rt == NULL)
+		(void )flowtable_lookup(AF_INET, m, ro);
 #endif
 
 	if (opt) {
@@ -173,6 +168,8 @@
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
+	ip_len = ntohs(ip->ip_len);
+	ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Fill in IP header.  If we are not allowing fragmentation,
@@ -195,9 +192,17 @@
 		hlen = ip->ip_hl << 2;
 	}
 
+	/*
+	 * dst/gw handling:
+	 *
+	 * dst can be rewritten but always point to &ro->ro_dst
+	 * gw is readonly but can be pointed either to dst OR rt_gatewy
+	 * therefore we need restore GW if we're re-doing lookup
+	 */
+	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 again:
-	dst = (struct sockaddr_in *)&ro->ro_dst;
 	ia = NULL;
+	have_ia_ref = 0;
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
@@ -214,6 +219,7 @@
 		RO_RTFREE(ro);
 		ro->ro_lle = NULL;
 		rte = NULL;
+		gw = dst;
 	}
 	if (rte == NULL && fwd_tag == NULL) {
 		bzero(dst, sizeof(*dst));
@@ -234,6 +240,7 @@
 			error = ENETUNREACH;
 			goto bad;
 		}
+		have_ia_ref = 1;
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
@@ -246,6 +253,7 @@
 			error = ENETUNREACH;
 			goto bad;
 		}
+		have_ia_ref = 1;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
@@ -257,6 +265,8 @@
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
+		if (ia)
+			have_ia_ref = 1;
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
@@ -292,15 +302,14 @@
 			goto bad;
 		}
 		ia = ifatoia(rte->rt_ifa);
-		ifa_ref(&ia->ia_ifa);
 		ifp = rte->rt_ifp;
-		rte->rt_rmx.rmx_pksent++;
+		counter_u64_add(rte->rt_pksent, 1);
 		if (rte->rt_flags & RTF_GATEWAY)
-			dst = (struct sockaddr_in *)rte->rt_gateway;
+			gw = (struct sockaddr_in *)rte->rt_gateway;
 		if (rte->rt_flags & RTF_HOST)
 			isbroadcast = (rte->rt_flags & RTF_BROADCAST);
 		else
-			isbroadcast = in_broadcast(dst->sin_addr, ifp);
+			isbroadcast = in_broadcast(gw->sin_addr, ifp);
 	}
 	/*
 	 * Calculate MTU.  If we have a route that is up, use that,
@@ -314,9 +323,9 @@
 		 * them, there is no way for one to update all its
 		 * routes when the MTU is changed.
 		 */
-		if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
-			rte->rt_rmx.rmx_mtu = ifp->if_mtu;
-		mtu = rte->rt_rmx.rmx_mtu;
+		if (rte->rt_mtu > ifp->if_mtu)
+			rte->rt_mtu = ifp->if_mtu;
+		mtu = rte->rt_mtu;
 	} else {
 		mtu = ifp->if_mtu;
 	}
@@ -326,11 +335,11 @@
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
 		/*
-		 * IP destination address is multicast.  Make sure "dst"
+		 * IP destination address is multicast.  Make sure "gw"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
-		dst = (struct sockaddr_in *)&ro->ro_dst;
+		gw = dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
@@ -436,7 +445,7 @@
 	 * packet or packet fragments, unless ALTQ is enabled on the given
 	 * interface in which case packetdrop should be done by queueing.
 	 */
-	n = ip->ip_len / mtu + 1; /* how many fragments ? */
+	n = ip_len / mtu + 1; /* how many fragments ? */
 	if (
 #ifdef ALTQ
 	    (!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
@@ -463,7 +472,7 @@
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
-		if (ip->ip_len > mtu) {
+		if (ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
@@ -530,7 +539,7 @@
 			error = netisr_queue(NETISR_IP, m);
 			goto done;
 		} else {
-			if (ia != NULL)
+			if (have_ia_ref)
 				ifa_free(&ia->ia_ifa);
 			goto again;	/* Redo the routing table lookup. */
 		}
@@ -558,12 +567,11 @@
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
-		dst = (struct sockaddr_in *)&ro->ro_dst;
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
-		if (ia != NULL)
+		if (have_ia_ref)
 			ifa_free(&ia->ia_ifa);
 		goto again;
 	}
@@ -580,31 +588,29 @@
 	}
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
-	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
-	if (sw_csum & CSUM_DELAY_DATA) {
+	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m);
-		sw_csum &= ~CSUM_DELAY_DATA;
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
-	if (sw_csum & CSUM_SCTP) {
+	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
-		sw_csum &= ~CSUM_SCTP;
+		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
-	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
-	if (ip->ip_len <= mtu ||
+	if (ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
-	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
-		ip->ip_len = htons(ip->ip_len);
-		ip->ip_off = htons(ip->ip_off);
+	    ((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
 		ip->ip_sum = 0;
-		if (sw_csum & CSUM_DELAY_IP)
+		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m, hlen);
+			m->m_pkthdr.csum_flags &= ~CSUM_IP;
+		}
 
 		/*
 		 * Record statistics for this interface address.
@@ -622,20 +628,21 @@
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
-			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
+			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
-		m->m_flags &= ~(M_PROTOFLAGS);
+		m_clrprotoflags(m);
+		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = (*ifp->if_output)(ifp, m,
-		    		(struct sockaddr *)dst, ro);
+		    (const struct sockaddr *)gw, ro);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
-	if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
+	if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		IPSTAT_INC(ips_cantfrag);
 		goto bad;
@@ -645,7 +652,7 @@
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
-	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
+	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 	for (; m; m = m0) {
@@ -661,10 +668,11 @@
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing upper layers.
 			 */
-			m->m_flags &= ~(M_PROTOFLAGS);
+			m_clrprotoflags(m);
 
+			IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 			error = (*ifp->if_output)(ifp, m,
-			    (struct sockaddr *)dst, ro);
+			    (const struct sockaddr *)gw, ro);
 		} else
 			m_freem(m);
 	}
@@ -675,7 +683,7 @@
 done:
 	if (ro == &iproute)
 		RO_RTFREE(ro);
-	if (ia != NULL)
+	if (have_ia_ref)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 bad:
@@ -690,11 +698,10 @@
  * chain of fragments that should be freed by the caller.
  *
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
- * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
-    u_long if_hwassist_flags, int sw_csum)
+    u_long if_hwassist_flags)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
@@ -704,8 +711,12 @@
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
+	uint16_t ip_len, ip_off;
 
-	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
+	ip_len = ntohs(ip->ip_len);
+	ip_off = ntohs(ip->ip_off);
+
+	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
 		IPSTAT_INC(ips_cantfrag);
 		return EMSGSIZE;
 	}
@@ -720,14 +731,12 @@
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
-	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
-	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
-	if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
-	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
 		sctp_delayed_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
@@ -744,10 +753,8 @@
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
-		struct mbuf *m;
 
-		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
-			off += m->m_len;
+		off = MIN(mtu, m0->m_pkthdr.len);
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an 
@@ -779,19 +786,31 @@
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
-	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
+	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
-		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
-		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
 		/*
+		 * Make sure the complete packet header gets copied
+		 * from the originating mbuf to the newly created
+		 * mbuf. This also ensures that existing firewall
+		 * classification(s), VLAN tags and so on get copied
+		 * to the resulting fragmented packet(s):
+		 */
+		if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
+			m_free(m);
+			error = ENOBUFS;
+			IPSTAT_INC(ips_odropped);
+			goto done;
+		}
+		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copym().
@@ -805,15 +824,14 @@
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
-		/* XXX do we need to add ip->ip_off below ? */
-		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
-		if (off + len >= ip->ip_len) {	/* last fragment */
-			len = ip->ip_len - off;
-			m->m_flags |= M_LASTFRAG;
-		} else
+		/* XXX do we need to add ip_off below ? */
+		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
+		if (off + len >= ip_len)
+			len = ip_len - off;
+		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
-		m->m_next = m_copym(m0, off, len, M_DONTWAIT);
+		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
@@ -821,36 +839,33 @@
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
-		m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
-		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
-		if (sw_csum & CSUM_DELAY_IP)
+		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 			mhip->ip_sum = in_cksum(m, mhlen);
+			m->m_pkthdr.csum_flags &= ~CSUM_IP;
+		}
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	IPSTAT_ADD(ips_ofragments, nfrags);
 
-	/* set first marker for fragment chain */
-	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
-	m0->m_pkthdr.csum_data = nfrags;
-
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 */
-	m_adj(m0, hlen + firstlen - ip->ip_len);
+	m_adj(m0, hlen + firstlen - ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
-	ip->ip_off |= IP_MF;
-	ip->ip_off = htons(ip->ip_off);
+	ip->ip_off = htons(ip_off | IP_MF);
 	ip->ip_sum = 0;
-	if (sw_csum & CSUM_DELAY_IP)
+	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 		ip->ip_sum = in_cksum(m0, hlen);
+		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
+	}
 
 done:
 	*m_frag = m0;
@@ -861,26 +876,23 @@
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *ip;
-	u_short csum, offset;
+	uint16_t csum, offset, ip_len;
 
 	ip = mtod(m, struct ip *);
 	offset = ip->ip_hl << 2 ;
-	csum = in_cksum_skip(m, ip->ip_len, offset);
+	ip_len = ntohs(ip->ip_len);
+	csum = in_cksum_skip(m, ip_len, offset);
 	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
 		csum = 0xffff;
 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
 
-	if (offset + sizeof(u_short) > m->m_len) {
-		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
-		    m->m_len, offset, ip->ip_p);
-		/*
-		 * XXX
-		 * this shouldn't happen, but if it does, the
-		 * correct behavior may be to insert the checksum
-		 * in the appropriate next mbuf in the chain.
-		 */
-		return;
+	/* find the mbuf in the chain where the checksum starts*/
+	while ((m != NULL) && (offset >= m->m_len)) {
+		offset -= m->m_len;
+		m = m->m_next;
 	}
+	KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
+	KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
 	*(u_short *)(m->m_data + offset) = csum;
 }
 
@@ -902,13 +914,10 @@
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(inp);
-				if (IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) {
-					if ((so->so_options &
-					    (SO_REUSEADDR | SO_REUSEPORT)) != 0)
-						inp->inp_flags2 |= INP_REUSEPORT;
-					else
-						inp->inp_flags2 &= ~INP_REUSEPORT;
-				}
+				if ((so->so_options & SO_REUSEADDR) != 0)
+					inp->inp_flags2 |= INP_REUSEADDR;
+				else
+					inp->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
@@ -947,7 +956,7 @@
 				error = EMSGSIZE;
 				break;
 			}
-			MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
@@ -1285,7 +1294,7 @@
 	 * Make a deep copy of the packet because we're going to
 	 * modify the pack in order to generate checksums.
 	 */
-	copym = m_dup(m, M_DONTWAIT);
+	copym = m_dup(m, M_NOWAIT);
 	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
 		copym = m_pullup(copym, hlen);
 	if (copym != NULL) {
@@ -1302,8 +1311,6 @@
 		 * than the interface's MTU.  Can this possibly matter?
 		 */
 		ip = mtod(copym, struct ip *);
-		ip->ip_len = htons(ip->ip_len);
-		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		ip->ip_sum = in_cksum(copym, hlen);
 #if 1 /* XXX */

Modified: trunk/sys/netinet/ip_var.h
===================================================================
--- trunk/sys/netinet/ip_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/netinet/ip_var.h 243586 2012-11-27 01:59:51Z ae $
+ * $FreeBSD: stable/10/sys/netinet/ip_var.h 263307 2014-03-18 16:56:05Z glebius $
  */
 
 #ifndef _NETINET_IP_VAR_H_
@@ -94,50 +94,54 @@
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
+	STAILQ_ENTRY(ip_moptions) imo_link;
 };
 
 struct	ipstat {
-	u_long	ips_total;		/* total packets received */
-	u_long	ips_badsum;		/* checksum bad */
-	u_long	ips_tooshort;		/* packet too short */
-	u_long	ips_toosmall;		/* not enough data */
-	u_long	ips_badhlen;		/* ip header length < data size */
-	u_long	ips_badlen;		/* ip length < ip header length */
-	u_long	ips_fragments;		/* fragments received */
-	u_long	ips_fragdropped;	/* frags dropped (dups, out of space) */
-	u_long	ips_fragtimeout;	/* fragments timed out */
-	u_long	ips_forward;		/* packets forwarded */
-	u_long	ips_fastforward;	/* packets fast forwarded */
-	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
-	u_long	ips_redirectsent;	/* packets forwarded on same net */
-	u_long	ips_noproto;		/* unknown or unsupported protocol */
-	u_long	ips_delivered;		/* datagrams delivered to upper level*/
-	u_long	ips_localout;		/* total ip packets generated here */
-	u_long	ips_odropped;		/* lost packets due to nobufs, etc. */
-	u_long	ips_reassembled;	/* total packets reassembled ok */
-	u_long	ips_fragmented;		/* datagrams successfully fragmented */
-	u_long	ips_ofragments;		/* output fragments created */
-	u_long	ips_cantfrag;		/* don't fragment flag was set, etc. */
-	u_long	ips_badoptions;		/* error in option processing */
-	u_long	ips_noroute;		/* packets discarded due to no route */
-	u_long	ips_badvers;		/* ip version != 4 */
-	u_long	ips_rawout;		/* total raw ip packets generated */
-	u_long	ips_toolong;		/* ip length > max ip packet size */
-	u_long	ips_notmember;		/* multicasts for unregistered grps */
-	u_long	ips_nogif;		/* no match gif found */
-	u_long	ips_badaddr;		/* invalid address on header */
+	uint64_t ips_total;		/* total packets received */
+	uint64_t ips_badsum;		/* checksum bad */
+	uint64_t ips_tooshort;		/* packet too short */
+	uint64_t ips_toosmall;		/* not enough data */
+	uint64_t ips_badhlen;		/* ip header length < data size */
+	uint64_t ips_badlen;		/* ip length < ip header length */
+	uint64_t ips_fragments;		/* fragments received */
+	uint64_t ips_fragdropped;	/* frags dropped (dups, out of space) */
+	uint64_t ips_fragtimeout;	/* fragments timed out */
+	uint64_t ips_forward;		/* packets forwarded */
+	uint64_t ips_fastforward;	/* packets fast forwarded */
+	uint64_t ips_cantforward;	/* packets rcvd for unreachable dest */
+	uint64_t ips_redirectsent;	/* packets forwarded on same net */
+	uint64_t ips_noproto;		/* unknown or unsupported protocol */
+	uint64_t ips_delivered;		/* datagrams delivered to upper level*/
+	uint64_t ips_localout;		/* total ip packets generated here */
+	uint64_t ips_odropped;		/* lost packets due to nobufs, etc. */
+	uint64_t ips_reassembled;	/* total packets reassembled ok */
+	uint64_t ips_fragmented;	/* datagrams successfully fragmented */
+	uint64_t ips_ofragments;	/* output fragments created */
+	uint64_t ips_cantfrag;		/* don't fragment flag was set, etc. */
+	uint64_t ips_badoptions;		/* error in option processing */
+	uint64_t ips_noroute;		/* packets discarded due to no route */
+	uint64_t ips_badvers;		/* ip version != 4 */
+	uint64_t ips_rawout;		/* total raw ip packets generated */
+	uint64_t ips_toolong;		/* ip length > max ip packet size */
+	uint64_t ips_notmember;		/* multicasts for unregistered grps */
+	uint64_t ips_nogif;		/* no match gif found */
+	uint64_t ips_badaddr;		/* invalid address on header */
 };
 
 #ifdef _KERNEL
 
+#include <sys/counter.h>
 #include <net/vnet.h>
 
+VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	IPSTAT_ADD(name, val)	V_ipstat.name += (val)
-#define	IPSTAT_SUB(name, val)	V_ipstat.name -= (val)
+#define	IPSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
+#define	IPSTAT_SUB(name, val)	IPSTAT_ADD(name, -(val))
 #define	IPSTAT_INC(name)	IPSTAT_ADD(name, 1)
 #define	IPSTAT_DEC(name)	IPSTAT_SUB(name, 1)
 
@@ -145,11 +149,11 @@
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_ipstat_inc(int statnum);
-#define	KMOD_IPSTAT_INC(name)						\
-	kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long))
+#define	KMOD_IPSTAT_INC(name)	\
+    kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
 void	kmod_ipstat_dec(int statnum);
-#define	KMOD_IPSTAT_DEC(name)						\
-	kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long))
+#define	KMOD_IPSTAT_DEC(name)	\
+    kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
 
 /* flags passed to ip_output as last parameter */
 #define	IP_FORWARDING		0x1		/* most of ip header exists */
@@ -159,12 +163,6 @@
 #define IP_ROUTETOIF		SO_DONTROUTE	/* 0x10 bypass routing tables */
 #define IP_ALLOWBROADCAST	SO_BROADCAST	/* 0x20 can send broadcast packets */
 
-/*
- * mbuf flag used by ip_fastfwd
- */
-#define	M_FASTFWD_OURS		M_PROTO1	/* changed dst to local */
-#define	M_IP_NEXTHOP		M_PROTO2	/* explicit ip nexthop */
-
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP_HDR_ALIGNED_P(ip)	1
 #else
@@ -176,7 +174,6 @@
 struct route;
 struct sockopt;
 
-VNET_DECLARE(struct ipstat, ipstat);
 VNET_DECLARE(u_short, ip_id);			/* ip packet ctr, for ids */
 VNET_DECLARE(int, ip_defttl);			/* default IP ttl */
 VNET_DECLARE(int, ipforwarding);		/* ip forwarding */
@@ -192,7 +189,6 @@
 VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
-#define	V_ipstat		VNET(ipstat)
 #define	V_ip_id			VNET(ip_id)
 #define	V_ip_defttl		VNET(ip_defttl)
 #define	V_ipforwarding		VNET(ipforwarding)
@@ -211,7 +207,7 @@
 int	ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	ip_drain(void);
 int	ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
-	    u_long if_hwassist_flags, int sw_csum);
+	    u_long if_hwassist_flags);
 void	ip_forward(struct mbuf *m, int srcrt);
 void	ip_init(void);
 #ifdef VIMAGE
@@ -295,9 +291,7 @@
 struct ip_fw_args;
 typedef int	(*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
 typedef int	(*ip_fw_ctl_ptr_t)(struct sockopt *);
-VNET_DECLARE(ip_fw_chk_ptr_t, ip_fw_chk_ptr);
 VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
-#define	V_ip_fw_chk_ptr		VNET(ip_fw_chk_ptr)
 #define	V_ip_fw_ctl_ptr		VNET(ip_fw_ctl_ptr)
 
 /* Divert hooks. */

Modified: trunk/sys/netinet/khelp/h_ertt.c
===================================================================
--- trunk/sys/netinet/khelp/h_ertt.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/khelp/h_ertt.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010
  * 	Swinburne University of Technology, Melbourne, Australia
@@ -37,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/khelp/h_ertt.c 239474 2012-08-21 02:17:06Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/khelp/h_ertt.c 239346 2012-08-17 01:49:51Z lstewart $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/netinet/khelp/h_ertt.h
===================================================================
--- trunk/sys/netinet/khelp/h_ertt.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/khelp/h_ertt.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009-2010
  * 	Swinburne University of Technology, Melbourne, Australia
@@ -30,7 +31,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/khelp/h_ertt.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/khelp/h_ertt.h 220560 2011-04-12 08:13:18Z lstewart $
  */
 
 /*

Modified: trunk/sys/netinet/libalias/HISTORY
===================================================================
--- trunk/sys/netinet/libalias/HISTORY	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/HISTORY	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,4 +1,5 @@
-$FreeBSD: stable/9/sys/netinet/libalias/HISTORY 63899 2000-07-26 23:15:46Z archie $
+$MidnightBSD$
+$FreeBSD: stable/10/sys/netinet/libalias/HISTORY 63899 2000-07-26 23:15:46Z archie $
 
 Version 1.0: August 11, 1996 (cjm)
 


Property changes on: trunk/sys/netinet/libalias/HISTORY
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/netinet/libalias/alias.c
===================================================================
--- trunk/sys/netinet/libalias/alias.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias.c 214754 2010-11-03 21:10:12Z n_hibma $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias.c 318519 2017-05-19 07:31:48Z eugen $");
 
 /*
     Alias.c provides supervisory control for the functions of the
@@ -699,12 +700,14 @@
 	struct alias_link *lnk;
 
 	LIBALIAS_LOCK_ASSERT(la);
-	(void)create;
 
 /* Return if proxy-only mode is enabled */
 	if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
 		return (PKT_ALIAS_OK);
 
+	if (!create)
+		return (PKT_ALIAS_IGNORED);
+
 	lnk = FindProtoOut(la, *ip_src, ip_dst, ip_p);
 	if (lnk != NULL) {
 		struct in_addr alias_address;
@@ -1749,40 +1752,22 @@
 struct mbuf *
 m_megapullup(struct mbuf *m, int len) {
 	struct mbuf *mcl;
-	
+
 	if (len > m->m_pkthdr.len)
 		goto bad;
-	
-	/* Do not reallocate packet if it is sequentional,
-	 * writable and has some extra space for expansion.
-	 * XXX: Constant 100bytes is completely empirical. */
-#define	RESERVE 100
-	if (m->m_next == NULL && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= RESERVE)
+
+	if (m->m_next == NULL && M_WRITABLE(m))
 		return (m);
 
-	if (len <= MCLBYTES - RESERVE) {
-		mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
-	} else if (len < MJUM16BYTES) {
-		int size;
-		if (len <= MJUMPAGESIZE - RESERVE) {
-			size = MJUMPAGESIZE;
-		} else if (len <= MJUM9BYTES - RESERVE) {
-			size = MJUM9BYTES;
-		} else {
-			size = MJUM16BYTES;
-		};
-		mcl = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
-	} else {
-		goto bad;
-	}
+	mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (mcl == NULL)
 		goto bad;
- 
+	m_align(mcl, len);
 	m_move_pkthdr(mcl, m);
 	m_copydata(m, 0, len, mtod(mcl, caddr_t));
 	mcl->m_len = mcl->m_pkthdr.len = len;
 	m_freem(m);
- 
+
 	return (mcl);
 bad:
 	m_freem(m);

Modified: trunk/sys/netinet/libalias/alias.h
===================================================================
--- trunk/sys/netinet/libalias/alias.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /* lint -save -library Flexelint comment for external headers */
 
 /*-
@@ -25,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/libalias/alias.h 223080 2011-06-14 13:35:24Z ae $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias.h 223080 2011-06-14 13:35:24Z ae $
  */
 
 /*

Modified: trunk/sys/netinet/libalias/alias_cuseeme.c
===================================================================
--- trunk/sys/netinet/libalias/alias_cuseeme.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_cuseeme.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Brian Somers <brian at Awfulhak.org>
  *                    with the aid of code written by
@@ -27,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_cuseeme.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_cuseeme.c 190841 2009-04-08 11:56:49Z piso $");
 
 #ifdef _KERNEL
 #include <sys/param.h>

Modified: trunk/sys/netinet/libalias/alias_db.c
===================================================================
--- trunk/sys/netinet/libalias/alias_db.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_db.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_db.c 223437 2011-06-22 20:00:27Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_db.c 248158 2013-03-11 12:22:44Z glebius $");
 
 /*
     Alias_db.c encapsulates all data structures used for storing
@@ -2729,7 +2730,6 @@
 InitPunchFW(struct libalias *la)
 {
 
-	LIBALIAS_LOCK_ASSERT(la);
 	la->fireWallField = malloc(la->fireWallNumNums);
 	if (la->fireWallField) {
 		memset(la->fireWallField, 0, la->fireWallNumNums);
@@ -2745,7 +2745,6 @@
 UninitPunchFW(struct libalias *la)
 {
 
-	LIBALIAS_LOCK_ASSERT(la);
 	ClearAllFWHoles(la);
 	if (la->fireWallFD >= 0)
 		close(la->fireWallFD);
@@ -2765,7 +2764,6 @@
 	struct ip_fw rule;	/* On-the-fly built rule */
 	int fwhole;		/* Where to punch hole */
 
-	LIBALIAS_LOCK_ASSERT(la);
 	la = lnk->la;
 
 /* Don't do anything unless we are asked to */
@@ -2839,7 +2837,6 @@
 {
 	struct libalias *la;
 
-	LIBALIAS_LOCK_ASSERT(la);
 	la = lnk->la;
 	if (lnk->link_type == LINK_TCP) {
 		int fwhole = lnk->data.tcp->fwhole;	/* Where is the firewall
@@ -2864,7 +2861,6 @@
 	struct ip_fw rule;	/* On-the-fly built rule */
 	int i;
 
-	LIBALIAS_LOCK_ASSERT(la);
 	if (la->fireWallFD < 0)
 		return;
 
@@ -2878,7 +2874,7 @@
 	memset(la->fireWallField, 0, la->fireWallNumNums);
 }
 
-#endif
+#endif /* !NO_FW_PUNCH */
 
 void
 LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)

Modified: trunk/sys/netinet/libalias/alias_dummy.c
===================================================================
--- trunk/sys/netinet/libalias/alias_dummy.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_dummy.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_dummy.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_dummy.c 190841 2009-04-08 11:56:49Z piso $");
 
 /* 
  * Alias_dummy is just an empty skeleton used to demostrate how to write

Modified: trunk/sys/netinet/libalias/alias_ftp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_ftp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_ftp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_ftp.c 223437 2011-06-22 20:00:27Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_ftp.c 223437 2011-06-22 20:00:27Z ae $");
 
 /*
     Alias_ftp.c performs special processing for FTP sessions under

Modified: trunk/sys/netinet/libalias/alias_irc.c
===================================================================
--- trunk/sys/netinet/libalias/alias_irc.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_irc.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_irc.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_irc.c 190841 2009-04-08 11:56:49Z piso $");
 
 /* Alias_irc.c intercepts packages contain IRC CTCP commands, and
 	changes DCC commands to export a port on the aliasing host instead

Modified: trunk/sys/netinet/libalias/alias_local.h
===================================================================
--- trunk/sys/netinet/libalias/alias_local.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_local.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_local.h 223437 2011-06-22 20:00:27Z ae $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_local.h 223437 2011-06-22 20:00:27Z ae $
  */
 
 /*

Modified: trunk/sys/netinet/libalias/alias_mod.c
===================================================================
--- trunk/sys/netinet/libalias/alias_mod.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_mod.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  *
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_mod.c 201758 2010-01-07 21:01:37Z mbr $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_mod.c 201758 2010-01-07 21:01:37Z mbr $");
 
 #ifdef _KERNEL
 #include <sys/libkern.h>

Modified: trunk/sys/netinet/libalias/alias_mod.h
===================================================================
--- trunk/sys/netinet/libalias/alias_mod.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_mod.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_mod.h 190841 2009-04-08 11:56:49Z piso $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_mod.h 190841 2009-04-08 11:56:49Z piso $
  */
 
 /*

Modified: trunk/sys/netinet/libalias/alias_nbt.c
===================================================================
--- trunk/sys/netinet/libalias/alias_nbt.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_nbt.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Written by Atsushi Murai <amurai at spec.co.jp>
  * Copyright (c) 1998, System Planning and Engineering Co.
@@ -29,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_nbt.c 190938 2009-04-11 15:19:09Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_nbt.c 190938 2009-04-11 15:19:09Z piso $");
 
 /*
     alias_nbt.c performs special processing for NetBios over TCP/IP

Modified: trunk/sys/netinet/libalias/alias_old.c
===================================================================
--- trunk/sys/netinet/libalias/alias_old.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_old.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Poul-Henning Kamp <phk at FreeBSD.org>
  * All rights reserved.

Modified: trunk/sys/netinet/libalias/alias_pptp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_pptp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_pptp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * alias_pptp.c
  *
@@ -37,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_pptp.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_pptp.c 190841 2009-04-08 11:56:49Z piso $");
 
 /* Includes */
 #ifdef _KERNEL

Modified: trunk/sys/netinet/libalias/alias_proxy.c
===================================================================
--- trunk/sys/netinet/libalias/alias_proxy.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_proxy.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_proxy.c 243282 2012-11-19 15:26:03Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_proxy.c 241648 2012-10-17 20:23:07Z emaste $");
 
 /* file: alias_proxy.c
 

Modified: trunk/sys/netinet/libalias/alias_sctp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_sctp.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_sctp.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008
  * 	Swinburne University of Technology, Melbourne, Australia.
@@ -68,7 +69,7 @@
  * - Dynamic control of hash-table size
  */
 
-/* $FreeBSD: stable/9/sys/netinet/libalias/alias_sctp.c 249132 2013-04-05 08:22:11Z mav $ */
+/* $FreeBSD: stable/10/sys/netinet/libalias/alias_sctp.c 332284 2018-04-08 16:29:24Z tuexen $ */
 
 #ifdef _KERNEL
 #include <machine/stdarg.h>
@@ -185,7 +186,7 @@
 /* Use kernel allocator. */
 #ifdef _SYS_MALLOC_H_
 #define	sn_malloc(x)	malloc(x, M_SCTPNAT, M_NOWAIT|M_ZERO)
-#define	sn_calloc(n,x)	sn_malloc(x * n)
+#define	sn_calloc(n,x)	sn_malloc((x) * (n))
 #define	sn_free(x)	free(x, M_SCTPNAT)
 #endif// #ifdef _SYS_MALLOC_H_
 
@@ -420,9 +421,9 @@
 	error = sysctl_handle_int(oidp, &level, 0, req);
 	if (error) return (error);
 
-	sysctl_log_level = (level > SN_LOG_DEBUG_MAX)?(SN_LOG_DEBUG_MAX):(level);
-	sysctl_log_level = (level < SN_LOG_LOW)?(SN_LOG_LOW):(level);
-
+	level = (level > SN_LOG_DEBUG_MAX)?(SN_LOG_DEBUG_MAX):(level);
+	level = (level < SN_LOG_LOW)?(SN_LOG_LOW):(level);
+	sysctl_log_level = level;
 	return (0);
 }
 
@@ -2114,13 +2115,13 @@
  * @return pointer to association or NULL
  */
 static struct sctp_nat_assoc*
-FindSctpLocalT(struct libalias *la,  struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port)
+FindSctpLocalT(struct libalias *la, struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port)
 {
 	u_int i;
 	struct sctp_nat_assoc *assoc = NULL, *lastmatch = NULL;
 	struct sctp_GlobalAddress *G_Addr = NULL;
 	int cnt = 0;
-  
+
 	if (l_vtag != 0) { /* an init packet, vtag==0 */
 		i = SN_TABLE_HASH(l_vtag, g_port, la->sctpNatTableSize);
 		LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
@@ -2127,11 +2128,11 @@
 			if ((assoc->g_vtag == l_vtag) && (assoc->g_port == g_port) && (assoc->l_port == l_port)) {
 				if (assoc->num_Gaddr) {
 					LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
-						if(G_Addr->g_addr.s_addr == G_Addr->g_addr.s_addr)
-							return(assoc); /* full match */
+						if (G_Addr->g_addr.s_addr == g_addr.s_addr)
+							return (assoc); /* full match */
 					}
 				} else {
-					if (++cnt > 1) return(NULL);
+					if (++cnt > 1) return (NULL);
 					lastmatch = assoc;
 				}
 			}
@@ -2138,7 +2139,7 @@
 		}
 	}
 	/* If there is more than one match we do not know which local address to send to */
-	return( cnt ? lastmatch : NULL );
+	return (cnt ? lastmatch : NULL);
 }
 
 /** @ingroup Hash

Modified: trunk/sys/netinet/libalias/alias_sctp.h
===================================================================
--- trunk/sys/netinet/libalias/alias_sctp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_sctp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008
  * 	Swinburne University of Technology, Melbourne, Australia.
@@ -45,7 +46,7 @@
  *
  */
 
-/* $FreeBSD: stable/9/sys/netinet/libalias/alias_sctp.h 222809 2011-06-07 06:57:22Z ae $ */
+/* $FreeBSD: stable/10/sys/netinet/libalias/alias_sctp.h 235644 2012-05-19 05:14:24Z marcel $ */
 
 #ifndef _ALIAS_SCTP_H_
 #define _ALIAS_SCTP_H_
@@ -92,7 +93,6 @@
 #ifndef _KERNEL
 #include <stdlib.h>
 #include <stdio.h>
-#include <curses.h>
 #endif //#ifdef _KERNEL
 
 

Modified: trunk/sys/netinet/libalias/alias_skinny.c
===================================================================
--- trunk/sys/netinet/libalias/alias_skinny.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_skinny.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * alias_skinny.c
  *
@@ -27,7 +28,7 @@
  *
  * Author: Joe Marcus Clarke <marcus at FreeBSD.org>
  *
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_skinny.c 190841 2009-04-08 11:56:49Z piso $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_skinny.c 240725 2012-09-20 03:29:43Z kevlo $
  */
 
 #ifdef _KERNEL
@@ -340,7 +341,7 @@
 	 * through the packet using len to determine message boundaries.
 	 * This comes into play big time with port messages being in the
 	 * same packet as register messages.  Also, open receive channel
-	 * acks are usually buried in a pakcet some 400 bytes long.
+	 * acks are usually buried in a packet some 400 bytes long.
 	 */
 	while (dlen >= skinny_hdr_len) {
 		len = (sd->len);

Modified: trunk/sys/netinet/libalias/alias_smedia.c
===================================================================
--- trunk/sys/netinet/libalias/alias_smedia.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_smedia.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * alias_smedia.c
  *
@@ -64,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_smedia.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_smedia.c 190841 2009-04-08 11:56:49Z piso $");
 
 /*
    Alias_smedia.c is meant to contain the aliasing code for streaming media

Modified: trunk/sys/netinet/libalias/alias_util.c
===================================================================
--- trunk/sys/netinet/libalias/alias_util.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_util.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 Charles Mott <cm at linktel.net>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_util.c 174348 2007-12-06 09:31:13Z des $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_util.c 174348 2007-12-06 09:31:13Z des $");
 
 
 /*

Modified: trunk/sys/netinet/libalias/libalias.3
===================================================================
--- trunk/sys/netinet/libalias/libalias.3	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/libalias.3	2018-05-25 13:10:11 UTC (rev 9927)
@@ -23,7 +23,8 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\" $FreeBSD: stable/9/sys/netinet/libalias/libalias.3 240059 2012-09-02 18:54:51Z eadler $
+.\" $FreeBSD: stable/10/sys/netinet/libalias/libalias.3 246144 2013-01-31 10:29:22Z glebius $
+.\" $MidnightBSD$
 .\"
 .Dd July 4, 2011
 .Dt LIBALIAS 3
@@ -201,11 +202,10 @@
 If this mode bit is set, traffic on the local network which does not
 originate from unregistered address spaces will be ignored.
 Standard Class A, B and C unregistered addresses are:
-.Bd -literal -offset indent
+.Pp
 10.0.0.0     ->  10.255.255.255   (Class A subnet)
 172.16.0.0   ->  172.31.255.255   (Class B subnets)
 192.168.0.0  ->  192.168.255.255  (Class C subnets)
-.Ed
 .Pp
 This option is useful in the case that the packet aliasing host has both
 registered and unregistered subnets on different interfaces.
@@ -499,7 +499,7 @@
 New traffic generated by any of the local machines, designated in the
 several function calls, will be aliased to the same address.
 Consider the following example:
-.Bd -literal -offset indent
+.Pp
 LibAliasRedirectAddr(la, inet_aton("192.168.0.2"),
                         inet_aton("141.221.254.101"));
 LibAliasRedirectAddr(la, inet_aton("192.168.0.3"),
@@ -506,7 +506,6 @@
                         inet_aton("141.221.254.101"));
 LibAliasRedirectAddr(la, inet_aton("192.168.0.4"),
                         inet_aton("141.221.254.101"));
-.Ed
 .Pp
 Any outgoing connections such as
 .Xr telnet 1
@@ -903,39 +902,6 @@
 This function can be used if an already-aliased packet needs to have its
 original IP header restored for further processing (e.g.\& logging).
 .Ed
-.Sh AUTHORS
-.An Charles Mott Aq cm at linktel.net ,
-versions 1.0 - 1.8, 2.0 - 2.4.
-.An Eivind Eklund Aq eivind at FreeBSD.org ,
-versions 1.8b, 1.9 and 2.5.
-Added IRC DCC support as well as contributing a number of architectural
-improvements; added the firewall bypass for FTP/IRC DCC.
-.An Erik Salander Aq erik at whistle.com
-added support for PPTP and RTSP.
-.An Junichi Satoh Aq junichi at junichi.org
-added support for RTSP/PNA.
-.An Ruslan Ermilov Aq ru at FreeBSD.org
-added support for PPTP and LSNAT as well as general hacking.
-.An Paolo Pisati Aq piso at FreeBSD.org
-made the library modular, moving support for all
-protocols (except for IP, TCP and UDP) to external modules.
-.Sh ACKNOWLEDGEMENTS
-Listed below, in approximate chronological order, are individuals who
-have provided valuable comments and/or debugging assistance.
-.Bd -ragged -offset indent
-.An -split
-.An Gary Roberts
-.An Tom Torrance
-.An Reto Burkhalter
-.An Martin Renters
-.An Brian Somers
-.An Paul Traina
-.An Ari Suutari
-.An Dave Remien
-.An J. Fortes
-.An Andrzej Bialecki
-.An Gordon Burditt
-.Ed
 .Sh CONCEPTUAL BACKGROUND
 This section is intended for those who are planning to modify the source
 code or want to create somewhat esoteric applications using the packet
@@ -1477,3 +1443,38 @@
 facility and the
 .Dv LOG_INFO
 level.
+.Sh AUTHORS
+.An Charles Mott Aq cm at linktel.net ,
+versions 1.0 - 1.8, 2.0 - 2.4.
+.An Eivind Eklund Aq eivind at FreeBSD.org ,
+versions 1.8b, 1.9 and 2.5.
+Added IRC DCC support as well as contributing a number of architectural
+improvements; added the firewall bypass for FTP/IRC DCC.
+.An Erik Salander Aq erik at whistle.com
+added support for PPTP and RTSP.
+.An Junichi Satoh Aq junichi at junichi.org
+added support for RTSP/PNA.
+.An Ruslan Ermilov Aq ru at FreeBSD.org
+added support for PPTP and LSNAT as well as general hacking.
+.An Gleb Smirnoff Aq glebius at FreeBSD.org
+ported the library to kernel space.
+.An Paolo Pisati Aq piso at FreeBSD.org
+made the library modular, moving support for all
+protocols (except for IP, TCP and UDP) to external modules.
+.Sh ACKNOWLEDGEMENTS
+Listed below, in approximate chronological order, are individuals who
+have provided valuable comments and/or debugging assistance.
+.Bd -ragged -offset indent
+.An -split
+.An Gary Roberts
+.An Tom Torrance
+.An Reto Burkhalter
+.An Martin Renters
+.An Brian Somers
+.An Paul Traina
+.An Ari Suutari
+.An Dave Remien
+.An J. Fortes
+.An Andrzej Bialecki
+.An Gordon Burditt
+.Ed


Property changes on: trunk/sys/netinet/libalias/libalias.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/netinet/pim.h
===================================================================
--- trunk/sys/netinet/pim.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/pim.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/pim.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/netinet/pim.h 139823 2005-01-07 01:45:51Z imp $
  */
 
 #ifndef _NETINET_PIM_H_

Modified: trunk/sys/netinet/pim_var.h
===================================================================
--- trunk/sys/netinet/pim_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/pim_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/pim_var.h 208744 2010-06-02 15:44:43Z zec $
+ * $FreeBSD: stable/10/sys/netinet/pim_var.h 254925 2013-08-26 18:16:05Z jhb $
  */
 
 #ifndef _NETINET_PIM_VAR_H_
@@ -47,21 +47,22 @@
  * PIM statistics kept in the kernel
  */
 struct pimstat {
-	u_quad_t pims_rcv_total_msgs;	   /* total PIM messages received    */
-	u_quad_t pims_rcv_total_bytes;	   /* total PIM bytes received	     */
-	u_quad_t pims_rcv_tooshort;	   /* rcvd with too few bytes	     */
-	u_quad_t pims_rcv_badsum;	   /* rcvd with bad checksum	     */
-	u_quad_t pims_rcv_badversion;	   /* rcvd bad PIM version	     */
-	u_quad_t pims_rcv_registers_msgs;  /* rcvd regs. msgs (data only)    */
-	u_quad_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only)   */
-	u_quad_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif     */
-	u_quad_t pims_rcv_badregisters;	   /* rcvd invalid registers	     */
-	u_quad_t pims_snd_registers_msgs;  /* sent regs. msgs (data only)    */
-	u_quad_t pims_snd_registers_bytes; /* sent regs. bytes (data only)   */
+	uint64_t pims_rcv_total_msgs;	   /* total PIM messages received    */
+	uint64_t pims_rcv_total_bytes;	   /* total PIM bytes received	     */
+	uint64_t pims_rcv_tooshort;	   /* rcvd with too few bytes	     */
+	uint64_t pims_rcv_badsum;	   /* rcvd with bad checksum	     */
+	uint64_t pims_rcv_badversion;	   /* rcvd bad PIM version	     */
+	uint64_t pims_rcv_registers_msgs;  /* rcvd regs. msgs (data only)    */
+	uint64_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only)   */
+	uint64_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif     */
+	uint64_t pims_rcv_badregisters;	   /* rcvd invalid registers	     */
+	uint64_t pims_snd_registers_msgs;  /* sent regs. msgs (data only)    */
+	uint64_t pims_snd_registers_bytes; /* sent regs. bytes (data only)   */
 };
 
 #ifdef _KERNEL
-#define	PIMSTAT_ADD(name, val)	V_pimstat.name += (val)
+#define	PIMSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct pimstat, pimstat, name, (val))
 #define	PIMSTAT_INC(name)	PIMSTAT_ADD(name, 1)
 #endif
 
@@ -71,11 +72,6 @@
 #define PIMCTL_STATS		1	/* statistics (read-only) */
 #define PIMCTL_MAXID		2
 
-#define PIMCTL_NAMES {			\
-	{ 0, 0 },			\
-	{ "stats", CTLTYPE_STRUCT },	\
-}
-
 #ifdef _KERNEL
 
 void pim_input(struct mbuf *, int);

Modified: trunk/sys/netinet/raw_ip.c
===================================================================
--- trunk/sys/netinet/raw_ip.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/raw_ip.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/raw_ip.c 227423 2011-11-10 19:10:53Z andre $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/raw_ip.c 266718 2014-05-26 22:54:15Z smh $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -101,9 +101,6 @@
 int	(*ng_ipfw_input_p)(struct mbuf **, int,
 			struct ip_fw_args *, int);
 
-/* Hook for telling pf that the destination address changed */
-void	(*m_addr_chg_pf_p)(struct mbuf *m);
-
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
@@ -290,6 +287,11 @@
 	last = NULL;
 
 	ifp = m->m_pkthdr.rcvif;
+	/*
+	 * Applications on raw sockets expect host byte order.
+	 */
+	ip->ip_len = ntohs(ip->ip_len);
+	ip->ip_off = ntohs(ip->ip_off);
 
 	hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
 	    ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
@@ -438,7 +440,7 @@
 			m_freem(m);
 			return(EMSGSIZE);
 		}
-		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
@@ -446,25 +448,26 @@
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
-			ip->ip_off = IP_DF;
+			ip->ip_off = htons(IP_DF);
 		else
-			ip->ip_off = 0;
+			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
-		ip->ip_len = m->m_pkthdr.len;
+		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
+		ip->ip_dst.s_addr = dst;
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
-			 * want to see from jails. We do not go through the
-			 * pain of in_pcbladdr() for raw sockets.
+			 * want to see from jails.
 			 */
-			if (ip->ip_src.s_addr == INADDR_ANY)
-				error = prison_get_ip4(inp->inp_cred,
-				    &ip->ip_src);
-			else
+			if (ip->ip_src.s_addr == INADDR_ANY) {
+				error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
+				    inp->inp_cred);
+			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
+			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
@@ -471,7 +474,6 @@
 				return (error);
 			}
 		}
-		ip->ip_dst.s_addr = dst;
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
@@ -502,6 +504,13 @@
 			ip->ip_id = ip_newid();
 
 		/*
+		 * Applications on raw sockets pass us packets
+		 * in host byte order.
+		 */
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
+
+		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
@@ -538,6 +547,8 @@
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
+ *
+ * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)

Modified: trunk/sys/netinet/sctp.h
===================================================================
--- trunk/sys/netinet/sctp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp.h 237898 2012-07-01 08:04:10Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp.h 294158 2016-01-16 16:46:00Z tuexen $");
 
 #ifndef _NETINET_SCTP_H_
 #define _NETINET_SCTP_H_
@@ -44,13 +44,13 @@
 #define SCTP_PACKED __attribute__((packed))
 
 /*
- * SCTP protocol - RFC2960.
+ * SCTP protocol - RFC4960.
  */
 struct sctphdr {
 	uint16_t src_port;	/* source port */
 	uint16_t dest_port;	/* destination port */
 	uint32_t v_tag;		/* verification tag of packet */
-	uint32_t checksum;	/* Adler32 C-Sum */
+	uint32_t checksum;	/* CRC32C checksum */
 	/* chunks follow... */
 }       SCTP_PACKED;
 
@@ -122,6 +122,14 @@
 #define SCTP_DEFAULT_PRINFO             0x00000022
 #define SCTP_PEER_ADDR_THLDS            0x00000023
 #define SCTP_REMOTE_UDP_ENCAPS_PORT     0x00000024
+#define SCTP_ECN_SUPPORTED              0x00000025
+#define SCTP_PR_SUPPORTED               0x00000026
+#define SCTP_AUTH_SUPPORTED             0x00000027
+#define SCTP_ASCONF_SUPPORTED           0x00000028
+#define SCTP_RECONFIG_SUPPORTED         0x00000029
+#define SCTP_NRSACK_SUPPORTED           0x00000030
+#define SCTP_PKTDROP_SUPPORTED          0x00000031
+#define SCTP_MAX_CWND                   0x00000032
 
 /*
  * read-only options
@@ -134,6 +142,8 @@
 #define SCTP_GET_ASSOC_NUMBER           0x00000104	/* ro */
 #define SCTP_GET_ASSOC_ID_LIST          0x00000105	/* ro */
 #define SCTP_TIMEOUTS                   0x00000106
+#define SCTP_PR_STREAM_STATUS           0x00000107
+#define SCTP_PR_ASSOC_STATUS            0x00000108
 
 /*
  * user socket options: BSD implementation specific
@@ -366,6 +376,12 @@
 /*
  * error cause parameters (user visible)
  */
+struct sctp_gen_error_cause {
+	uint16_t code;
+	uint16_t length;
+	uint8_t info[];
+}                    SCTP_PACKED;
+
 struct sctp_error_cause {
 	uint16_t code;
 	uint16_t length;
@@ -373,36 +389,45 @@
 }                SCTP_PACKED;
 
 struct sctp_error_invalid_stream {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_INVALID_STREAM */
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_INVALID_STREAM */
 	uint16_t stream_id;	/* stream id of the DATA in error */
 	uint16_t reserved;
 }                         SCTP_PACKED;
 
 struct sctp_error_missing_param {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_MISSING_PARAM */
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_MISSING_PARAM */
 	uint32_t num_missing_params;	/* number of missing parameters */
-	/* uint16_t param_type's follow */
+	uint16_t type[];
 }                        SCTP_PACKED;
 
 struct sctp_error_stale_cookie {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_STALE_COOKIE */
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_STALE_COOKIE */
 	uint32_t stale_time;	/* time in usec of staleness */
 }                       SCTP_PACKED;
 
 struct sctp_error_out_of_resource {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_OUT_OF_RESOURCES */
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_OUT_OF_RESOURCES */
 }                          SCTP_PACKED;
 
 struct sctp_error_unresolv_addr {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_UNRESOLVABLE_ADDR */
-
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_UNRESOLVABLE_ADDR */
 }                        SCTP_PACKED;
 
 struct sctp_error_unrecognized_chunk {
-	struct sctp_error_cause cause;	/* code=SCTP_ERROR_UNRECOG_CHUNK */
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_UNRECOG_CHUNK */
 	struct sctp_chunkhdr ch;/* header from chunk in error */
 }                             SCTP_PACKED;
 
+struct sctp_error_no_user_data {
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_NO_USER_DATA */
+	uint32_t tsn;		/* TSN of the empty data chunk */
+}                       SCTP_PACKED;
+
+struct sctp_error_auth_invalid_hmac {
+	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_UNSUPPORTED_HMACID */
+	uint16_t hmac_id;
+}                            SCTP_PACKED;
+
 /*
  * Main SCTP chunk types we place these here so natd and f/w's in user land
  * can find them.
@@ -426,7 +451,7 @@
 /* RFC4895 */
 #define SCTP_AUTHENTICATION     0x0f
 /* EY nr_sack chunk id*/
-#define SCTP_NR_SELECTIVE_ACK 0x10
+#define SCTP_NR_SELECTIVE_ACK	0x10
 /************0x40 series ***********/
 /************0x80 series ***********/
 /* RFC5061 */
@@ -501,47 +526,47 @@
 #define SCTP_PCB_FLAGS_SOCKET_GONE	0x10000000
 #define SCTP_PCB_FLAGS_SOCKET_ALLGONE	0x20000000
 #define SCTP_PCB_FLAGS_SOCKET_CANT_READ	0x40000000
+
 /* flags to copy to new PCB */
 #define SCTP_PCB_COPY_FLAGS		(SCTP_PCB_FLAGS_BOUNDALL|\
 					 SCTP_PCB_FLAGS_WAKEINPUT|\
 					 SCTP_PCB_FLAGS_BOUND_V6)
 
-
 /*
  * PCB Features (in sctp_features bitmask)
  */
-#define SCTP_PCB_FLAGS_DO_NOT_PMTUD      0x00000001
-#define SCTP_PCB_FLAGS_EXT_RCVINFO       0x00000002	/* deprecated */
-#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT   0x00000004
-#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE   0x00000008
-#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS  0x00000010
-#define SCTP_PCB_FLAGS_DO_ASCONF         0x00000020
-#define SCTP_PCB_FLAGS_AUTO_ASCONF       0x00000040
-#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE  0x00000080
+#define SCTP_PCB_FLAGS_DO_NOT_PMTUD      0x0000000000000001
+#define SCTP_PCB_FLAGS_EXT_RCVINFO       0x0000000000000002	/* deprecated */
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT   0x0000000000000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE   0x0000000000000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS  0x0000000000000010
+#define SCTP_PCB_FLAGS_DO_ASCONF         0x0000000000000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF       0x0000000000000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE  0x0000000000000080
 /* socket options */
-#define SCTP_PCB_FLAGS_NODELAY           0x00000100
-#define SCTP_PCB_FLAGS_AUTOCLOSE         0x00000200
-#define SCTP_PCB_FLAGS_RECVDATAIOEVNT    0x00000400	/* deprecated */
-#define SCTP_PCB_FLAGS_RECVASSOCEVNT     0x00000800
-#define SCTP_PCB_FLAGS_RECVPADDREVNT     0x00001000
-#define SCTP_PCB_FLAGS_RECVPEERERR       0x00002000
-#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT  0x00004000	/* deprecated */
-#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT  0x00008000
-#define SCTP_PCB_FLAGS_ADAPTATIONEVNT    0x00010000
-#define SCTP_PCB_FLAGS_PDAPIEVNT         0x00020000
-#define SCTP_PCB_FLAGS_AUTHEVNT          0x00040000
-#define SCTP_PCB_FLAGS_STREAM_RESETEVNT  0x00080000
-#define SCTP_PCB_FLAGS_NO_FRAGMENT       0x00100000
-#define SCTP_PCB_FLAGS_EXPLICIT_EOR      0x00400000
-#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4   0x00800000
-#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS  0x01000000
-#define SCTP_PCB_FLAGS_PORTREUSE         0x02000000
-#define SCTP_PCB_FLAGS_DRYEVNT           0x04000000
-#define SCTP_PCB_FLAGS_RECVRCVINFO       0x08000000
-#define SCTP_PCB_FLAGS_RECVNXTINFO       0x10000000
-#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT   0x20000000
-#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x40000000
-#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x80000000
+#define SCTP_PCB_FLAGS_NODELAY           0x0000000000000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE         0x0000000000000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT    0x0000000000000400	/* deprecated */
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT     0x0000000000000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT     0x0000000000001000
+#define SCTP_PCB_FLAGS_RECVPEERERR       0x0000000000002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT  0x0000000000004000	/* deprecated */
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT  0x0000000000008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT    0x0000000000010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT         0x0000000000020000
+#define SCTP_PCB_FLAGS_AUTHEVNT          0x0000000000040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT  0x0000000000080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT       0x0000000000100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR      0x0000000000400000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4   0x0000000000800000
+#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS  0x0000000001000000
+#define SCTP_PCB_FLAGS_PORTREUSE         0x0000000002000000
+#define SCTP_PCB_FLAGS_DRYEVNT           0x0000000004000000
+#define SCTP_PCB_FLAGS_RECVRCVINFO       0x0000000008000000
+#define SCTP_PCB_FLAGS_RECVNXTINFO       0x0000000010000000
+#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT   0x0000000020000000
+#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x0000000040000000
+#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x0000000080000000
 
 /*-
  * mobility_features parameters (by micchie).Note

Modified: trunk/sys/netinet/sctp_asconf.c
===================================================================
--- trunk/sys/netinet/sctp_asconf.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_asconf.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_asconf.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_asconf.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
@@ -51,59 +51,8 @@
  */
 
 
-static void
-sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
-{
-	struct ip *iph;
-
-#ifdef INET
-	struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
-	struct sockaddr_in6 *sin6;
-
-#endif
-
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			/* IPv4 source */
-			sin = (struct sockaddr_in *)sa;
-			bzero(sin, sizeof(*sin));
-			sin->sin_family = AF_INET;
-			sin->sin_len = sizeof(struct sockaddr_in);
-			sin->sin_port = 0;
-			sin->sin_addr.s_addr = iph->ip_src.s_addr;
-			break;
-		}
-#endif
-#ifdef INET6
-	case (IPV6_VERSION >> 4):
-		{
-			/* IPv6 source */
-			struct ip6_hdr *ip6;
-
-			sin6 = (struct sockaddr_in6 *)sa;
-			bzero(sin6, sizeof(*sin6));
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_len = sizeof(struct sockaddr_in6);
-			sin6->sin6_port = 0;
-			ip6 = mtod(m, struct ip6_hdr *);
-			sin6->sin6_addr = ip6->ip6_src;
-			break;
-		}
-#endif				/* INET6 */
-	default:
-		break;
-	}
-	return;
-}
-
 /*
- * draft-ietf-tsvwg-addip-sctp
+ * RFC 5061
  *
  * An ASCONF parameter queue exists per asoc which holds the pending address
  * operations.  Lists are updated upon receipt of ASCONF-ACK.
@@ -132,7 +81,7 @@
 	struct sctp_asconf_paramhdr *aph;
 
 	m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
-	    0, M_DONTWAIT, 1, MT_DATA);
+	    0, M_NOWAIT, 1, MT_DATA);
 	if (m_reply == NULL) {
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "asconf_success_response: couldn't get mbuf!\n");
@@ -160,7 +109,7 @@
 	m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
 	    tlv_length +
 	    sizeof(struct sctp_error_cause)),
-	    0, M_DONTWAIT, 1, MT_DATA);
+	    0, M_NOWAIT, 1, MT_DATA);
 	if (m_reply == NULL) {
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "asconf_error_response: couldn't get mbuf!\n");
@@ -195,14 +144,19 @@
 }
 
 static struct mbuf *
-sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *aph,
     struct sctp_tcb *stcb, int send_hb, int response_required)
 {
 	struct sctp_nets *net;
 	struct mbuf *m_reply = NULL;
-	struct sockaddr_storage sa_source, sa_store;
+	union sctp_sockstore store;
 	struct sctp_paramhdr *ph;
-	uint16_t param_type, param_length, aparam_length;
+	uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+	uint16_t param_length;
+
+#endif
 	struct sockaddr *sa;
 	int zero_address = 0;
 	int bad_address = 0;
@@ -221,9 +175,10 @@
 	aparam_length = ntohs(aph->ph.param_length);
 	ph = (struct sctp_paramhdr *)(aph + 1);
 	param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
 	param_length = ntohs(ph->param_length);
-
-	sa = (struct sockaddr *)&sa_store;
+#endif
+	sa = &store.sa;
 	switch (param_type) {
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
@@ -232,7 +187,7 @@
 			return (NULL);
 		}
 		v4addr = (struct sctp_ipv4addr_param *)ph;
-		sin = (struct sockaddr_in *)&sa_store;
+		sin = &store.sin;
 		bzero(sin, sizeof(*sin));
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(struct sockaddr_in);
@@ -255,7 +210,7 @@
 			return (NULL);
 		}
 		v6addr = (struct sctp_ipv6addr_param *)ph;
-		sin6 = (struct sockaddr_in6 *)&sa_store;
+		sin6 = &store.sin6;
 		bzero(sin6, sizeof(*sin6));
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -280,11 +235,10 @@
 
 	/* if 0.0.0.0/::0, add the source address instead */
 	if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
-		sa = (struct sockaddr *)&sa_source;
-		sctp_asconf_get_source_ip(m, sa);
+		sa = src;
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "process_asconf_add_ip: using source addr ");
-		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
 	}
 	/* add the address */
 	if (bad_address) {
@@ -344,13 +298,19 @@
 }
 
 static struct mbuf *
-sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_delete_ip(struct sockaddr *src,
+    struct sctp_asconf_paramhdr *aph,
     struct sctp_tcb *stcb, int response_required)
 {
 	struct mbuf *m_reply = NULL;
-	struct sockaddr_storage sa_source, sa_store;
+	union sctp_sockstore store;
 	struct sctp_paramhdr *ph;
-	uint16_t param_type, param_length, aparam_length;
+	uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+	uint16_t param_length;
+
+#endif
 	struct sockaddr *sa;
 	int zero_address = 0;
 	int result;
@@ -366,15 +326,13 @@
 
 #endif
 
-	/* get the source IP address for src and 0.0.0.0/::0 delete checks */
-	sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
-
 	aparam_length = ntohs(aph->ph.param_length);
 	ph = (struct sctp_paramhdr *)(aph + 1);
 	param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
 	param_length = ntohs(ph->param_length);
-
-	sa = (struct sockaddr *)&sa_store;
+#endif
+	sa = &store.sa;
 	switch (param_type) {
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
@@ -383,7 +341,7 @@
 			return (NULL);
 		}
 		v4addr = (struct sctp_ipv4addr_param *)ph;
-		sin = (struct sockaddr_in *)&sa_store;
+		sin = &store.sin;
 		bzero(sin, sizeof(*sin));
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(struct sockaddr_in);
@@ -403,7 +361,7 @@
 			return (NULL);
 		}
 		v6addr = (struct sctp_ipv6addr_param *)ph;
-		sin6 = (struct sockaddr_in6 *)&sa_store;
+		sin6 = &store.sin6;
 		bzero(sin6, sizeof(*sin6));
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -425,7 +383,7 @@
 	}
 
 	/* make sure the source address is not being deleted */
-	if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+	if (sctp_cmpaddr(sa, src)) {
 		/* trying to delete the source address! */
 		SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
 		m_reply = sctp_asconf_error_response(aph->correlation_id,
@@ -435,8 +393,7 @@
 	}
 	/* if deleting 0.0.0.0/::0, delete all addresses except src addr */
 	if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
-		result = sctp_asconf_del_remote_addrs_except(stcb,
-		    (struct sockaddr *)&sa_source);
+		result = sctp_asconf_del_remote_addrs_except(stcb, src);
 
 		if (result) {
 			/* src address did not exist? */
@@ -476,14 +433,19 @@
 }
 
 static struct mbuf *
-sctp_process_asconf_set_primary(struct mbuf *m,
+sctp_process_asconf_set_primary(struct sockaddr *src,
     struct sctp_asconf_paramhdr *aph,
     struct sctp_tcb *stcb, int response_required)
 {
 	struct mbuf *m_reply = NULL;
-	struct sockaddr_storage sa_source, sa_store;
+	union sctp_sockstore store;
 	struct sctp_paramhdr *ph;
-	uint16_t param_type, param_length, aparam_length;
+	uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+	uint16_t param_length;
+
+#endif
 	struct sockaddr *sa;
 	int zero_address = 0;
 
@@ -501,9 +463,10 @@
 	aparam_length = ntohs(aph->ph.param_length);
 	ph = (struct sctp_paramhdr *)(aph + 1);
 	param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
 	param_length = ntohs(ph->param_length);
-
-	sa = (struct sockaddr *)&sa_store;
+#endif
+	sa = &store.sa;
 	switch (param_type) {
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
@@ -512,7 +475,7 @@
 			return (NULL);
 		}
 		v4addr = (struct sctp_ipv4addr_param *)ph;
-		sin = (struct sockaddr_in *)&sa_store;
+		sin = &store.sin;
 		bzero(sin, sizeof(*sin));
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(struct sockaddr_in);
@@ -530,7 +493,7 @@
 			return (NULL);
 		}
 		v6addr = (struct sctp_ipv6addr_param *)ph;
-		sin6 = (struct sockaddr_in6 *)&sa_store;
+		sin6 = &store.sin6;
 		bzero(sin6, sizeof(*sin6));
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -551,11 +514,10 @@
 
 	/* if 0.0.0.0/::0, use the source address instead */
 	if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
-		sa = (struct sockaddr *)&sa_source;
-		sctp_asconf_get_source_ip(m, sa);
+		sa = src;
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "process_asconf_set_primary: using source addr ");
-		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
 	}
 	/* set the primary address */
 	if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
@@ -594,7 +556,9 @@
 		    (stcb->asoc.primary_destination->dest_state &
 		    SCTP_ADDR_UNCONFIRMED) == 0) {
 
-			sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+			sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+			    stcb->sctp_ep, stcb, NULL,
+			    SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
 			if (sctp_is_mobility_feature_on(stcb->sctp_ep,
 			    SCTP_MOBILITY_FASTHANDOFF)) {
 				sctp_assoc_immediate_retrans(stcb,
@@ -627,6 +591,7 @@
  */
 void
 sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+    struct sockaddr *src,
     struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
     int first)
 {
@@ -634,7 +599,7 @@
 	uint32_t serial_num;
 	struct mbuf *n, *m_ack, *m_result, *m_tail;
 	struct sctp_asconf_ack_chunk *ack_cp;
-	struct sctp_asconf_paramhdr *aph, *ack_aph;
+	struct sctp_asconf_paramhdr *aph;
 	struct sctp_ipv6addr_param *p_addr;
 	unsigned int asconf_limit, cnt;
 	int error = 0;		/* did an error occur? */
@@ -689,7 +654,7 @@
 		}
 	}
 	m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
-	    M_DONTWAIT, 1, MT_DATA);
+	    M_NOWAIT, 1, MT_DATA);
 	if (m_ack == NULL) {
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "handle_asconf: couldn't get mbuf!\n");
@@ -717,13 +682,6 @@
 	}
 	/* param_length is already validated in process_control... */
 	offset += ntohs(p_addr->ph.param_length);	/* skip lookup addr */
-
-	/* get pointer to first asconf param in ASCONF-ACK */
-	ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
-	if (ack_aph == NULL) {
-		SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
-		return;
-	}
 	/* get pointer to first asconf param in ASCONF */
 	aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
 	if (aph == NULL) {
@@ -762,14 +720,12 @@
 		}
 		switch (param_type) {
 		case SCTP_ADD_IP_ADDRESS:
-			asoc->peer_supports_asconf = 1;
-			m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+			m_result = sctp_process_asconf_add_ip(src, aph, stcb,
 			    (cnt < SCTP_BASE_SYSCTL(sctp_hb_maxburst)), error);
 			cnt++;
 			break;
 		case SCTP_DEL_IP_ADDRESS:
-			asoc->peer_supports_asconf = 1;
-			m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+			m_result = sctp_process_asconf_delete_ip(src, aph, stcb,
 			    error);
 			break;
 		case SCTP_ERROR_CAUSE_IND:
@@ -776,8 +732,7 @@
 			/* not valid in an ASCONF chunk */
 			break;
 		case SCTP_SET_PRIM_ADDR:
-			asoc->peer_supports_asconf = 1;
-			m_result = sctp_process_asconf_set_primary(m, aph,
+			m_result = sctp_process_asconf_set_primary(src, aph,
 			    stcb, error);
 			break;
 		case SCTP_NAT_VTAGS:
@@ -857,11 +812,7 @@
 		 * this could happen if the source address was just newly
 		 * added
 		 */
-		struct sockaddr_storage addr;
-		struct sockaddr *src = (struct sockaddr *)&addr;
-
 		SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
-		sctp_asconf_get_source_ip(m, src);
 		SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
 		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
 		/* look up the from address */
@@ -920,10 +871,12 @@
 static uint32_t
 sctp_addr_match(struct sctp_paramhdr *ph, struct sockaddr *sa)
 {
+#if defined(INET) || defined(INET6)
 	uint16_t param_type, param_length;
 
 	param_type = ntohs(ph->param_type);
 	param_length = ntohs(ph->param_length);
+#endif
 	switch (sa->sa_family) {
 #ifdef INET6
 	case AF_INET6:
@@ -934,7 +887,7 @@
 
 			v6addr = (struct sctp_ipv6addr_param *)ph;
 			if ((param_type == SCTP_IPV6_ADDRESS) &&
-			    param_length == sizeof(struct sctp_ipv6addr_param) &&
+			    (param_length == sizeof(struct sctp_ipv6addr_param)) &&
 			    (memcmp(&v6addr->addr, &sin6->sin6_addr,
 			    sizeof(struct in6_addr)) == 0)) {
 				return (1);
@@ -950,7 +903,7 @@
 
 			v4addr = (struct sctp_ipv4addr_param *)ph;
 			if ((param_type == SCTP_IPV4_ADDRESS) &&
-			    param_length == sizeof(struct sctp_ipv4addr_param) &&
+			    (param_length == sizeof(struct sctp_ipv4addr_param)) &&
 			    (memcmp(&v4addr->addr, &sin->sin_addr,
 			    sizeof(struct in_addr)) == 0)) {
 				return (1);
@@ -970,8 +923,6 @@
 void
 sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
-	/* mark peer as ASCONF incapable */
-	stcb->asoc.peer_supports_asconf = 0;
 	/*
 	 * clear out any existing asconfs going out
 	 */
@@ -1043,7 +994,7 @@
 		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
 		    stcb->asoc.deleted_primary,
-		    SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+		    SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
 		stcb->asoc.num_send_timers_up--;
 		if (stcb->asoc.num_send_timers_up < 0) {
 			stcb->asoc.num_send_timers_up = 0;
@@ -1082,7 +1033,7 @@
 
 	SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
 	sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
-	    SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+	    SCTP_FROM_SCTP_ASCONF + SCTP_LOC_4);
 	stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
 	net->error_count = 0;
 	TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
@@ -1159,7 +1110,8 @@
 		 * not be changed.
 		 */
 		SCTP_RTALLOC((sctp_route_t *) & net->ro,
-		    stcb->sctp_ep->def_vrf_id);
+		    stcb->sctp_ep->def_vrf_id,
+		    stcb->sctp_ep->fibnum);
 		if (net->ro.ro_rt == NULL)
 			continue;
 
@@ -1251,7 +1203,6 @@
     uint16_t type)
 {
 	struct sctp_asconf_addr *aa, *aa_next;
-	struct sockaddr *sa;
 
 	/* make sure the request isn't already in the queue */
 	TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) {
@@ -1314,8 +1265,7 @@
 		{
 			struct sockaddr_in6 *sin6;
 
-			sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
-			sa = (struct sockaddr *)sin6;
+			sin6 = &ifa->address.sin6;
 			aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
 			aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
 			aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1330,8 +1280,7 @@
 		{
 			struct sockaddr_in *sin;
 
-			sin = (struct sockaddr_in *)&ifa->address.sa;
-			sa = (struct sockaddr *)sin;
+			sin = &ifa->address.sin;
 			aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
 			aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
 			aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1354,13 +1303,13 @@
 	if (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_ASCONF2) {
 		if (type == SCTP_ADD_IP_ADDRESS) {
 			SCTP_PRINTF("asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: ");
-			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
 		} else if (type == SCTP_DEL_IP_ADDRESS) {
 			SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: ");
-			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
 		} else {
 			SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: ");
-			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+			SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
 		}
 	}
 #endif
@@ -1381,9 +1330,10 @@
 {
 	uint32_t status;
 	int pending_delete_queued = 0;
+	int last;
 
 	/* see if peer supports ASCONF */
-	if (stcb->asoc.peer_supports_asconf == 0) {
+	if (stcb->asoc.asconf_supported == 0) {
 		return (-1);
 	}
 	/*
@@ -1390,15 +1340,21 @@
 	 * if this is deleting the last address from the assoc, mark it as
 	 * pending.
 	 */
-	if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
-	    (sctp_local_addr_count(stcb) < 2)) {
-		/* set the pending delete info only */
-		stcb->asoc.asconf_del_pending = 1;
-		stcb->asoc.asconf_addr_del_pending = ifa;
-		atomic_add_int(&ifa->refcount, 1);
-		SCTPDBG(SCTP_DEBUG_ASCONF2,
-		    "asconf_queue_add: mark delete last address pending\n");
-		return (-1);
+	if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending) {
+		if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+			last = (sctp_local_addr_count(stcb) == 0);
+		} else {
+			last = (sctp_local_addr_count(stcb) == 1);
+		}
+		if (last) {
+			/* set the pending delete info only */
+			stcb->asoc.asconf_del_pending = 1;
+			stcb->asoc.asconf_addr_del_pending = ifa;
+			atomic_add_int(&ifa->refcount, 1);
+			SCTPDBG(SCTP_DEBUG_ASCONF2,
+			    "asconf_queue_add: mark delete last address pending\n");
+			return (-1);
+		}
 	}
 	/* queue an asconf parameter */
 	status = sctp_asconf_queue_mgmt(stcb, ifa, type);
@@ -1467,13 +1423,12 @@
 {
 	struct sctp_ifa *ifa;
 	struct sctp_asconf_addr *aa, *aa_next;
-	uint32_t vrf_id;
 
 	if (stcb == NULL) {
 		return (-1);
 	}
 	/* see if peer supports ASCONF */
-	if (stcb->asoc.peer_supports_asconf == 0) {
+	if (stcb->asoc.asconf_supported == 0) {
 		return (-1);
 	}
 	/* make sure the request isn't already in the queue */
@@ -1499,12 +1454,7 @@
 	}			/* for each aa */
 
 	/* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
-	if (stcb) {
-		vrf_id = stcb->asoc.vrf_id;
-	} else {
-		vrf_id = SCTP_DEFAULT_VRFID;
-	}
-	ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+	ifa = sctp_find_ifa_by_addr(sa, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
 
 	/* adding new request to the queue */
 	SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
@@ -1593,7 +1543,7 @@
  * notifications based on the error response
  */
 static void
-sctp_asconf_process_error(struct sctp_tcb *stcb,
+sctp_asconf_process_error(struct sctp_tcb *stcb SCTP_UNUSED,
     struct sctp_asconf_paramhdr *aph)
 {
 	struct sctp_error_cause *eh;
@@ -1631,10 +1581,7 @@
 		switch (param_type) {
 		case SCTP_ADD_IP_ADDRESS:
 		case SCTP_DEL_IP_ADDRESS:
-			stcb->asoc.peer_supports_asconf = 0;
-			break;
 		case SCTP_SET_PRIM_ADDR:
-			stcb->asoc.peer_supports_asconf = 0;
 			break;
 		default:
 			break;
@@ -1670,8 +1617,6 @@
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "process_param_ack: set primary IP address\n");
 		/* nothing to do... peer may start using this addr */
-		if (flag == 0)
-			stcb->asoc.peer_supports_asconf = 0;
 		break;
 	default:
 		/* should NEVER happen */
@@ -1689,11 +1634,11 @@
  * cleanup from a bad asconf ack parameter
  */
 static void
-sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+sctp_asconf_ack_clear(struct sctp_tcb *stcb SCTP_UNUSED)
 {
 	/* assume peer doesn't really know how to do asconfs */
-	stcb->asoc.peer_supports_asconf = 0;
 	/* XXX we could free the pending queue here */
+
 }
 
 void
@@ -1736,8 +1681,14 @@
 	 * abort the asoc, since someone probably just hijacked us...
 	 */
 	if (serial_num == (asoc->asconf_seq_out + 1)) {
+		struct mbuf *op_err;
+		char msg[SCTP_DIAG_INFO_LEN];
+
 		SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
-		sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, SCTP_SO_NOT_LOCKED);
+		snprintf(msg, sizeof(msg), "Never sent serial number %8.8x",
+		    serial_num);
+		op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+		sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		*abort_no_unlock = 1;
 		return;
 	}
@@ -1750,7 +1701,7 @@
 	if (serial_num == asoc->asconf_seq_out - 1) {
 		/* stop our timer */
 		sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
-		    SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+		    SCTP_FROM_SCTP_ASCONF + SCTP_LOC_5);
 	}
 	/* process the ASCONF-ACK contents */
 	ack_length = ntohs(cp->ch.chunk_length) -
@@ -1922,7 +1873,6 @@
 {
 	int status;
 
-
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 ||
 	    sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
 		/* subset bound, no ASCONF allowed case, so ignore */
@@ -1933,14 +1883,22 @@
 	 * this is boundall or subset bound w/ASCONF allowed
 	 */
 
-	/* first, make sure it's a good address family */
+	/* first, make sure that the address is IPv4 or IPv6 and not jailed */
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET6
 	case AF_INET6:
+		if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin6.sin6_addr) != 0) {
+			return;
+		}
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
+		if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin.sin_addr) != 0) {
+			return;
+		}
 		break;
 #endif
 	default:
@@ -1971,13 +1929,13 @@
 		{
 			struct sockaddr_in6 *sin6;
 
-			sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+			sin6 = &ifa->address.sin6;
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				/* we skip unspecifed addresses */
 				return;
 			}
 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
-				if (stcb->asoc.local_scope == 0) {
+				if (stcb->asoc.scope.local_scope == 0) {
 					return;
 				}
 				/* is it the right link local scope? */
@@ -1985,7 +1943,7 @@
 					return;
 				}
 			}
-			if (stcb->asoc.site_scope == 0 &&
+			if (stcb->asoc.scope.site_scope == 0 &&
 			    IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
 				return;
 			}
@@ -2004,12 +1962,12 @@
 			    SCTP_IPV6_V6ONLY(inp6))
 				return;
 
-			sin = (struct sockaddr_in *)&ifa->address.sa;
+			sin = &ifa->address.sin;
 			if (sin->sin_addr.s_addr == 0) {
 				/* we skip unspecifed addresses */
 				return;
 			}
-			if (stcb->asoc.ipv4_local_scope == 0 &&
+			if (stcb->asoc.scope.ipv4_local_scope == 0 &&
 			    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
 				return;
 			}
@@ -2024,7 +1982,7 @@
 	/* queue an asconf for this address add/delete */
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
 		/* does the peer do asconf? */
-		if (stcb->asoc.peer_supports_asconf) {
+		if (stcb->asoc.asconf_supported) {
 			/* queue an asconf for this addr */
 			status = sctp_asconf_queue_add(stcb, ifa, type);
 
@@ -2034,7 +1992,8 @@
 			 * sent when the state goes open.
 			 */
 			if (status == 0 &&
-			    SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+			    ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+			    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
 #ifdef SCTP_TIMER_BASED_ASCONF
 				sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
 				    stcb, stcb->asoc.primary_destination);
@@ -2161,13 +2120,17 @@
 					else
 						continue;
 				}
-				sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+				sin6 = &ifa->address.sin6;
 				if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 					/* we skip unspecifed addresses */
 					continue;
 				}
+				if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+				    &sin6->sin6_addr) != 0) {
+					continue;
+				}
 				if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
-					if (stcb->asoc.local_scope == 0) {
+					if (stcb->asoc.scope.local_scope == 0) {
 						continue;
 					}
 					/* is it the right link local scope? */
@@ -2191,12 +2154,16 @@
 				    SCTP_IPV6_V6ONLY(inp6))
 					continue;
 
-				sin = (struct sockaddr_in *)&ifa->address.sa;
+				sin = &ifa->address.sin;
 				if (sin->sin_addr.s_addr == 0) {
 					/* we skip unspecifed addresses */
 					continue;
 				}
-				if (stcb->asoc.ipv4_local_scope == 0 &&
+				if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+				    &sin->sin_addr) != 0) {
+					continue;
+				}
+				if (stcb->asoc.scope.ipv4_local_scope == 0 &&
 				    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
 					continue;
 				}
@@ -2259,13 +2226,7 @@
 				}
 			} else {
 				/* Need to check scopes for this guy */
-				if (sctp_is_address_in_scope(ifa,
-				    stcb->asoc.ipv4_addr_legal,
-				    stcb->asoc.ipv6_addr_legal,
-				    stcb->asoc.loopback_scope,
-				    stcb->asoc.ipv4_local_scope,
-				    stcb->asoc.local_scope,
-				    stcb->asoc.site_scope, 0) == 0) {
+				if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
 					continue;
 				}
 			}
@@ -2272,7 +2233,7 @@
 		}
 		/* queue an asconf for this address add/delete */
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
-		    stcb->asoc.peer_supports_asconf) {
+		    stcb->asoc.asconf_supported == 1) {
 			/* queue an asconf for this addr */
 			status = sctp_asconf_queue_add(stcb, ifa, type);
 			/*
@@ -2280,7 +2241,8 @@
 			 * count of queued params.  If in the non-open
 			 * state, these get sent when the assoc goes open.
 			 */
-			if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+			if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+			    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 				if (status >= 0) {
 					num_queued++;
 				}
@@ -2338,9 +2300,10 @@
 		/* set primary queuing succeeded */
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
 		    "set_primary_ip_address_sa: queued on tcb=%p, ",
-		    stcb);
+		    (void *)stcb);
 		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
-		if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+		if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+		    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 #ifdef SCTP_TIMER_BASED_ASCONF
 			sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
 			    stcb->sctp_ep, stcb,
@@ -2351,7 +2314,7 @@
 		}
 	} else {
 		SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ",
-		    stcb);
+		    (void *)stcb);
 		SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
 		return (-1);
 	}
@@ -2374,9 +2337,10 @@
 			    SCTP_SET_PRIM_ADDR)) {
 				/* set primary queuing succeeded */
 				SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
-				    stcb);
+				    (void *)stcb);
 				SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
-				if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+				if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+				    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 #ifdef SCTP_TIMER_BASED_ASCONF
 					sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
 					    stcb->sctp_ep, stcb,
@@ -2498,7 +2462,7 @@
 		return (NULL);
 	}
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
-		if (stcb->asoc.loopback_scope == 0 &&
+		if (stcb->asoc.scope.loopback_scope == 0 &&
 		    SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* Skip if loopback_scope not set */
 			continue;
@@ -2507,15 +2471,19 @@
 			switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
-				if (stcb->asoc.ipv4_addr_legal) {
+				if (stcb->asoc.scope.ipv4_addr_legal) {
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+					sin = &sctp_ifa->address.sin;
 					if (sin->sin_addr.s_addr == 0) {
 						/* skip unspecifed addresses */
 						continue;
 					}
-					if (stcb->asoc.ipv4_local_scope == 0 &&
+					if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+					    &sin->sin_addr) != 0) {
+						continue;
+					}
+					if (stcb->asoc.scope.ipv4_local_scope == 0 &&
 					    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))
 						continue;
 
@@ -2534,13 +2502,13 @@
 #endif
 #ifdef INET6
 			case AF_INET6:
-				if (stcb->asoc.ipv6_addr_legal) {
+				if (stcb->asoc.scope.ipv6_addr_legal) {
 					struct sockaddr_in6 *sin6;
 
 					if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
 						continue;
 					}
-					sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+					sin6 = &sctp_ifa->address.sin6;
 					if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						/*
 						 * we skip unspecifed
@@ -2548,10 +2516,14 @@
 						 */
 						continue;
 					}
-					if (stcb->asoc.local_scope == 0 &&
+					if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+					    &sin6->sin6_addr) != 0) {
+						continue;
+					}
+					if (stcb->asoc.scope.local_scope == 0 &&
 					    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
 						continue;
-					if (stcb->asoc.site_scope == 0 &&
+					if (stcb->asoc.scope.site_scope == 0 &&
 					    IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
 						continue;
 
@@ -2630,7 +2602,7 @@
 	 * it's simpler to fill in the asconf chunk header lookup address on
 	 * the fly
 	 */
-	m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+	m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_asconf_chk == NULL) {
 		/* no mbuf's */
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
@@ -2637,7 +2609,7 @@
 		    "compose_asconf: couldn't get chunk mbuf!\n");
 		return (NULL);
 	}
-	m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_asconf == NULL) {
 		/* no mbuf's */
 		SCTPDBG(SCTP_DEBUG_ASCONF1,
@@ -2666,7 +2638,8 @@
 		/* get the parameter length */
 		p_length = SCTP_SIZE32(aa->ap.aph.ph.param_length);
 		/* will it fit in current chunk? */
-		if (SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) {
+		if ((SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) ||
+		    (SCTP_BUF_LEN(m_asconf) + p_length > MCLBYTES)) {
 			/* won't fit, so we're done with this chunk */
 			break;
 		}
@@ -2787,7 +2760,7 @@
 	/* chain it all together */
 	SCTP_BUF_NEXT(m_asconf_chk) = m_asconf;
 	*retlen = SCTP_BUF_LEN(m_asconf_chk) + SCTP_BUF_LEN(m_asconf);
-	acp->ch.chunk_length = ntohs(*retlen);
+	acp->ch.chunk_length = htons(*retlen);
 
 	return (m_asconf_chk);
 }
@@ -2807,19 +2780,16 @@
 	struct sctp_paramhdr tmp_param, *ph;
 	uint16_t plen, ptype;
 	struct sctp_ifa *sctp_ifa;
+	union sctp_sockstore store;
 
 #ifdef INET6
 	struct sctp_ipv6addr_param addr6_store;
-	struct sockaddr_in6 sin6;
 
 #endif
 #ifdef INET
 	struct sctp_ipv4addr_param addr4_store;
-	struct sockaddr_in sin;
 
 #endif
-	struct sockaddr *sa;
-	uint32_t vrf_id;
 
 	SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
 	if (stcb == NULL)	/* Un-needed check for SA */
@@ -2831,21 +2801,6 @@
 	if ((offset + sizeof(struct sctp_paramhdr)) > length) {
 		return;
 	}
-	/* init the addresses */
-#ifdef INET6
-	bzero(&sin6, sizeof(sin6));
-	sin6.sin6_family = AF_INET6;
-	sin6.sin6_len = sizeof(sin6);
-	sin6.sin6_port = stcb->rport;
-#endif
-
-#ifdef INET
-	bzero(&sin, sizeof(sin));
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof(sin);
-	sin.sin_port = stcb->rport;
-#endif
-
 	/* go through the addresses in the init-ack */
 	ph = (struct sctp_paramhdr *)
 	    sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
@@ -2868,9 +2823,11 @@
 				    a6p == NULL) {
 					return;
 				}
-				memcpy(&sin6.sin6_addr, a6p->addr,
-				    sizeof(struct in6_addr));
-				sa = (struct sockaddr *)&sin6;
+				memset(&store, 0, sizeof(union sctp_sockstore));
+				store.sin6.sin6_family = AF_INET6;
+				store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+				store.sin6.sin6_port = stcb->rport;
+				memcpy(&store.sin6.sin6_addr, a6p->addr, sizeof(struct in6_addr));
 				break;
 			}
 #endif
@@ -2887,8 +2844,11 @@
 				    a4p == NULL) {
 					return;
 				}
-				sin.sin_addr.s_addr = a4p->addr;
-				sa = (struct sockaddr *)&sin;
+				memset(&store, 0, sizeof(union sctp_sockstore));
+				store.sin.sin_family = AF_INET;
+				store.sin.sin_len = sizeof(struct sockaddr_in);
+				store.sin.sin_port = stcb->rport;
+				store.sin.sin_addr.s_addr = a4p->addr;
 				break;
 			}
 #endif
@@ -2897,12 +2857,7 @@
 		}
 
 		/* see if this address really (still) exists */
-		if (stcb) {
-			vrf_id = stcb->asoc.vrf_id;
-		} else {
-			vrf_id = SCTP_DEFAULT_VRFID;
-		}
-		sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+		sctp_ifa = sctp_find_ifa_by_addr(&store.sa, stcb->asoc.vrf_id,
 		    SCTP_ADDR_NOT_LOCKED);
 		if (sctp_ifa == NULL) {
 			/* address doesn't exist anymore */
@@ -2911,9 +2866,9 @@
 			/* are ASCONFs allowed ? */
 			if ((sctp_is_feature_on(stcb->sctp_ep,
 			    SCTP_PCB_FLAGS_DO_ASCONF)) &&
-			    stcb->asoc.peer_supports_asconf) {
+			    stcb->asoc.asconf_supported) {
 				/* queue an ASCONF DEL_IP_ADDRESS */
-				status = sctp_asconf_queue_sa_delete(stcb, sa);
+				status = sctp_asconf_queue_sa_delete(stcb, &store.sa);
 				/*
 				 * if queued ok, and in correct state, send
 				 * out the ASCONF.
@@ -3160,7 +3115,11 @@
 			switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
-				sin = (struct sockaddr_in *)&sctp_ifa->address.sin;
+				sin = &sctp_ifa->address.sin;
+				if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+				    &sin->sin_addr) != 0) {
+					continue;
+				}
 				if ((ipv4_scope == 0) &&
 				    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 					/* private address not in scope */
@@ -3170,7 +3129,11 @@
 #endif
 #ifdef INET6
 			case AF_INET6:
-				sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sin6;
+				sin6 = &sctp_ifa->address.sin6;
+				if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+				    &sin6->sin6_addr) != 0) {
+					continue;
+				}
 				if ((local_scope == 0) &&
 				    (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
 					continue;
@@ -3286,6 +3249,7 @@
 		} else {
 			struct sctp_asconf_iterator *asc;
 			struct sctp_laddr *wi;
+			int ret;
 
 			SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
 			    sizeof(struct sctp_asconf_iterator),
@@ -3307,7 +3271,7 @@
 			wi->action = type;
 			atomic_add_int(&ifa->refcount, 1);
 			LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
-			(void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+			ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
 			    sctp_asconf_iterator_stcb,
 			    sctp_asconf_iterator_ep_end,
 			    SCTP_PCB_ANY_FLAGS,
@@ -3315,6 +3279,12 @@
 			    SCTP_ASOC_ANY_STATE,
 			    (void *)asc, 0,
 			    sctp_asconf_iterator_end, inp, 0);
+			if (ret) {
+				SCTP_PRINTF("Failed to initiate iterator for addr_mgmt_ep_sa\n");
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EFAULT);
+				sctp_asconf_iterator_end(asc, 0);
+				return (EFAULT);
+			}
 		}
 		return (0);
 	} else {
@@ -3404,6 +3374,11 @@
 		TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
 		break;
 #endif
+	default:
+		SCTPDBG(SCTP_DEBUG_ASCONF1,
+		    "sctp_asconf_send_nat_state_update: unknown address family\n");
+		SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+		return;
 	}
 	SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
 	    SCTP_M_ASC_ADDR);
@@ -3437,6 +3412,11 @@
 		TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
 		break;
 #endif
+	default:
+		SCTPDBG(SCTP_DEBUG_ASCONF1,
+		    "sctp_asconf_send_nat_state_update: unknown address family\n");
+		SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+		return;
 	}
 	/* Now we must hunt the addresses and add all global addresses */
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
@@ -3456,6 +3436,10 @@
 #ifdef INET
 				case AF_INET:
 					to = &sctp_ifap->address.sin;
+					if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+					    &to->sin_addr) != 0) {
+						continue;
+					}
 					if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) {
 						continue;
 					}
@@ -3467,6 +3451,10 @@
 #ifdef INET6
 				case AF_INET6:
 					to6 = &sctp_ifap->address.sin6;
+					if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+					    &to6->sin6_addr) != 0) {
+						continue;
+					}
 					if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
 						continue;
 					}

Modified: trunk/sys/netinet/sctp_asconf.h
===================================================================
--- trunk/sys/netinet/sctp_asconf.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_asconf.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_asconf.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_asconf.h 237715 2012-06-28 16:01:08Z tuexen $");
 
 #ifndef _NETINET_SCTP_ASCONF_H_
 #define _NETINET_SCTP_ASCONF_H_
@@ -47,8 +47,8 @@
 extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
 
 extern void
-sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
-    struct sctp_tcb *, int i);
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sockaddr *,
+    struct sctp_asconf_chunk *, struct sctp_tcb *, int);
 
 extern void
 sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,

Modified: trunk/sys/netinet/sctp_auth.c
===================================================================
--- trunk/sys/netinet/sctp_auth.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_auth.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_auth.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_auth.c 294174 2016-01-16 17:56:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp.h>
@@ -134,11 +134,6 @@
 	if (list == NULL)
 		return (-1);
 
-	/* is chunk restricted? */
-	if ((chunk == SCTP_ASCONF) ||
-	    (chunk == SCTP_ASCONF_ACK)) {
-		return (-1);
-	}
 	if (list->chunks[chunk] == 1) {
 		list->chunks[chunk] = 0;
 		list->num_chunks--;
@@ -159,16 +154,6 @@
 }
 
 /*
- * set the default list of chunks requiring AUTH
- */
-void
-sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
-{
-	(void)sctp_auth_add_chunk(SCTP_ASCONF, list);
-	(void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
-}
-
-/*
  * return the current number and list of required chunks caller must
  * guarantee ptr has space for up to 256 bytes
  */
@@ -334,10 +319,6 @@
 {
 	sctp_key_t *new_key;
 
-	/* validate keylen */
-	if (keylen > SCTP_AUTH_RANDOM_SIZE_MAX)
-		keylen = SCTP_AUTH_RANDOM_SIZE_MAX;
-
 	new_key = sctp_alloc_key(keylen);
 	if (new_key == NULL) {
 		/* out of memory */
@@ -375,7 +356,7 @@
 	uint32_t i;
 	uint32_t key1len, key2len;
 	uint8_t *key_1, *key_2;
-	uint8_t temp[SCTP_AUTH_RANDOM_SIZE_MAX];
+	uint8_t val1, val2;
 
 	/* sanity/length check */
 	key1len = sctp_get_keylen(key1);
@@ -387,38 +368,24 @@
 	else if (key2len == 0)
 		return (1);
 
-	if (key1len != key2len) {
-		if (key1len >= key2len)
-			maxlen = key1len;
-		else
-			maxlen = key2len;
-		bzero(temp, maxlen);
-		if (key1len < maxlen) {
-			/* prepend zeroes to key1 */
-			bcopy(key1->key, temp + (maxlen - key1len), key1len);
-			key_1 = temp;
-			key_2 = key2->key;
-		} else {
-			/* prepend zeroes to key2 */
-			bcopy(key2->key, temp + (maxlen - key2len), key2len);
-			key_1 = key1->key;
-			key_2 = temp;
-		}
+	if (key1len < key2len) {
+		maxlen = key2len;
 	} else {
 		maxlen = key1len;
-		key_1 = key1->key;
-		key_2 = key2->key;
 	}
-
+	key_1 = key1->key;
+	key_2 = key2->key;
+	/* check for numeric equality */
 	for (i = 0; i < maxlen; i++) {
-		if (*key_1 > *key_2)
+		/* left-pad with zeros */
+		val1 = (i < (maxlen - key1len)) ? 0 : *(key_1++);
+		val2 = (i < (maxlen - key2len)) ? 0 : *(key_2++);
+		if (val1 > val2) {
 			return (1);
-		else if (*key_1 < *key_2)
+		} else if (val1 < val2) {
 			return (-1);
-		key_1++;
-		key_2++;
+		}
 	}
-
 	/* keys are equal value, so check lengths */
 	if (key1len == key2len)
 		return (0);
@@ -592,7 +559,7 @@
 		atomic_add_int(&skey->refcount, 1);
 		SCTPDBG(SCTP_DEBUG_AUTH2,
 		    "%s: stcb %p key %u refcount acquire to %d\n",
-		    __FUNCTION__, stcb, key_id, skey->refcount);
+		    __func__, (void *)stcb, key_id, skey->refcount);
 	}
 }
 
@@ -610,20 +577,20 @@
 
 	/* decrement the ref count */
 	if (skey) {
-		sctp_free_sharedkey(skey);
 		SCTPDBG(SCTP_DEBUG_AUTH2,
 		    "%s: stcb %p key %u refcount release to %d\n",
-		    __FUNCTION__, stcb, key_id, skey->refcount);
+		    __func__, (void *)stcb, key_id, skey->refcount);
 
 		/* see if a notification should be generated */
-		if ((skey->refcount <= 1) && (skey->deactivated)) {
+		if ((skey->refcount <= 2) && (skey->deactivated)) {
 			/* notify ULP that key is no longer used */
 			sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb,
 			    key_id, 0, so_locked);
 			SCTPDBG(SCTP_DEBUG_AUTH2,
 			    "%s: stcb %p key %u no longer used, %d\n",
-			    __FUNCTION__, stcb, key_id, skey->refcount);
+			    __func__, (void *)stcb, key_id, skey->refcount);
 		}
+		sctp_free_sharedkey(skey);
 	}
 }
 
@@ -665,7 +632,7 @@
 
 
 sctp_hmaclist_t *
-sctp_alloc_hmaclist(uint8_t num_hmacs)
+sctp_alloc_hmaclist(uint16_t num_hmacs)
 {
 	sctp_hmaclist_t *new_list;
 	int alloc_size;
@@ -704,15 +671,7 @@
 		return (-1);
 	}
 	if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) &&
-#ifdef HAVE_SHA224
-	    (hmac_id != SCTP_AUTH_HMAC_ID_SHA224) &&
-#endif
-#ifdef HAVE_SHA2
-	    (hmac_id != SCTP_AUTH_HMAC_ID_SHA256) &&
-	    (hmac_id != SCTP_AUTH_HMAC_ID_SHA384) &&
-	    (hmac_id != SCTP_AUTH_HMAC_ID_SHA512) &&
-#endif
-	    1) {
+	    (hmac_id != SCTP_AUTH_HMAC_ID_SHA256)) {
 		return (-1);
 	}
 	/* Now is it already in the list */
@@ -755,8 +714,9 @@
 	new_list = sctp_alloc_hmaclist(2);
 	if (new_list == NULL)
 		return (NULL);
+	/* We prefer SHA256, so list it first */
+	(void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
 	(void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1);
-	(void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
 	return (new_list);
 }
 
@@ -812,19 +772,13 @@
 sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs)
 {
 	uint32_t i;
-	uint16_t hmac_id;
-	uint32_t sha1_supported = 0;
 
 	for (i = 0; i < num_hmacs; i++) {
-		hmac_id = ntohs(hmacs->hmac_ids[i]);
-		if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1)
-			sha1_supported = 1;
+		if (ntohs(hmacs->hmac_ids[i]) == SCTP_AUTH_HMAC_ID_SHA1) {
+			return (0);
+		}
 	}
-	/* all HMAC id's are supported */
-	if (sha1_supported == 0)
-		return (-1);
-	else
-		return (0);
+	return (-1);
 }
 
 sctp_authinfo_t *
@@ -878,18 +832,8 @@
 	switch (hmac_algo) {
 	case SCTP_AUTH_HMAC_ID_SHA1:
 		return (SCTP_AUTH_DIGEST_LEN_SHA1);
-#ifdef HAVE_SHA224
-	case SCTP_AUTH_HMAC_ID_SHA224:
-		return (SCTP_AUTH_DIGEST_LEN_SHA224);
-#endif
-#ifdef HAVE_SHA2
 	case SCTP_AUTH_HMAC_ID_SHA256:
 		return (SCTP_AUTH_DIGEST_LEN_SHA256);
-	case SCTP_AUTH_HMAC_ID_SHA384:
-		return (SCTP_AUTH_DIGEST_LEN_SHA384);
-	case SCTP_AUTH_HMAC_ID_SHA512:
-		return (SCTP_AUTH_DIGEST_LEN_SHA512);
-#endif
 	default:
 		/* unknown HMAC algorithm: can't do anything */
 		return (0);
@@ -901,17 +845,9 @@
 {
 	switch (hmac_algo) {
 	case SCTP_AUTH_HMAC_ID_SHA1:
-#ifdef HAVE_SHA224
-	case SCTP_AUTH_HMAC_ID_SHA224:
-#endif
 		return (64);
-#ifdef HAVE_SHA2
 	case SCTP_AUTH_HMAC_ID_SHA256:
 		return (64);
-	case SCTP_AUTH_HMAC_ID_SHA384:
-	case SCTP_AUTH_HMAC_ID_SHA512:
-		return (128);
-#endif
 	case SCTP_AUTH_HMAC_ID_RSVD:
 	default:
 		/* unknown HMAC algorithm: can't do anything */
@@ -924,23 +860,11 @@
 {
 	switch (hmac_algo) {
 	case SCTP_AUTH_HMAC_ID_SHA1:
-		SHA1_Init(&ctx->sha1);
+		SCTP_SHA1_INIT(&ctx->sha1);
 		break;
-#ifdef HAVE_SHA224
-	case SCTP_AUTH_HMAC_ID_SHA224:
-		break;
-#endif
-#ifdef HAVE_SHA2
 	case SCTP_AUTH_HMAC_ID_SHA256:
-		SHA256_Init(&ctx->sha256);
+		SCTP_SHA256_INIT(&ctx->sha256);
 		break;
-	case SCTP_AUTH_HMAC_ID_SHA384:
-		SHA384_Init(&ctx->sha384);
-		break;
-	case SCTP_AUTH_HMAC_ID_SHA512:
-		SHA512_Init(&ctx->sha512);
-		break;
-#endif
 	case SCTP_AUTH_HMAC_ID_RSVD:
 	default:
 		/* unknown HMAC algorithm: can't do anything */
@@ -954,23 +878,11 @@
 {
 	switch (hmac_algo) {
 	case SCTP_AUTH_HMAC_ID_SHA1:
-		SHA1_Update(&ctx->sha1, text, textlen);
+		SCTP_SHA1_UPDATE(&ctx->sha1, text, textlen);
 		break;
-#ifdef HAVE_SHA224
-	case SCTP_AUTH_HMAC_ID_SHA224:
-		break;
-#endif
-#ifdef HAVE_SHA2
 	case SCTP_AUTH_HMAC_ID_SHA256:
-		SHA256_Update(&ctx->sha256, text, textlen);
+		SCTP_SHA256_UPDATE(&ctx->sha256, text, textlen);
 		break;
-	case SCTP_AUTH_HMAC_ID_SHA384:
-		SHA384_Update(&ctx->sha384, text, textlen);
-		break;
-	case SCTP_AUTH_HMAC_ID_SHA512:
-		SHA512_Update(&ctx->sha512, text, textlen);
-		break;
-#endif
 	case SCTP_AUTH_HMAC_ID_RSVD:
 	default:
 		/* unknown HMAC algorithm: can't do anything */
@@ -984,24 +896,11 @@
 {
 	switch (hmac_algo) {
 	case SCTP_AUTH_HMAC_ID_SHA1:
-		SHA1_Final(digest, &ctx->sha1);
+		SCTP_SHA1_FINAL(digest, &ctx->sha1);
 		break;
-#ifdef HAVE_SHA224
-	case SCTP_AUTH_HMAC_ID_SHA224:
-		break;
-#endif
-#ifdef HAVE_SHA2
 	case SCTP_AUTH_HMAC_ID_SHA256:
-		SHA256_Final(digest, &ctx->sha256);
+		SCTP_SHA256_FINAL(digest, &ctx->sha256);
 		break;
-	case SCTP_AUTH_HMAC_ID_SHA384:
-		/* SHA384 is truncated SHA512 */
-		SHA384_Final(digest, &ctx->sha384);
-		break;
-	case SCTP_AUTH_HMAC_ID_SHA512:
-		SHA512_Final(digest, &ctx->sha512);
-		break;
-#endif
 	case SCTP_AUTH_HMAC_ID_RSVD:
 	default:
 		/* unknown HMAC algorithm: can't do anything */
@@ -1540,8 +1439,8 @@
 			p_random = (struct sctp_auth_random *)phdr;
 			random_len = plen - sizeof(*p_random);
 		} else if (ptype == SCTP_HMAC_LIST) {
-			int num_hmacs;
-			int i;
+			uint16_t num_hmacs;
+			uint16_t i;
 
 			if (plen > sizeof(hmacs_store))
 				break;
@@ -1753,8 +1652,8 @@
 
 	/* is the indicated HMAC supported? */
 	if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
-		struct mbuf *m_err;
-		struct sctp_auth_invalid_hmac *err;
+		struct mbuf *op_err;
+		struct sctp_error_auth_invalid_hmac *cause;
 
 		SCTP_STAT_INCR(sctps_recvivalhmacid);
 		SCTPDBG(SCTP_DEBUG_AUTH1,
@@ -1764,20 +1663,19 @@
 		 * report this in an Error Chunk: Unsupported HMAC
 		 * Identifier
 		 */
-		m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
-		    1, MT_HEADER);
-		if (m_err != NULL) {
+		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_auth_invalid_hmac),
+		    0, M_NOWAIT, 1, MT_HEADER);
+		if (op_err != NULL) {
 			/* pre-reserve some space */
-			SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
 			/* fill in the error */
-			err = mtod(m_err, struct sctp_auth_invalid_hmac *);
-			bzero(err, sizeof(*err));
-			err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
-			err->ph.param_length = htons(sizeof(*err));
-			err->hmac_id = ntohs(hmac_id);
-			SCTP_BUF_LEN(m_err) = sizeof(*err);
+			cause = mtod(op_err, struct sctp_error_auth_invalid_hmac *);
+			cause->cause.code = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+			cause->cause.length = htons(sizeof(struct sctp_error_auth_invalid_hmac));
+			cause->hmac_id = ntohs(hmac_id);
+			SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_auth_invalid_hmac);
 			/* queue it */
-			sctp_queue_op_err(stcb, m_err);
+			sctp_queue_op_err(stcb, op_err);
 		}
 		return (-1);
 	}
@@ -1870,7 +1768,7 @@
 		return;
 
 	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
-	    0, M_DONTWAIT, 1, MT_HEADER);
+	    0, M_NOWAIT, 1, MT_HEADER);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
@@ -1877,6 +1775,7 @@
 
 	SCTP_BUF_LEN(m_notify) = 0;
 	auth = mtod(m_notify, struct sctp_authkey_event *);
+	memset(auth, 0, sizeof(struct sctp_authkey_event));
 	auth->auth_type = SCTP_AUTHENTICATION_EVENT;
 	auth->auth_flags = 0;
 	auth->auth_length = sizeof(*auth);
@@ -2035,8 +1934,7 @@
 		    "SCTP: peer sent chunk list w/o AUTH\n");
 		return (-1);
 	}
-	if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && peer_supports_asconf &&
-	    !peer_supports_auth) {
+	if (peer_supports_asconf && !peer_supports_auth) {
 		SCTPDBG(SCTP_DEBUG_AUTH1,
 		    "SCTP: peer supports ASCONF but not AUTH\n");
 		return (-1);

Modified: trunk/sys/netinet/sctp_auth.h
===================================================================
--- trunk/sys/netinet/sctp_auth.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_auth.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,33 +32,26 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_auth.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_auth.h 271750 2014-09-18 09:49:49Z tuexen $");
 
 #ifndef _NETINET_SCTP_AUTH_H_
 #define _NETINET_SCTP_AUTH_H_
 
+#include <netinet/sctp_os.h>
 
 /* digest lengths */
 #define SCTP_AUTH_DIGEST_LEN_SHA1	20
-#define SCTP_AUTH_DIGEST_LEN_SHA224	28
 #define SCTP_AUTH_DIGEST_LEN_SHA256	32
-#define SCTP_AUTH_DIGEST_LEN_SHA384	48
-#define SCTP_AUTH_DIGEST_LEN_SHA512	64
-#define SCTP_AUTH_DIGEST_LEN_MAX	64
+#define SCTP_AUTH_DIGEST_LEN_MAX	SCTP_AUTH_DIGEST_LEN_SHA256
 
 /* random sizes */
 #define SCTP_AUTH_RANDOM_SIZE_DEFAULT	32
 #define SCTP_AUTH_RANDOM_SIZE_REQUIRED	32
-#define SCTP_AUTH_RANDOM_SIZE_MAX	256
 
 /* union of all supported HMAC algorithm contexts */
 typedef union sctp_hash_context {
-	SHA1_CTX sha1;
-#ifdef HAVE_SHA2
-	SHA256_CTX sha256;
-	SHA384_CTX sha384;
-	SHA512_CTX sha512;
-#endif
+	SCTP_SHA1_CTX sha1;
+	SCTP_SHA256_CTX sha256;
 }                 sctp_hash_context_t;
 
 typedef struct sctp_key {
@@ -120,7 +113,6 @@
 extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
 extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
 extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
-extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
 extern int 
 sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list,
     uint8_t * ptr);
@@ -163,7 +155,7 @@
 
 
 /* hmac list handling */
-extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint16_t num_hmacs);
 extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
 extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
 extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);

Modified: trunk/sys/netinet/sctp_bsd_addr.c
===================================================================
--- trunk/sys/netinet/sctp_bsd_addr.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_bsd_addr.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_bsd_addr.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_bsd_addr.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
@@ -97,22 +97,15 @@
 void
 sctp_startup_iterator(void)
 {
-	static int called = 0;
-	int ret;
-
-	if (called) {
+	if (sctp_it_ctl.thread_proc) {
 		/* You only get one */
 		return;
 	}
-	/* init the iterator head */
-	called = 1;
-	sctp_it_ctl.iterator_running = 0;
-	sctp_it_ctl.iterator_flags = 0;
-	sctp_it_ctl.cur_it = NULL;
+	/* Initialize global locks here, thus only once. */
 	SCTP_ITERATOR_LOCK_INIT();
 	SCTP_IPI_ITERATOR_WQ_INIT();
 	TAILQ_INIT(&sctp_it_ctl.iteratorhead);
-	ret = kproc_create(sctp_iterator_thread,
+	kproc_create(sctp_iterator_thread,
 	    (void *)NULL,
 	    &sctp_it_ctl.thread_proc,
 	    RFPROC,
@@ -153,12 +146,12 @@
 
 
 static uint32_t
-sctp_is_desired_interface_type(struct ifaddr *ifa)
+sctp_is_desired_interface_type(struct ifnet *ifn)
 {
 	int result;
 
 	/* check the interface type to see if it's one we care about */
-	switch (ifa->ifa_ifp->if_type) {
+	switch (ifn->if_type) {
 	case IFT_ETHER:
 	case IFT_ISO88023:
 	case IFT_ISO88024:
@@ -217,6 +210,10 @@
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) {
+		if (sctp_is_desired_interface_type(ifn) == 0) {
+			/* non desired type */
+			continue;
+		}
 		IF_ADDR_RLOCK(ifn);
 		TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
 			if (ifa->ifa_addr == NULL) {
@@ -241,10 +238,6 @@
 			default:
 				continue;
 			}
-			if (sctp_is_desired_interface_type(ifa) == 0) {
-				/* non desired type */
-				continue;
-			}
 			switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 			case AF_INET:
@@ -301,6 +294,9 @@
 {
 	uint32_t ifa_flags = 0;
 
+	if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+		return;
+	}
 	/*
 	 * BSD only has one VRF, if this changes we will need to hook in the
 	 * right things here to get the id to pass to the address managment
@@ -318,6 +314,10 @@
 	if (ifa->ifa_addr == NULL) {
 		return;
 	}
+	if (sctp_is_desired_interface_type(ifa->ifa_ifp) == 0) {
+		/* non desired type */
+		return;
+	}
 	switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 	case AF_INET:
@@ -339,22 +339,16 @@
 		/* non inet/inet6 skip */
 		return;
 	}
-
-	if (sctp_is_desired_interface_type(ifa) == 0) {
-		/* non desired type */
-		return;
-	}
 	if (cmd == RTM_ADD) {
 		(void)sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp,
-		    ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type,
-		    ifa->ifa_ifp->if_xname,
+		    ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type, ifa->ifa_ifp->if_xname,
 		    (void *)ifa, ifa->ifa_addr, ifa_flags, 1);
 	} else {
 
 		sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr,
 		    ifa->ifa_ifp->if_index,
-		    ifa->ifa_ifp->if_xname
-		    );
+		    ifa->ifa_ifp->if_xname);
+
 		/*
 		 * We don't bump refcount here so when it completes the
 		 * final delete will happen.
@@ -412,9 +406,7 @@
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		if (SCTP_BUF_IS_EXTENDED(m)) {
-			sctp_log_mb(m, SCTP_MBUF_IALLOC);
-		}
+		sctp_log_mb(m, SCTP_MBUF_IALLOC);
 	}
 #endif
 	return (m);
@@ -423,11 +415,12 @@
 
 #ifdef SCTP_PACKET_LOGGING
 void
-sctp_packet_log(struct mbuf *m, int length)
+sctp_packet_log(struct mbuf *m)
 {
 	int *lenat, thisone;
 	void *copyto;
 	uint32_t *tick_tock;
+	int length;
 	int total_len;
 	int grabbed_lock = 0;
 	int value, newval, thisend, thisbegin;
@@ -437,6 +430,7 @@
 	 * (value) -ticks of log      (ticks) o -ip packet o -as logged -
 	 * where this started (thisbegin) x <--end points here
 	 */
+	length = SCTP_HEADER_LEN(m);
 	total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
 	/* Log a packet to the buffer. */
 	if (total_len > SCTP_PACKET_LOG_SIZE) {

Modified: trunk/sys/netinet/sctp_bsd_addr.h
===================================================================
--- trunk/sys/netinet/sctp_bsd_addr.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_bsd_addr.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_bsd_addr.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_bsd_addr.h 237540 2012-06-24 21:25:54Z tuexen $");
 
 #ifndef _NETINET_SCTP_BSD_ADDR_H_
 #define _NETINET_SCTP_BSD_ADDR_H_
@@ -54,7 +54,7 @@
 
 #ifdef  SCTP_PACKET_LOGGING
 
-void sctp_packet_log(struct mbuf *m, int length);
+void sctp_packet_log(struct mbuf *m);
 int sctp_copy_out_packet_log(uint8_t * target, int length);
 
 #endif

Modified: trunk/sys/netinet/sctp_cc_functions.c
===================================================================
--- trunk/sys/netinet/sctp_cc_functions.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_cc_functions.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_cc_functions.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_cc_functions.c 302237 2016-06-27 22:10:07Z bdrewery $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
@@ -54,6 +54,19 @@
 #define SHIFT_MPTCP_MULTI 8
 
 static void
+sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
+{
+	if ((assoc->max_cwnd > 0) &&
+	    (net->cwnd > assoc->max_cwnd) &&
+	    (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+		net->cwnd = assoc->max_cwnd;
+		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+			net->cwnd = net->mtu - sizeof(struct sctphdr);
+		}
+	}
+}
+
+static void
 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *assoc;
@@ -81,8 +94,9 @@
 			net->cwnd = net->mtu - sizeof(struct sctphdr);
 		}
 	}
+	sctp_enforce_cwnd_limit(assoc, net);
 	net->ssthresh = assoc->peers_rwnd;
-	SDT_PROBE(sctp, cwnd, net, init,
+	SDT_PROBE5(sctp, cwnd, net, init,
 	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 	    0, net->cwnd);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
@@ -179,7 +193,8 @@
 					}
 				}
 				net->cwnd = net->ssthresh;
-				SDT_PROBE(sctp, cwnd, net, fr,
+				sctp_enforce_cwnd_limit(asoc, net);
+				SDT_PROBE5(sctp, cwnd, net, fr,
 				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 				    old_cwnd, net->cwnd);
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
@@ -212,7 +227,8 @@
 				}
 
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
-				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+				    stcb->sctp_ep, stcb, net,
+				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net);
 			}
@@ -246,7 +262,7 @@
 		 */
 		/* Probe point 5 */
 		probepoint |= ((5 << 16) | 1);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -267,7 +283,7 @@
 				oth |= net->cc_mod.rtcc.step_cnt;
 				oth <<= 16;
 				oth |= net->cc_mod.rtcc.last_step_state;
-				SDT_PROBE(sctp, cwnd, net, rttstep,
+				SDT_PROBE5(sctp, cwnd, net, rttstep,
 				    vtag,
 				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -291,7 +307,7 @@
 		 */
 		/* Probe point 6 */
 		probepoint |= ((6 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -303,7 +319,7 @@
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -334,7 +350,7 @@
 	 */
 	/* Probe point 7 */
 	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -383,7 +399,7 @@
 			/* We caused it maybe.. back off? */
 			/* PROBE POINT 1 */
 			probepoint |= ((1 << 16) | 1);
-			SDT_PROBE(sctp, cwnd, net, rttvar,
+			SDT_PROBE5(sctp, cwnd, net, rttvar,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -401,7 +417,7 @@
 		}
 		/* Probe point 2 */
 		probepoint |= ((2 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -414,7 +430,7 @@
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -427,6 +443,7 @@
 			if ((net->cc_mod.rtcc.vol_reduce) &&
 			    (inst_ind != SCTP_INST_GAINING)) {
 				net->cwnd += net->mtu;
+				sctp_enforce_cwnd_limit(&stcb->asoc, net);
 				net->cc_mod.rtcc.vol_reduce--;
 			}
 			net->cc_mod.rtcc.last_step_state = 2;
@@ -437,7 +454,7 @@
 		/* bw & rtt decreased */
 		/* Probe point 3 */
 		probepoint |= ((3 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -449,7 +466,7 @@
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -458,6 +475,7 @@
 			if ((net->cc_mod.rtcc.vol_reduce) &&
 			    (inst_ind != SCTP_INST_GAINING)) {
 				net->cwnd += net->mtu;
+				sctp_enforce_cwnd_limit(&stcb->asoc, net);
 				net->cc_mod.rtcc.vol_reduce--;
 			}
 			net->cc_mod.rtcc.last_step_state = 3;
@@ -468,7 +486,7 @@
 	/* The bw decreased but rtt stayed the same */
 	/* Probe point 4 */
 	probepoint |= ((4 << 16) | 0);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -480,7 +498,7 @@
 		oth |= net->cc_mod.rtcc.step_cnt;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.last_step_state;
-		SDT_PROBE(sctp, cwnd, net, rttstep,
+		SDT_PROBE5(sctp, cwnd, net, rttstep,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -489,6 +507,7 @@
 		if ((net->cc_mod.rtcc.vol_reduce) &&
 		    (inst_ind != SCTP_INST_GAINING)) {
 			net->cwnd += net->mtu;
+			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 			net->cc_mod.rtcc.vol_reduce--;
 		}
 		net->cc_mod.rtcc.last_step_state = 4;
@@ -517,7 +536,7 @@
 	 */
 	/* PROBE POINT 0 */
 	probepoint = (((uint64_t) net->cwnd) << 32);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -529,7 +548,7 @@
 		oth |= net->cc_mod.rtcc.step_cnt;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.last_step_state;
-		SDT_PROBE(sctp, cwnd, net, rttstep,
+		SDT_PROBE5(sctp, cwnd, net, rttstep,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -629,7 +648,7 @@
 			/* Can't determine do not change */
 			probepoint |= ((0xd << 16) | inst_ind);
 		}
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((nbw << 32) | inst_bw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
@@ -789,7 +808,7 @@
 				    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
 				    (stcb->rport);
 
-				SDT_PROBE(sctp, cwnd, net, rttvar,
+				SDT_PROBE5(sctp, cwnd, net, rttvar,
 				    vtag,
 				    nbw,
 				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -883,11 +902,12 @@
 						break;
 					}
 					net->cwnd += incr;
+					sctp_enforce_cwnd_limit(asoc, net);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, incr,
 						    SCTP_CWND_LOG_FROM_SS);
 					}
-					SDT_PROBE(sctp, cwnd, net, ack,
+					SDT_PROBE5(sctp, cwnd, net, ack,
 					    stcb->asoc.my_vtag,
 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 					    net,
@@ -949,7 +969,8 @@
 						break;
 					}
 					net->cwnd += incr;
-					SDT_PROBE(sctp, cwnd, net, ack,
+					sctp_enforce_cwnd_limit(asoc, net);
+					SDT_PROBE5(sctp, cwnd, net, ack,
 					    stcb->asoc.my_vtag,
 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 					    net,
@@ -981,11 +1002,11 @@
 
 	old_cwnd = net->cwnd;
 	net->cwnd = net->mtu;
-	SDT_PROBE(sctp, cwnd, net, ack,
+	SDT_PROBE5(sctp, cwnd, net, ack,
 	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 	    old_cwnd, net->cwnd);
 	SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
-	    net, net->cwnd);
+	    (void *)net, net->cwnd);
 }
 
 
@@ -1052,7 +1073,7 @@
 	}
 	net->cwnd = net->mtu;
 	net->partial_bytes_acked = 0;
-	SDT_PROBE(sctp, cwnd, net, to,
+	SDT_PROBE5(sctp, cwnd, net, to,
 	    stcb->asoc.my_vtag,
 	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 	    net,
@@ -1112,7 +1133,7 @@
 				net->RTO <<= 1;
 			}
 			net->cwnd = net->ssthresh;
-			SDT_PROBE(sctp, cwnd, net, ecn,
+			SDT_PROBE5(sctp, cwnd, net, ecn,
 			    stcb->asoc.my_vtag,
 			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 			    net,
@@ -1131,12 +1152,9 @@
     uint32_t * bottle_bw, uint32_t * on_queue)
 {
 	uint32_t bw_avail;
-	int rtt;
 	unsigned int incr;
 	int old_cwnd = net->cwnd;
 
-	/* need real RTT in msd for this calc */
-	rtt = net->rtt / 1000;
 	/* get bottle neck bw */
 	*bottle_bw = ntohl(cp->bottle_bw);
 	/* and whats on queue */
@@ -1145,10 +1163,11 @@
 	 * adjust the on-queue if our flight is more it could be that the
 	 * router has not yet gotten data "in-flight" to it
 	 */
-	if (*on_queue < net->flight_size)
+	if (*on_queue < net->flight_size) {
 		*on_queue = net->flight_size;
-	/* calculate the available space */
-	bw_avail = (*bottle_bw * rtt) / 1000;
+	}
+	/* rtt is measured in micro seconds, bottle_bw in bytes per second */
+	bw_avail = (uint32_t) (((uint64_t) (*bottle_bw) * net->rtt) / (uint64_t) 1000000);
 	if (bw_avail > *bottle_bw) {
 		/*
 		 * Cap the growth to no more than the bottle neck. This can
@@ -1168,7 +1187,6 @@
 		int seg_inflight, seg_onqueue, my_portion;
 
 		net->partial_bytes_acked = 0;
-
 		/* how much are we over queue size? */
 		incr = *on_queue - bw_avail;
 		if (stcb->asoc.seen_a_sack_this_pkt) {
@@ -1231,9 +1249,10 @@
 		/* We always have 1 MTU */
 		net->cwnd = net->mtu;
 	}
+	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (net->cwnd - old_cwnd != 0) {
 		/* log only changes */
-		SDT_PROBE(sctp, cwnd, net, pd,
+		SDT_PROBE5(sctp, cwnd, net, pd,
 		    stcb->asoc.my_vtag,
 		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 		    net,
@@ -1255,7 +1274,8 @@
 		net->ssthresh = net->cwnd;
 	if (burst_limit) {
 		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
-		SDT_PROBE(sctp, cwnd, net, bl,
+		sctp_enforce_cwnd_limit(&stcb->asoc, net);
+		SDT_PROBE5(sctp, cwnd, net, bl,
 		    stcb->asoc.my_vtag,
 		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 		    net,
@@ -1331,7 +1351,7 @@
 		probepoint = (((uint64_t) net->cwnd) << 32);
 		/* Probe point 8 */
 		probepoint |= ((8 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | 0),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -1394,7 +1414,7 @@
 	vtag = (net->rtt << 32) |
 	    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
 	    (stcb->rport);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    0,
 	    0,
@@ -1593,6 +1613,7 @@
 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int cur_val, i, indx, incr;
+	int old_cwnd = net->cwnd;
 
 	cur_val = net->cwnd >> 10;
 	indx = SCTP_HS_TABLE_SIZE - 1;
@@ -1601,14 +1622,8 @@
 		/* normal mode */
 		if (net->net_ack > net->mtu) {
 			net->cwnd += net->mtu;
-			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
-				sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
-			}
 		} else {
 			net->cwnd += net->net_ack;
-			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
-				sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
-			}
 		}
 	} else {
 		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
@@ -1620,10 +1635,11 @@
 		net->last_hs_used = indx;
 		incr = ((sctp_cwnd_adjust[indx].increase) << 10);
 		net->cwnd += incr;
-		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
-			sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
-		}
 	}
+	sctp_enforce_cwnd_limit(&stcb->asoc, net);
+	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SS);
+	}
 }
 
 static void
@@ -1661,6 +1677,7 @@
 			net->last_hs_used = indx;
 		}
 	}
+	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
 	}
@@ -1717,7 +1734,8 @@
 				}
 
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
-				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+				    stcb->sctp_ep, stcb, net,
+				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net);
 			}
@@ -1792,9 +1810,7 @@
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
-
 					sctp_hs_cwnd_increase(stcb, net);
-
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
@@ -1808,6 +1824,7 @@
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
 					net->cwnd += net->mtu;
+					sctp_enforce_cwnd_limit(asoc, net);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->mtu,
 						    SCTP_CWND_LOG_FROM_CA);
@@ -1918,10 +1935,9 @@
 		return;
 	}
 	net->cc_mod.htcp_ca.bytecount += net->net_ack;
-
-	if (net->cc_mod.htcp_ca.bytecount >= net->cwnd - ((net->cc_mod.htcp_ca.alpha >> 7 ? : 1) * net->mtu)
-	    && now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT
-	    && net->cc_mod.htcp_ca.minRTT > 0) {
+	if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
+	    (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
+	    (net->cc_mod.htcp_ca.minRTT > 0)) {
 		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);
 
 		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
@@ -2047,6 +2063,7 @@
 					    SCTP_CWND_LOG_FROM_SS);
 				}
 			}
+			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->net_ack,
@@ -2068,6 +2085,7 @@
 			 */
 			net->cwnd += net->mtu;
 			net->partial_bytes_acked = 0;
+			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 			htcp_alpha_update(&net->cc_mod.htcp_ca);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
@@ -2114,6 +2132,7 @@
 	 */
 	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
 	net->ssthresh = stcb->asoc.peers_rwnd;
+	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	htcp_init(net);
 
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
@@ -2217,6 +2236,7 @@
 				htcp_reset(&net->cc_mod.htcp_ca);
 				net->ssthresh = htcp_recalc_ssthresh(net);
 				net->cwnd = net->ssthresh;
+				sctp_enforce_cwnd_limit(asoc, net);
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
 					    SCTP_CWND_LOG_FROM_FR);
@@ -2247,7 +2267,8 @@
 				}
 
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
-				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+				    stcb->sctp_ep, stcb, net,
+				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net);
 			}
@@ -2296,6 +2317,7 @@
 			net->RTO <<= 1;
 		}
 		net->cwnd = net->ssthresh;
+		sctp_enforce_cwnd_limit(&stcb->asoc, net);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
 		}

Modified: trunk/sys/netinet/sctp_constants.h
===================================================================
--- trunk/sys/netinet/sctp_constants.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_constants.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,21 +32,15 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_constants.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_constants.h 296052 2016-02-25 18:46:06Z tuexen $");
 
 #ifndef _NETINET_SCTP_CONSTANTS_H_
 #define _NETINET_SCTP_CONSTANTS_H_
 
+
 /* IANA assigned port number for SCTP over UDP encapsulation */
-/* For freebsd we cannot bind the port at
- * startup. Otherwise what will happen is
- * we really won't be bound. The user must
- * put it into the sysctl... or we need
- * to build a special timer for this to allow
- * us to wait 1 second or so after the system
- * comes up.
- */
-#define SCTP_OVER_UDP_TUNNELING_PORT 0
+#define SCTP_OVER_UDP_TUNNELING_PORT 9899
+
 /* Number of packets to get before sack sent by default */
 #define SCTP_DEFAULT_SACK_FREQ 2
 
@@ -73,6 +67,8 @@
  */
 #define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
 
+/* Largest length of a chunk */
+#define SCTP_MAX_CHUNK_LENGTH 0xffff
 /* Number of addresses where we just skip the counting */
 #define SCTP_COUNT_LIMIT 40
 
@@ -274,18 +270,9 @@
 /* how many addresses per assoc remote and local */
 #define SCTP_SCALE_FOR_ADDR	2
 
-/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_AUTO_ASCONF	1
-
 /* default MULTIPLE_ASCONF mode enable(1)/disable(0) value (sysctl) */
 #define SCTP_DEFAULT_MULTIPLE_ASCONFS	0
 
-/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_BASE	0
-
-/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF	0
-
 /*
  * Theshold for rwnd updates, we have to read (sb_hiwat >>
  * SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
@@ -371,6 +358,7 @@
 #define SCTP_DATAGRAM_ACKED		10010
 #define SCTP_DATAGRAM_MARKED		20010
 #define SCTP_FORWARD_TSN_SKIP		30010
+#define SCTP_DATAGRAM_NR_ACKED		40010
 
 /* chunk output send from locations */
 #define SCTP_OUTPUT_FROM_USR_SEND       0
@@ -473,7 +461,7 @@
 
 
 /*
- * SCTP states for internal state machine XXX (should match "user" values)
+ * SCTP states for internal state machine
  */
 #define SCTP_STATE_EMPTY		0x0000
 #define SCTP_STATE_INUSE		0x0001
@@ -521,9 +509,6 @@
 /* How long a cookie lives in milli-seconds */
 #define SCTP_DEFAULT_COOKIE_LIFE	60000
 
-/* resource limit of streams */
-#define MAX_SCTP_STREAMS	2048
-
 /* Maximum the mapping array will  grow to (TSN mapping array) */
 #define SCTP_MAPPING_ARRAY	512
 
@@ -630,10 +615,6 @@
 /* 30 seconds + RTO (in ms) */
 #define SCTP_HB_DEFAULT_MSEC	30000
 
-/* Max time I will wait for Shutdown to complete */
-#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
-
-
 /*
  * This is how long a secret lives, NOT how long a cookie lives how many
  * ticks the current secret will live.
@@ -658,6 +639,7 @@
 
 /* How many streams I request initally by default */
 #define SCTP_OSTREAM_INITIAL 10
+#define SCTP_ISTREAM_INITIAL 2048
 
 /*
  * How many smallest_mtu's need to increase before a window update sack is
@@ -728,7 +710,6 @@
 /* small chunk store for looking at chunk_list in auth */
 #define SCTP_SMALL_CHUNK_STORE 260
 
-#define SCTP_DEFAULT_MINSEGMENT 512	/* MTU size ... if no mtu disc */
 #define SCTP_HOW_MANY_SECRETS	2	/* how many secrets I keep */
 
 #define SCTP_NUMBER_OF_SECRETS	8	/* or 8 * 4 = 32 octets */
@@ -774,6 +755,9 @@
  */
 #define SCTP_DEFAULT_SPLIT_POINT_MIN 2904
 
+/* Maximum length of diagnostic information in error causes */
+#define SCTP_DIAG_INFO_LEN 64
+
 /* ABORT CODES and other tell-tale location
  * codes are generated by adding the below
  * to the instance id.
@@ -780,18 +764,19 @@
  */
 
 /* File defines */
-#define SCTP_FROM_SCTP_INPUT   0x10000000
-#define SCTP_FROM_SCTP_PCB     0x20000000
-#define SCTP_FROM_SCTP_INDATA  0x30000000
-#define SCTP_FROM_SCTP_TIMER   0x40000000
-#define SCTP_FROM_SCTP_USRREQ  0x50000000
-#define SCTP_FROM_SCTPUTIL     0x60000000
-#define SCTP_FROM_SCTP6_USRREQ 0x70000000
-#define SCTP_FROM_SCTP_ASCONF  0x80000000
-#define SCTP_FROM_SCTP_OUTPUT  0x90000000
-#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
-#define SCTP_FROM_SCTP_PANDA   0xb0000000
-#define SCTP_FROM_SCTP_SYSCTL  0xc0000000
+#define SCTP_FROM_SCTP_INPUT        0x10000000
+#define SCTP_FROM_SCTP_PCB          0x20000000
+#define SCTP_FROM_SCTP_INDATA       0x30000000
+#define SCTP_FROM_SCTP_TIMER        0x40000000
+#define SCTP_FROM_SCTP_USRREQ       0x50000000
+#define SCTP_FROM_SCTPUTIL          0x60000000
+#define SCTP_FROM_SCTP6_USRREQ      0x70000000
+#define SCTP_FROM_SCTP_ASCONF       0x80000000
+#define SCTP_FROM_SCTP_OUTPUT       0x90000000
+#define SCTP_FROM_SCTP_PEELOFF      0xa0000000
+#define SCTP_FROM_SCTP_PANDA        0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL       0xc0000000
+#define SCTP_FROM_SCTP_CC_FUNCTIONS 0xd0000000
 
 /* Location ID's */
 #define SCTP_LOC_1  0x00000001
@@ -827,6 +812,8 @@
 #define SCTP_LOC_31 0x0000001f
 #define SCTP_LOC_32 0x00000020
 #define SCTP_LOC_33 0x00000021
+#define SCTP_LOC_34 0x00000022
+#define SCTP_LOC_35 0x00000023
 
 
 /* Free assoc codes */
@@ -992,12 +979,13 @@
      (((uint8_t *)&(a)->s_addr)[1] == 168)))
 
 #define IN4_ISLOOPBACK_ADDRESS(a) \
-    ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
-     (((uint8_t *)&(a)->s_addr)[1] == 0) && \
-     (((uint8_t *)&(a)->s_addr)[2] == 0) && \
-     (((uint8_t *)&(a)->s_addr)[3] == 1))
+    (((uint8_t *)&(a)->s_addr)[0] == 127)
 
+#define IN4_ISLINKLOCAL_ADDRESS(a) \
+    ((((uint8_t *)&(a)->s_addr)[0] == 169) && \
+     (((uint8_t *)&(a)->s_addr)[1] == 254))
 
+
 #if defined(_KERNEL)
 #define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
 #define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x))

Modified: trunk/sys/netinet/sctp_crc32.c
===================================================================
--- trunk/sys/netinet/sctp_crc32.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_crc32.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_crc32.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_crc32.c 235828 2012-05-23 11:26:28Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp.h>

Modified: trunk/sys/netinet/sctp_crc32.h
===================================================================
--- trunk/sys/netinet/sctp_crc32.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_crc32.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_crc32.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_crc32.h 235828 2012-05-23 11:26:28Z tuexen $");
 
 #ifndef _NETINET_SCTP_CRC32_H_
 #define _NETINET_SCTP_CRC32_H_

Modified: trunk/sys/netinet/sctp_dtrace_declare.h
===================================================================
--- trunk/sys/netinet/sctp_dtrace_declare.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_dtrace_declare.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_dtrace_declare.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_dtrace_declare.h 235828 2012-05-23 11:26:28Z tuexen $");
 
 #ifndef _NETINET_SCTP_DTRACE_DECLARE_H_
 #define _NETINET_SCTP_DTRACE_DECLARE_H_

Modified: trunk/sys/netinet/sctp_dtrace_define.h
===================================================================
--- trunk/sys/netinet/sctp_dtrace_define.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_dtrace_define.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_dtrace_define.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_dtrace_define.h 260817 2014-01-17 10:58:59Z avg $");
 
 #ifndef _NETINET_SCTP_DTRACE_DEFINE_H_
 #define _NETINET_SCTP_DTRACE_DEFINE_H_
@@ -46,189 +46,132 @@
 /* Cwnd probe - tracks changes in the congestion window on a netp */
 /********************************************************/
 /* Initial */
-SDT_PROBE_DEFINE(sctp, cwnd, net, init, init);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, init,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* ACK-INCREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, ack, ack);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, ack,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* ACK-INCREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, rttvar, rttvar);
-/* The Vtag << 32 | localport << 16 | remoteport */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 0, "uint64_t");
-/* obw | nbw */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 1, "uint64_t");
-/* bwrtt | newrtt */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 2, "uint64_t");
-/* flight */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 3, "uint64_t");
-/* (cwnd << 32) | point << 16 | retval(0/1) */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 4, "uint64_t");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, rttvar,
+    "uint64_t",		/* The Vtag << 32 | localport << 16 | remoteport */
+    "uint64_t",		/* obw | nbw */
+    "uint64_t",		/* bwrtt | newrtt */
+    "uint64_t",		/* flight */
+    "uint64_t");	/* (cwnd << 32) | point << 16 | retval(0/1) */
 
+SDT_PROBE_DEFINE5(sctp, cwnd, net, rttstep,
+    "uint64_t",		/* The Vtag << 32 | localport << 16 | remoteport */
+    "uint64_t",		/* obw | nbw */
+    "uint64_t",		/* bwrtt | newrtt */
+    "uint64_t",		/* flight */
+    "uint64_t");	/* (cwnd << 32) | point << 16 | retval(0/1) */
 
-SDT_PROBE_DEFINE(sctp, cwnd, net, rttstep, rttstep);
-/* The Vtag << 32 | localport << 16 | remoteport */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 0, "uint64_t");
-/* obw | nbw */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 1, "uint64_t");
-/* bwrtt | nrtt */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 2, "uint64_t");
-/* cwnd_saved | stepcnt << 16 | oldstep  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 3, "uint64_t");
-/* (cwnd << 32) | point << 16 | retval(0/1) */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 4, "uint64_t");
-
-
 /* FastRetransmit-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, fr, fr);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, fr,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* TimeOut-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, to, to);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, to,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* BurstLimit-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, bl, bl);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, bl,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* ECN-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, ecn, ecn);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, ecn,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
 /* PacketDrop-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, pd, pd);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 2, "uintptr_t");
-/* The old value of the cwnd  */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, pd,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",	/* The pointer to the struct sctp_nets * changing */
+    "int",		/* The old value of the cwnd */
+    "int");		/* The new value of the cwnd */
 
-
-
 /********************************************************/
 /* Rwnd probe - tracks changes in the receiver window for an assoc */
 /********************************************************/
-SDT_PROBE_DEFINE(sctp, rwnd, assoc, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 1, "uint32_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 2, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 3, "int");
+SDT_PROBE_DEFINE4(sctp, rwnd, assoc, val,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "int",		/* The up/down amount */
+    "int");		/* The new value of the cwnd */
 
 /********************************************************/
 /* flight probe - tracks changes in the flight size on a net or assoc */
 /********************************************************/
-SDT_PROBE_DEFINE(sctp, flightsize, net, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 2, "uintptr_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 4, "int");
+SDT_PROBE_DEFINE5(sctp, flightsize, net, val,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "uintptr_t",        /* The pointer to the struct sctp_nets * changing */
+    "int",		/* The up/down amount */
+    "int");		/* The new value of the cwnd */
+
 /********************************************************/
 /* The total flight version */
 /********************************************************/
-SDT_PROBE_DEFINE(sctp, flightsize, assoc, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 1, "uint32_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 2, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 3, "int");
+SDT_PROBE_DEFINE4(sctp, flightsize, assoc, val,
+    "uint32_t",		/* The Vtag for this end */
+    "uint32_t",		/*
+			 * The port number of the local side << 16 | port number
+			 * of remote in network byte order.
+			 */
+    "int",		/* The up/down amount */
+    "int");		/* The new value of the cwnd */
 
 #endif

Modified: trunk/sys/netinet/sctp_header.h
===================================================================
--- trunk/sys/netinet/sctp_header.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_header.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_header.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_header.h 294158 2016-01-16 16:46:00Z tuexen $");
 
 #ifndef _NETINET_SCTP_HEADER_H_
 #define _NETINET_SCTP_HEADER_H_
@@ -83,12 +83,6 @@
 	uint16_t addr_type[2];	/* array of supported address types */
 }                         SCTP_PACKED;
 
-/* ECN parameter */
-struct sctp_ecn_supported_param {
-	struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
-}                        SCTP_PACKED;
-
-
 /* heartbeat info parameter */
 struct sctp_heartbeat_info_param {
 	struct sctp_paramhdr ph;
@@ -209,34 +203,6 @@
 	 */
 }                 SCTP_PACKED;
 
-
-/* Used for NAT state error cause */
-struct sctp_missing_nat_state {
-	uint16_t cause;
-	uint16_t length;
-	uint8_t data[];
-}                      SCTP_PACKED;
-
-
-struct sctp_inv_mandatory_param {
-	uint16_t cause;
-	uint16_t length;
-	uint32_t num_param;
-	uint16_t param;
-	/*
-	 * We include this to 0 it since only a missing cookie will cause
-	 * this error.
-	 */
-	uint16_t resv;
-}                        SCTP_PACKED;
-
-struct sctp_unresolv_addr {
-	uint16_t cause;
-	uint16_t length;
-	uint16_t addr_type;
-	uint16_t reserved;	/* Only one invalid addr type */
-}                  SCTP_PACKED;
-
 /* state cookie parameter */
 struct sctp_state_cookie_param {
 	struct sctp_paramhdr ph;
@@ -377,28 +343,11 @@
 	struct sctp_chunkhdr ch;
 }                            SCTP_PACKED;
 
-/* Oper error holding a stale cookie */
-struct sctp_stale_cookie_msg {
-	struct sctp_paramhdr ph;/* really an error cause */
-	uint32_t time_usec;
-}                     SCTP_PACKED;
-
 struct sctp_adaptation_layer_indication {
 	struct sctp_paramhdr ph;
 	uint32_t indication;
 }                                SCTP_PACKED;
 
-struct sctp_cookie_while_shutting_down {
-	struct sctphdr sh;
-	struct sctp_chunkhdr ch;
-	struct sctp_paramhdr ph;/* really an error cause */
-}                               SCTP_PACKED;
-
-struct sctp_shutdown_complete_msg {
-	struct sctphdr sh;
-	struct sctp_shutdown_complete_chunk shut_cmp;
-}                          SCTP_PACKED;
-
 /*
  * draft-ietf-tsvwg-addip-sctp
  */
@@ -457,6 +406,11 @@
 
 /**********STREAM RESET STUFF ******************/
 
+struct sctp_stream_reset_request {
+	struct sctp_paramhdr ph;
+	uint32_t request_seq;
+}                         SCTP_PACKED;
+
 struct sctp_stream_reset_out_request {
 	struct sctp_paramhdr ph;
 	uint32_t request_seq;	/* monotonically increasing seq no */
@@ -471,7 +425,6 @@
 	uint16_t list_of_streams[];	/* if not all list of streams */
 }                            SCTP_PACKED;
 
-
 struct sctp_stream_reset_tsn_request {
 	struct sctp_paramhdr ph;
 	uint32_t request_seq;
@@ -511,16 +464,6 @@
  * streams then the request will need to be an overlay structure.
  */
 
-struct sctp_stream_reset_out_req {
-	struct sctp_chunkhdr ch;
-	struct sctp_stream_reset_out_request sr_req;
-}                         SCTP_PACKED;
-
-struct sctp_stream_reset_in_req {
-	struct sctp_chunkhdr ch;
-	struct sctp_stream_reset_in_request sr_req;
-}                        SCTP_PACKED;
-
 struct sctp_stream_reset_tsn_req {
 	struct sctp_chunkhdr ch;
 	struct sctp_stream_reset_tsn_request sr_req;
@@ -567,12 +510,6 @@
 	uint8_t hmac[];
 }               SCTP_PACKED;
 
-struct sctp_auth_invalid_hmac {
-	struct sctp_paramhdr ph;
-	uint16_t hmac_id;
-	uint16_t padding;
-}                      SCTP_PACKED;
-
 /*
  * we pre-reserve enough room for a ECNE or CWR AND a SACK with no missing
  * pieces. If ENCE is missing we could have a couple of blocks. This way we

Modified: trunk/sys/netinet/sctp_indata.c
===================================================================
--- trunk/sys/netinet/sctp_indata.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_indata.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_indata.c 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_indata.c 294219 2016-01-17 12:39:35Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
@@ -224,9 +224,9 @@
 	}
 	seinfo = (struct sctp_extrcvinfo *)sinfo;
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) &&
-	    (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
+	    (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
 		provide_nxt = 1;
-		len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
+		len += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
 	} else {
 		provide_nxt = 0;
 	}
@@ -242,7 +242,7 @@
 		use_extended = 0;
 	}
 
-	ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+	ret = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
 	if (ret == NULL) {
 		/* No space */
 		return (ret);
@@ -251,6 +251,11 @@
 
 	/* We need a CMSG header followed by the struct */
 	cmh = mtod(ret, struct cmsghdr *);
+	/*
+	 * Make sure that there is no un-initialized padding between the
+	 * cmsg header and cmsg data and after the cmsg data.
+	 */
+	memset(cmh, 0, len);
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
 		cmh->cmsg_level = IPPROTO_SCTP;
 		cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo));
@@ -272,20 +277,20 @@
 		cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo));
 		cmh->cmsg_type = SCTP_NXTINFO;
 		nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh);
-		nxtinfo->nxt_sid = seinfo->sreinfo_next_stream;
+		nxtinfo->nxt_sid = seinfo->serinfo_next_stream;
 		nxtinfo->nxt_flags = 0;
-		if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
+		if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
 			nxtinfo->nxt_flags |= SCTP_UNORDERED;
 		}
-		if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
+		if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
 			nxtinfo->nxt_flags |= SCTP_NOTIFICATION;
 		}
-		if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
+		if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
 			nxtinfo->nxt_flags |= SCTP_COMPLETE;
 		}
-		nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid;
-		nxtinfo->nxt_length = seinfo->sreinfo_next_length;
-		nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid;
+		nxtinfo->nxt_ppid = seinfo->serinfo_next_ppid;
+		nxtinfo->nxt_length = seinfo->serinfo_next_length;
+		nxtinfo->nxt_assoc_id = seinfo->serinfo_next_aid;
 		cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo)));
 		SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
 	}
@@ -562,7 +567,8 @@
 	struct sctp_queued_to_read *at;
 	int queue_needed;
 	uint16_t nxt_todel;
-	struct mbuf *oper;
+	struct mbuf *op_err;
+	char msg[SCTP_DIAG_INFO_LEN];
 
 	queue_needed = 1;
 	asoc->size_on_all_streams += control->length;
@@ -573,13 +579,13 @@
 		sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
 	}
 	SCTPDBG(SCTP_DEBUG_INDATA1,
-	    "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
-	    (uint32_t) control->sinfo_stream,
-	    (uint32_t) strm->last_sequence_delivered,
-	    (uint32_t) nxt_todel);
+	    "queue to stream called for sid:%u ssn:%u tsn:%u lastdel:%u nxt:%u\n",
+	    (uint32_t) control->sinfo_stream, (uint32_t) control->sinfo_ssn,
+	    (uint32_t) control->sinfo_tsn,
+	    (uint32_t) strm->last_sequence_delivered, (uint32_t) nxt_todel);
 	if (SCTP_SSN_GE(strm->last_sequence_delivered, control->sinfo_ssn)) {
 		/* The incoming sseq is behind where we last delivered? */
-		SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort  association\n",
+		SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
 		    control->sinfo_ssn, strm->last_sequence_delivered);
 protocol_error:
 		/*
@@ -587,30 +593,30 @@
 		 * association destruction
 		 */
 		TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
-		oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (oper) {
-			struct sctp_paramhdr *ph;
-			uint32_t *ippp;
-
-			SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-			    (sizeof(uint32_t) * 3);
-			ph = mtod(oper, struct sctp_paramhdr *);
-			ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-			ph->param_length = htons(SCTP_BUF_LEN(oper));
-			ippp = (uint32_t *) (ph + 1);
-			*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_1);
-			ippp++;
-			*ippp = control->sinfo_tsn;
-			ippp++;
-			*ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
-		}
+		snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+		    strm->last_sequence_delivered, control->sinfo_tsn,
+		    control->sinfo_stream, control->sinfo_ssn);
+		op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 		stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
-		sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+		sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		*abort_flag = 1;
 		return;
 
 	}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	struct socket *so;
+
+	so = SCTP_INP_SO(stcb->sctp_ep);
+	atomic_add_int(&stcb->asoc.refcnt, 1);
+	SCTP_TCB_UNLOCK(stcb);
+	SCTP_SOCKET_LOCK(so, 1);
+	SCTP_TCB_LOCK(stcb);
+	atomic_subtract_int(&stcb->asoc.refcnt, 1);
+	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+		SCTP_SOCKET_UNLOCK(so, 1);
+		return;
+	}
+#endif
 	if (nxt_todel == control->sinfo_ssn) {
 		/* can be delivered right away? */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
@@ -626,7 +632,7 @@
 		sctp_add_to_readq(stcb->sctp_ep, stcb,
 		    control,
 		    &stcb->sctp_socket->so_rcv, 1,
-		    SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+		    SCTP_READ_LOCK_NOT_HELD, SCTP_SO_LOCKED);
 		TAILQ_FOREACH_SAFE(control, &strm->inqueue, next, at) {
 			/* all delivered */
 			nxt_todel = strm->last_sequence_delivered + 1;
@@ -650,7 +656,7 @@
 				    control,
 				    &stcb->sctp_socket->so_rcv, 1,
 				    SCTP_READ_LOCK_NOT_HELD,
-				    SCTP_SO_NOT_LOCKED);
+				    SCTP_SO_LOCKED);
 				continue;
 			}
 			break;
@@ -662,6 +668,9 @@
 		 * to put it on the queue.
 		 */
 		if (SCTP_TSN_GE(asoc->cumulative_tsn, control->sinfo_tsn)) {
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+			SCTP_SOCKET_UNLOCK(so, 1);
+#endif
 			goto protocol_error;
 		}
 		if (TAILQ_EMPTY(&strm->inqueue)) {
@@ -708,6 +717,9 @@
 						control->whoFrom = NULL;
 					}
 					sctp_free_a_readq(stcb, control);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+					SCTP_SOCKET_UNLOCK(so, 1);
+#endif
 					return;
 				} else {
 					if (TAILQ_NEXT(at, next) == NULL) {
@@ -727,6 +739,9 @@
 			}
 		}
 	}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	SCTP_SOCKET_UNLOCK(so, 1);
+#endif
 }
 
 /*
@@ -790,13 +805,12 @@
 			 * but should we?
 			 */
 			if (stcb->sctp_socket) {
-				pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+				pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
 				    stcb->sctp_ep->partial_delivery_point);
 			} else {
 				pd_point = stcb->sctp_ep->partial_delivery_point;
 			}
 			if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
-
 				/*
 				 * Yes, we setup to start reception, by
 				 * backing down the TSN just in case we
@@ -841,7 +855,8 @@
 sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
     struct sctp_tmit_chunk *chk, int *abort_flag)
 {
-	struct mbuf *oper;
+	struct mbuf *op_err;
+	char msg[SCTP_DIAG_INFO_LEN];
 	uint32_t cum_ackp1, prev_tsn, post_tsn;
 	struct sctp_tmit_chunk *at, *prev, *next;
 
@@ -866,30 +881,14 @@
 				 * a FIRST fragment mark.
 				 */
 				SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
-				oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-
-				if (oper) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(oper) =
-					    sizeof(struct sctp_paramhdr) +
-					    (sizeof(uint32_t) * 3);
-					ph = mtod(oper, struct sctp_paramhdr *);
-					ph->param_type =
-					    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-					ph->param_length = htons(SCTP_BUF_LEN(oper));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_2);
-					ippp++;
-					*ippp = chk->rec.data.TSN_seq;
-					ippp++;
-					*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-				}
+				snprintf(msg, sizeof(msg),
+				    "Expected B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+				    chk->rec.data.TSN_seq,
+				    chk->rec.data.stream_number,
+				    chk->rec.data.stream_seq);
+				op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
-				sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+				sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 				*abort_flag = 1;
 			} else if (asoc->fragmented_delivery_inprogress &&
 			    (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
@@ -899,28 +898,14 @@
 				 * MIDDLE fragment NOT a FIRST
 				 */
 				SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
-				oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (oper) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(oper) =
-					    sizeof(struct sctp_paramhdr) +
-					    (3 * sizeof(uint32_t));
-					ph = mtod(oper, struct sctp_paramhdr *);
-					ph->param_type =
-					    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-					ph->param_length = htons(SCTP_BUF_LEN(oper));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_3);
-					ippp++;
-					*ippp = chk->rec.data.TSN_seq;
-					ippp++;
-					*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-				}
+				snprintf(msg, sizeof(msg),
+				    "Didn't expect B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+				    chk->rec.data.TSN_seq,
+				    chk->rec.data.stream_number,
+				    chk->rec.data.stream_seq);
+				op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
-				sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+				sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 				*abort_flag = 1;
 			} else if (asoc->fragmented_delivery_inprogress) {
 				/*
@@ -933,30 +918,15 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
 					    chk->rec.data.stream_number,
 					    asoc->str_of_pdapi);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (sizeof(uint32_t) * 3);
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-					}
+					snprintf(msg, sizeof(msg),
+					    "Expected SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    asoc->str_of_pdapi,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 				} else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
 					    SCTP_DATA_UNORDERED &&
@@ -965,31 +935,15 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
 					    chk->rec.data.stream_seq,
 					    asoc->ssn_of_pdapi);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-					}
+					snprintf(msg, sizeof(msg),
+					    "Expected SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    asoc->ssn_of_pdapi,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 				}
 			}
@@ -1059,31 +1013,14 @@
 				    SCTP_DATA_FIRST_FRAG) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a midlle or last but not a first\n");
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-					}
+					snprintf(msg, sizeof(msg),
+					    "Can't handle B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1093,36 +1030,39 @@
 					 * Huh, need the correct STR here,
 					 * they must be the same.
 					 */
-					SCTP_PRINTF("Prev check - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sid:%d not the same as at:%d\n",
 					    chk->rec.data.stream_number,
 					    prev->rec.data.stream_number);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-					}
+					snprintf(msg, sizeof(msg),
+					    "Expect SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    prev->rec.data.stream_number,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
+				if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+				    (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+					/*
+					 * Huh, need the same ordering here,
+					 * they must be the same.
+					 */
+					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, U-bit not constant\n");
+					snprintf(msg, sizeof(msg),
+					    "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
+					    (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
+					    chk->rec.data.TSN_seq,
+					    (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+					*abort_flag = 1;
+					return;
+				}
 				if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
 				    chk->rec.data.stream_seq !=
 				    prev->rec.data.stream_seq) {
@@ -1133,30 +1073,15 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
 					    chk->rec.data.stream_seq,
 					    prev->rec.data.stream_seq);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Expect SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    prev->rec.data.stream_seq,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1166,31 +1091,14 @@
 				if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
 				    SCTP_DATA_FIRST_FRAG) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Expect B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1210,30 +1118,14 @@
 				    != SCTP_DATA_LAST_FRAG) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Expect only E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1249,31 +1141,14 @@
 				    SCTP_DATA_LAST_FRAG) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Didn't expect E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1286,34 +1161,36 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
 					    chk->rec.data.stream_number,
 					    next->rec.data.stream_number);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Required SID %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    next->rec.data.stream_number,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
+				if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+				    (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+					/*
+					 * Huh, need the same ordering here,
+					 * they must be the same.
+					 */
+					SCTPDBG(SCTP_DEBUG_INDATA1, "Next check - Gak, Evil plot, U-bit not constant\n");
+					snprintf(msg, sizeof(msg),
+					    "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
+					    (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
+					    chk->rec.data.TSN_seq,
+					    (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+					*abort_flag = 1;
+					return;
+				}
 				if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
 				    chk->rec.data.stream_seq !=
 				    next->rec.data.stream_seq) {
@@ -1324,30 +1201,15 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
 					    chk->rec.data.stream_seq,
 					    next->rec.data.stream_seq);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
-						ippp++;
-						*ippp = chk->rec.data.TSN_seq;
-						ippp++;
-						*ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+					snprintf(msg, sizeof(msg),
+					    "Required SSN %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    next->rec.data.stream_seq,
+					    chk->rec.data.TSN_seq,
+					    chk->rec.data.stream_number,
+					    chk->rec.data.stream_seq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					*abort_flag = 1;
 					return;
 				}
@@ -1418,7 +1280,6 @@
 	return (0);
 }
 
-
 static int
 sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
     struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
@@ -1433,7 +1294,8 @@
 	int the_len;
 	int need_reasm_check = 0;
 	uint16_t strmno, strmseq;
-	struct mbuf *oper;
+	struct mbuf *op_err;
+	char msg[SCTP_DIAG_INFO_LEN];
 	struct sctp_queued_to_read *control;
 	int ordered;
 	uint32_t protocol_id;
@@ -1500,15 +1362,12 @@
 	 */
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
-	    (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))
-	    ) {
+	    (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))) {
 		/*
 		 * wait a minute, this guy is gone, there is no longer a
 		 * receiver. Send peer an ABORT!
 		 */
-		struct mbuf *op_err;
-
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 		sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		*abort_flag = 1;
 		return (0);
@@ -1530,7 +1389,7 @@
 		 */
 		if (stcb->sctp_socket->so_rcv.sb_cc) {
 			/* some to read, wake-up */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			struct socket *so;
 
 			so = SCTP_INP_SO(stcb->sctp_ep);
@@ -1546,7 +1405,7 @@
 			}
 #endif
 			sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		}
@@ -1568,30 +1427,25 @@
 	}
 	strmno = ntohs(ch->dp.stream_id);
 	if (strmno >= asoc->streamincnt) {
-		struct sctp_paramhdr *phdr;
-		struct mbuf *mb;
+		struct sctp_error_invalid_stream *cause;
 
-		mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (mb != NULL) {
+		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_invalid_stream),
+		    0, M_NOWAIT, 1, MT_DATA);
+		if (op_err != NULL) {
 			/* add some space up front so prepend will work well */
-			SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
-			phdr = mtod(mb, struct sctp_paramhdr *);
+			SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+			cause = mtod(op_err, struct sctp_error_invalid_stream *);
 			/*
 			 * Error causes are just param's and this one has
 			 * two back to back phdr, one with the error type
 			 * and size, the other with the streamid and a rsvd
 			 */
-			SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
-			phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
-			phdr->param_length =
-			    htons(sizeof(struct sctp_paramhdr) * 2);
-			phdr++;
-			/* We insert the stream in the type field */
-			phdr->param_type = ch->dp.stream_id;
-			/* And set the length to 0 for the rsvd field */
-			phdr->param_length = 0;
-			sctp_queue_op_err(stcb, mb);
+			SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_invalid_stream);
+			cause->cause.code = htons(SCTP_CAUSE_INVALID_STREAM);
+			cause->cause.length = htons(sizeof(struct sctp_error_invalid_stream));
+			cause->stream_id = ch->dp.stream_id;
+			cause->reserved = htons(0);
+			sctp_queue_op_err(stcb, op_err);
 		}
 		SCTP_STAT_INCR(sctps_badsid);
 		SCTP_TCB_LOCK_ASSERT(stcb);
@@ -1636,27 +1490,13 @@
 		/* The incoming sseq is behind where we last delivered? */
 		SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
 		    strmseq, asoc->strmin[strmno].last_sequence_delivered);
-		oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (oper) {
-			struct sctp_paramhdr *ph;
-			uint32_t *ippp;
 
-			SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-			    (3 * sizeof(uint32_t));
-			ph = mtod(oper, struct sctp_paramhdr *);
-			ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-			ph->param_length = htons(SCTP_BUF_LEN(oper));
-			ippp = (uint32_t *) (ph + 1);
-			*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
-			ippp++;
-			*ippp = tsn;
-			ippp++;
-			*ippp = ((strmno << 16) | strmseq);
-
-		}
-		stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
-		sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+		snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+		    asoc->strmin[strmno].last_sequence_delivered,
+		    tsn, strmno, strmseq);
+		op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+		stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
+		sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		*abort_flag = 1;
 		return (0);
 	}
@@ -1669,16 +1509,10 @@
 	if (last_chunk == 0) {
 		dmbuf = SCTP_M_COPYM(*m,
 		    (offset + sizeof(struct sctp_data_chunk)),
-		    the_len, M_DONTWAIT);
+		    the_len, M_NOWAIT);
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-			struct mbuf *mat;
-
-			for (mat = dmbuf; mat; mat = SCTP_BUF_NEXT(mat)) {
-				if (SCTP_BUF_IS_EXTENDED(mat)) {
-					sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-				}
-			}
+			sctp_log_mbc(dmbuf, SCTP_MBUF_ICOPY);
 		}
 #endif
 	} else {
@@ -1730,7 +1564,6 @@
 		sctp_alloc_a_readq(stcb, control);
 		sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
 		    protocol_id,
-		    stcb->asoc.context,
 		    strmno, strmseq,
 		    chunk_flags,
 		    dmbuf);
@@ -1791,7 +1624,6 @@
 				asoc->highest_tsn_inside_nr_map = tsn;
 			}
 			SCTP_STAT_INCR(sctps_recvexpressm);
-			control->sinfo_tsn = tsn;
 			asoc->tsn_last_delivered = tsn;
 			asoc->fragment_flags = chunk_flags;
 			asoc->tsn_of_pdapi_last_delivered = tsn;
@@ -1858,7 +1690,6 @@
 		sctp_alloc_a_readq(stcb, control);
 		sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
 		    protocol_id,
-		    stcb->asoc.context,
 		    strmno, strmseq,
 		    chunk_flags,
 		    dmbuf);
@@ -1898,29 +1729,15 @@
 					control->whoFrom = NULL;
 				}
 				sctp_free_a_readq(stcb, control);
-				oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (oper) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(oper) =
-					    sizeof(struct sctp_paramhdr) +
-					    (3 * sizeof(uint32_t));
-					ph = mtod(oper, struct sctp_paramhdr *);
-					ph->param_type =
-					    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-					ph->param_length = htons(SCTP_BUF_LEN(oper));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_15);
-					ippp++;
-					*ippp = tsn;
-					ippp++;
-					*ippp = ((strmno << 16) | strmseq);
+				snprintf(msg, sizeof(msg), "Reas. queue emtpy, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+				    tsn, strmno, strmseq);
+				op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
+				sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+				*abort_flag = 1;
+				if (last_chunk) {
+					*m = NULL;
 				}
-				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
-				sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
-				*abort_flag = 1;
 				return (0);
 			} else {
 				if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
@@ -1931,32 +1748,15 @@
 						control->whoFrom = NULL;
 					}
 					sctp_free_a_readq(stcb, control);
-
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_16);
-						ippp++;
-						*ippp = tsn;
-						ippp++;
-						*ippp = ((strmno << 16) | strmseq);
+					snprintf(msg, sizeof(msg), "PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    tsn, strmno, strmseq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_18;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+					*abort_flag = 1;
+					if (last_chunk) {
+						*m = NULL;
 					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
-					*abort_flag = 1;
 					return (0);
 				}
 			}
@@ -1977,31 +1777,15 @@
 						control->whoFrom = NULL;
 					}
 					sctp_free_a_readq(stcb, control);
-					oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (oper) {
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(oper) =
-						    sizeof(struct sctp_paramhdr) +
-						    (3 * sizeof(uint32_t));
-						ph = mtod(oper,
-						    struct sctp_paramhdr *);
-						ph->param_type =
-						    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-						ph->param_length =
-						    htons(SCTP_BUF_LEN(oper));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
-						ippp++;
-						*ippp = tsn;
-						ippp++;
-						*ippp = ((strmno << 16) | strmseq);
+					snprintf(msg, sizeof(msg), "No PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+					    tsn, strmno, strmseq);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+					sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+					*abort_flag = 1;
+					if (last_chunk) {
+						*m = NULL;
 					}
-					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
-					sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
-					*abort_flag = 1;
 					return (0);
 				}
 			}
@@ -2065,6 +1849,9 @@
 			} else {
 				sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
 				if (*abort_flag) {
+					if (last_chunk) {
+						*m = NULL;
+					}
 					return (0);
 				}
 			}
@@ -2077,7 +1864,9 @@
 			 * the assoc is now gone and chk was put onto the
 			 * reasm queue, which has all been freed.
 			 */
-			*m = NULL;
+			if (last_chunk) {
+				*m = NULL;
+			}
 			return (0);
 		}
 	}
@@ -2114,8 +1903,9 @@
 		 */
 		struct sctp_queued_to_read *ctl, *nctl;
 
-		sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams);
+		sctp_reset_in_stream(stcb, liste->number_entries, liste->list_of_streams);
 		TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+		sctp_send_deferred_reset_response(stcb, liste, SCTP_STREAM_RESET_RESULT_PERFORMED);
 		SCTP_FREE(liste, SCTP_M_STRESET);
 		/* sa_ignore FREED_MEMORY */
 		liste = TAILQ_FIRST(&asoc->resetHead);
@@ -2389,7 +2179,8 @@
 		 */
 		if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
-			    stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+			    stcb->sctp_ep, stcb, NULL,
+			    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
 		}
 		sctp_send_shutdown(stcb,
 		    ((stcb->asoc.alternate) ? stcb->asoc.alternate : stcb->asoc.primary_destination));
@@ -2495,7 +2286,7 @@
 		 * delivery queue and something can be delivered.
 		 */
 		if (stcb->sctp_socket) {
-			pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+			pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
 			    stcb->sctp_ep->partial_delivery_point);
 		} else {
 			pd_point = stcb->sctp_ep->partial_delivery_point;
@@ -2517,10 +2308,8 @@
 
 int
 sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
-    struct sctphdr *sh, struct sctp_inpcb *inp,
-    struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t * high_tsn,
-    uint8_t use_mflowid, uint32_t mflowid,
-    uint32_t vrf_id, uint16_t port)
+    struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+    struct sctp_nets *net, uint32_t * high_tsn)
 {
 	struct sctp_data_chunk *ch, chunk_buf;
 	struct sctp_association *asoc;
@@ -2560,7 +2349,7 @@
 	 */
 	if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
 		/* we only handle mbufs that are singletons.. not chains */
-		m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+		m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_NOWAIT, 1, MT_DATA);
 		if (m) {
 			/* ok lets see if we can copy the data up */
 			caddr_t *from, *to;
@@ -2600,37 +2389,31 @@
 			continue;
 		}
 		if (ch->ch.chunk_type == SCTP_DATA) {
-			if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) {
+			if ((size_t)chk_length < sizeof(struct sctp_data_chunk)) {
 				/*
 				 * Need to send an abort since we had a
 				 * invalid data chunk.
 				 */
 				struct mbuf *op_err;
+				char msg[SCTP_DIAG_INFO_LEN];
 
-				op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 2 * sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
+				snprintf(msg, sizeof(msg), "DATA chunk of length %d",
+				    chk_length);
+				op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_21;
+				sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
+				return (2);
+			}
+			if ((size_t)chk_length == sizeof(struct sctp_data_chunk)) {
+				/*
+				 * Need to send an abort since we had an
+				 * empty data chunk.
+				 */
+				struct mbuf *op_err;
 
-				if (op_err) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) +
-					    (2 * sizeof(uint32_t));
-					ph = mtod(op_err, struct sctp_paramhdr *);
-					ph->param_type =
-					    htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-					ph->param_length = htons(SCTP_BUF_LEN(op_err));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_19);
-					ippp++;
-					*ippp = asoc->cumulative_tsn;
-
-				}
-				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
-				sctp_abort_association(inp, stcb, m, iphlen, sh,
-				    op_err,
-				    use_mflowid, mflowid,
-				    vrf_id, port);
+				op_err = sctp_generate_no_user_data_cause(ch->dp.tsn);
+				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_22;
+				sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
 				return (2);
 			}
 #ifdef SCTP_AUDITING_ENABLED
@@ -2692,13 +2475,12 @@
 				 */
 				if (SCTP_BASE_SYSCTL(sctp_strict_data_order)) {
 					struct mbuf *op_err;
+					char msg[SCTP_DIAG_INFO_LEN];
 
-					op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
-					sctp_abort_association(inp, stcb,
-					    m, iphlen,
-					    sh, op_err,
-					    use_mflowid, mflowid,
-					    vrf_id, port);
+					snprintf(msg, sizeof(msg), "DATA chunk followed by chunk of type %2.2x",
+					    ch->ch.chunk_type);
+					op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+					sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
 					return (2);
 				}
 				break;
@@ -2706,34 +2488,21 @@
 				/* unknown chunk type, use bit rules */
 				if (ch->ch.chunk_type & 0x40) {
 					/* Add a error report to the queue */
-					struct mbuf *merr;
-					struct sctp_paramhdr *phd;
+					struct mbuf *op_err;
+					struct sctp_gen_error_cause *cause;
 
-					merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
-					if (merr) {
-						phd = mtod(merr, struct sctp_paramhdr *);
-						/*
-						 * We cheat and use param
-						 * type since we did not
-						 * bother to define a error
-						 * cause struct. They are
-						 * the same basic format
-						 * with different names.
-						 */
-						phd->param_type =
-						    htons(SCTP_CAUSE_UNRECOG_CHUNK);
-						phd->param_length =
-						    htons(chk_length + sizeof(*phd));
-						SCTP_BUF_LEN(merr) = sizeof(*phd);
-						SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
-						if (SCTP_BUF_NEXT(merr)) {
-							if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(merr), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
-								sctp_m_freem(merr);
-							} else {
-								sctp_queue_op_err(stcb, merr);
-							}
+					op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+					    0, M_NOWAIT, 1, MT_DATA);
+					if (op_err != NULL) {
+						cause = mtod(op_err, struct sctp_gen_error_cause *);
+						cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+						cause->length = htons(chk_length + sizeof(struct sctp_gen_error_cause));
+						SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+						SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, chk_length, M_NOWAIT);
+						if (SCTP_BUF_NEXT(op_err) != NULL) {
+							sctp_queue_op_err(stcb, op_err);
 						} else {
-							sctp_m_freem(merr);
+							sctp_m_freem(op_err);
 						}
 					}
 				}
@@ -2763,7 +2532,7 @@
 		/*
 		 * we need to report rwnd overrun drops.
 		 */
-		sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+		sctp_send_packet_dropped(stcb, net, *mm, length, iphlen, 0);
 	}
 	if (num_chunks) {
 		/*
@@ -2825,12 +2594,14 @@
 			 * cumack trackers for first transmissions,
 			 * and retransmissions.
 			 */
-			if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+			if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+			    (tp1->whoTo->find_pseudo_cumack == 1) &&
 			    (tp1->snd_count == 1)) {
 				tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
 				tp1->whoTo->find_pseudo_cumack = 0;
 			}
-			if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+			if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+			    (tp1->whoTo->find_rtx_pseudo_cumack == 1) &&
 			    (tp1->snd_count > 1)) {
 				tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
 				tp1->whoTo->find_rtx_pseudo_cumack = 0;
@@ -2974,9 +2745,10 @@
 					 * All chunks NOT UNSENT fall through here and are marked
 					 * (leave PR-SCTP ones that are to skip alone though)
 					 */
-					if (tp1->sent != SCTP_FORWARD_TSN_SKIP)
+					if ((tp1->sent != SCTP_FORWARD_TSN_SKIP) &&
+					    (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) {
 						tp1->sent = SCTP_DATAGRAM_MARKED;
-
+					}
 					if (tp1->rec.data.chunk_was_revoked) {
 						/* deflate the cwnd */
 						tp1->whoTo->cwnd -= tp1->book_size;
@@ -2983,7 +2755,21 @@
 						tp1->rec.data.chunk_was_revoked = 0;
 					}
 					/* NR Sack code here */
-					if (nr_sacking) {
+					if (nr_sacking &&
+					    (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) {
+						if (stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+							stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+						} else {
+							panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+						}
+						if ((stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+						    (stcb->asoc.strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+						    TAILQ_EMPTY(&stcb->asoc.strmout[tp1->rec.data.stream_number].outqueue)) {
+							stcb->asoc.trigger_reset = 1;
+						}
+						tp1->sent = SCTP_DATAGRAM_NR_ACKED;
 						if (tp1->data) {
 							/*
 							 * sa_ignore
@@ -3088,7 +2874,6 @@
     uint32_t biggest_tsn_acked)
 {
 	struct sctp_tmit_chunk *tp1;
-	int tot_revoked = 0;
 
 	TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
 		if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cumack)) {
@@ -3123,7 +2908,6 @@
 				 * artificial inflation of the flight_size.
 				 */
 				tp1->whoTo->cwnd += tp1->book_size;
-				tot_revoked++;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
 					sctp_log_sack(asoc->last_acked_seq,
 					    cumack,
@@ -3174,7 +2958,7 @@
 				num_dests_sacked++;
 		}
 	}
-	if (stcb->asoc.peer_supports_prsctp) {
+	if (stcb->asoc.prsctp_supported) {
 		(void)SCTP_GETTIME_TIMEVAL(&now);
 	}
 	TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
@@ -3195,7 +2979,7 @@
 			/* done */
 			break;
 		}
-		if (stcb->asoc.peer_supports_prsctp) {
+		if (stcb->asoc.prsctp_supported) {
 			if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
 				/* Is it expired? */
 				if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
@@ -3449,7 +3233,7 @@
 			/* remove from the total flight */
 			sctp_total_flight_decrease(stcb, tp1);
 
-			if ((stcb->asoc.peer_supports_prsctp) &&
+			if ((stcb->asoc.prsctp_supported) &&
 			    (PR_SCTP_RTX_ENABLED(tp1->flags))) {
 				/*
 				 * Has it been retransmitted tv_sec times? -
@@ -3594,17 +3378,19 @@
 	struct timeval now;
 	int now_filled = 0;
 
-	if (asoc->peer_supports_prsctp == 0) {
+	if (asoc->prsctp_supported == 0) {
 		return (NULL);
 	}
 	TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) {
 		if (tp1->sent != SCTP_FORWARD_TSN_SKIP &&
-		    tp1->sent != SCTP_DATAGRAM_RESEND) {
+		    tp1->sent != SCTP_DATAGRAM_RESEND &&
+		    tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
 			/* no chance to advance, out of here */
 			break;
 		}
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
-			if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+			if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) ||
+			    (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) {
 				sctp_misc_ints(SCTP_FWD_TSN_CHECK,
 				    asoc->advanced_peer_ack_point,
 				    tp1->rec.data.TSN_seq, 0, 0);
@@ -3652,7 +3438,8 @@
 		 * the chunk, advance our peer ack point and we can check
 		 * the next chunk.
 		 */
-		if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+		if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) ||
+		    (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) {
 			/* advance PeerAckPoint goes forward */
 			if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, asoc->advanced_peer_ack_point)) {
 				asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq;
@@ -3677,12 +3464,18 @@
 {
 	struct sctp_tmit_chunk *chk;
 	int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
-	int entry_flight, entry_cnt, ret;
+	int ret;
 
+#ifndef INVARIANTS
+	int entry_flight, entry_cnt;
+
+#endif
+
+	ret = 0;
+#ifndef INVARIANTS
 	entry_flight = asoc->total_flight;
 	entry_cnt = asoc->total_flight_count;
-	ret = 0;
-
+#endif
 	if (asoc->pr_sctp_cnt >= asoc->sent_queue_cnt)
 		return (0);
 
@@ -3729,7 +3522,7 @@
 	if ((tp1->sent >= SCTP_DATAGRAM_ACKED) || (tp1->data == NULL)) {
 		/* TSN's skipped we do NOT move back. */
 		sctp_misc_ints(SCTP_FLIGHT_LOG_DWN_WP_FWD,
-		    tp1->whoTo->flight_size,
+		    tp1->whoTo ? tp1->whoTo->flight_size : 0,
 		    tp1->book_size,
 		    (uintptr_t) tp1->whoTo,
 		    tp1->rec.data.TSN_seq);
@@ -3829,34 +3622,17 @@
 			send_s = asoc->sending_seq;
 		}
 		if (SCTP_TSN_GE(cumack, send_s)) {
-#ifndef INVARIANTS
-			struct mbuf *oper;
+			struct mbuf *op_err;
+			char msg[SCTP_DIAG_INFO_LEN];
 
-#endif
-#ifdef INVARIANTS
-			panic("Impossible sack 1");
-#else
-
 			*abort_now = 1;
 			/* XXX */
-			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-			    0, M_DONTWAIT, 1, MT_DATA);
-			if (oper) {
-				struct sctp_paramhdr *ph;
-				uint32_t *ippp;
-
-				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-				    sizeof(uint32_t);
-				ph = mtod(oper, struct sctp_paramhdr *);
-				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-				ph->param_length = htons(SCTP_BUF_LEN(oper));
-				ippp = (uint32_t *) (ph + 1);
-				*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
-			}
-			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
-			sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+			snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+			    cumack, send_s);
+			op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_23;
+			sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 			return;
-#endif
 		}
 	}
 	asoc->this_sack_highest_gap = cumack;
@@ -3957,7 +3733,20 @@
 					tp1->whoTo->cwnd -= tp1->book_size;
 					tp1->rec.data.chunk_was_revoked = 0;
 				}
-				tp1->sent = SCTP_DATAGRAM_ACKED;
+				if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+					if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+						asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+					} else {
+						panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+					}
+				}
+				if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+				    (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+				    TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+					asoc->trigger_reset = 1;
+				}
 				TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
 				if (tp1->data) {
 					/* sa_ignore NO_NULL_CHK */
@@ -3983,7 +3772,7 @@
 	}
 	/* sa_ignore NO_NULL_CHK */
 	if (stcb->sctp_socket) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 
 #endif
@@ -3992,7 +3781,7 @@
 			/* sa_ignore NO_NULL_CHK */
 			sctp_wakeup_log(stcb, 1, SCTP_WAKESND_FROM_SACK);
 		}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(stcb->sctp_ep);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -4006,7 +3795,7 @@
 		}
 #endif
 		sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	} else {
@@ -4042,7 +3831,9 @@
 				}
 				if (net->dest_state & SCTP_ADDR_PF) {
 					net->dest_state &= ~SCTP_ADDR_PF;
-					sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+					sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+					    stcb->sctp_ep, stcb, net,
+					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
 					sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 					asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
 					/* Done with this net */
@@ -4128,7 +3919,7 @@
 			} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, net,
-				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
 			}
 		}
 	}
@@ -4196,27 +3987,15 @@
 		    (asoc->stream_queue_cnt == 0)) {
 			if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
 				/* Need to abort here */
-				struct mbuf *oper;
+				struct mbuf *op_err;
 
 		abort_out_now:
 				*abort_now = 1;
 				/* XXX */
-				oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (oper) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-					    sizeof(uint32_t);
-					ph = mtod(oper, struct sctp_paramhdr *);
-					ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
-					ph->param_length = htons(SCTP_BUF_LEN(oper));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
-				}
-				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
-				sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+				op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
+				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_26;
+				sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+				return;
 			} else {
 				struct sctp_nets *netp;
 
@@ -4242,11 +4021,6 @@
 		    (asoc->stream_queue_cnt == 0)) {
 			struct sctp_nets *netp;
 
-			if (asoc->alternate) {
-				netp = asoc->alternate;
-			} else {
-				netp = asoc->primary_destination;
-			}
 			if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
 				goto abort_out_now;
 			}
@@ -4253,8 +4027,13 @@
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
 			SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+			sctp_stop_timers_for_shutdown(stcb);
+			if (asoc->alternate) {
+				netp = asoc->alternate;
+			} else {
+				netp = asoc->primary_destination;
+			}
 			sctp_send_shutdown_ack(stcb, netp);
-			sctp_stop_timers_for_shutdown(stcb);
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
 			    stcb->sctp_ep, stcb, netp);
 		}
@@ -4268,7 +4047,7 @@
 		asoc->advanced_peer_ack_point = cumack;
 	}
 	/* PR-Sctp issues need to be addressed too */
-	if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+	if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
 		struct sctp_tmit_chunk *lchk;
 		uint32_t old_adv_peer_ack_point;
 
@@ -4409,7 +4188,8 @@
 			send_s = asoc->sending_seq;
 		}
 		if (SCTP_TSN_GE(cum_ack, send_s)) {
-			struct mbuf *oper;
+			struct mbuf *op_err;
+			char msg[SCTP_DIAG_INFO_LEN];
 
 			/*
 			 * no way, we have not even sent this TSN out yet.
@@ -4419,27 +4199,16 @@
 			    cum_ack, send_s);
 			if (tp1) {
 				SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1:%p\n",
-				    tp1->rec.data.TSN_seq, tp1);
+				    tp1->rec.data.TSN_seq, (void *)tp1);
 			}
 	hopeless_peer:
 			*abort_now = 1;
 			/* XXX */
-			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-			    0, M_DONTWAIT, 1, MT_DATA);
-			if (oper) {
-				struct sctp_paramhdr *ph;
-				uint32_t *ippp;
-
-				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-				    sizeof(uint32_t);
-				ph = mtod(oper, struct sctp_paramhdr *);
-				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-				ph->param_length = htons(SCTP_BUF_LEN(oper));
-				ippp = (uint32_t *) (ph + 1);
-				*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
-			}
-			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
-			sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+			snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+			    cum_ack, send_s);
+			op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_27;
+			sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 			return;
 		}
 	}
@@ -4470,7 +4239,7 @@
 		/* stop any timers */
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
-			    stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+			    stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
 			net->partial_bytes_acked = 0;
 			net->flight_size = 0;
 		}
@@ -4616,7 +4385,9 @@
 					tp1->whoTo->cwnd -= tp1->book_size;
 					tp1->rec.data.chunk_was_revoked = 0;
 				}
-				tp1->sent = SCTP_DATAGRAM_ACKED;
+				if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+					tp1->sent = SCTP_DATAGRAM_ACKED;
+				}
 			}
 		} else {
 			break;
@@ -4673,7 +4444,7 @@
 			if (net->new_pseudo_cumack)
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, net,
-				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_27);
+				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
 
 		}
 	} else {
@@ -4680,7 +4451,7 @@
 		if (accum_moved) {
 			TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
-				    stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
+				    stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
 			}
 		}
 	}
@@ -4693,13 +4464,22 @@
 		if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cum_ack)) {
 			break;
 		}
-		if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
-			/* no more sent on list */
-			SCTP_PRINTF("Warning, tp1->sent == %d and its now acked?\n",
-			    tp1->sent);
+		if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+			if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+				asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+			} else {
+				panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+			}
 		}
+		if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+		    (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+		    TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+			asoc->trigger_reset = 1;
+		}
 		TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
-		if (tp1->pr_sctp_on) {
+		if (PR_SCTP_ENABLED(tp1->flags)) {
 			if (asoc->pr_sctp_cnt != 0)
 				asoc->pr_sctp_cnt--;
 		}
@@ -4709,7 +4489,7 @@
 			sctp_free_bufspace(stcb, asoc, tp1, 1);
 			sctp_m_freem(tp1->data);
 			tp1->data = NULL;
-			if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(tp1->flags)) {
+			if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(tp1->flags)) {
 				asoc->sent_queue_cnt_removeable--;
 			}
 		}
@@ -4735,7 +4515,7 @@
 	}
 	/* sa_ignore NO_NULL_CHK */
 	if ((wake_him) && (stcb->sctp_socket)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 
 #endif
@@ -4743,7 +4523,7 @@
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
 			sctp_wakeup_log(stcb, wake_him, SCTP_WAKESND_FROM_SACK);
 		}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(stcb->sctp_ep);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -4757,7 +4537,7 @@
 		}
 #endif
 		sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	} else {
@@ -4849,7 +4629,9 @@
 				}
 				if (net->dest_state & SCTP_ADDR_PF) {
 					net->dest_state &= ~SCTP_ADDR_PF;
-					sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+					sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+					    stcb->sctp_ep, stcb, net,
+					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
 					sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 					asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
 					/* Done with this net */
@@ -4872,7 +4654,8 @@
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			/* stop all timers */
 			sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
-			    stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+			    stcb, net,
+			    SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
 			net->flight_size = 0;
 			net->partial_bytes_acked = 0;
 		}
@@ -4922,36 +4705,18 @@
 		    (asoc->stream_queue_cnt == 0)) {
 			if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
 				/* Need to abort here */
-				struct mbuf *oper;
+				struct mbuf *op_err;
 
 		abort_out_now:
 				*abort_now = 1;
 				/* XXX */
-				oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (oper) {
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-					    sizeof(uint32_t);
-					ph = mtod(oper, struct sctp_paramhdr *);
-					ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
-					ph->param_length = htons(SCTP_BUF_LEN(oper));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
-				}
-				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
-				sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+				op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
+				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
+				sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 				return;
 			} else {
 				struct sctp_nets *netp;
 
-				if (asoc->alternate) {
-					netp = asoc->alternate;
-				} else {
-					netp = asoc->primary_destination;
-				}
 				if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
 				    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
@@ -4959,6 +4724,11 @@
 				SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 				SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 				sctp_stop_timers_for_shutdown(stcb);
+				if (asoc->alternate) {
+					netp = asoc->alternate;
+				} else {
+					netp = asoc->primary_destination;
+				}
 				sctp_send_shutdown(stcb, netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
 				    stcb->sctp_ep, stcb, netp);
@@ -4970,11 +4740,6 @@
 		    (asoc->stream_queue_cnt == 0)) {
 			struct sctp_nets *netp;
 
-			if (asoc->alternate) {
-				netp = asoc->alternate;
-			} else {
-				netp = asoc->primary_destination;
-			}
 			if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
 				goto abort_out_now;
 			}
@@ -4981,8 +4746,13 @@
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
 			SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+			sctp_stop_timers_for_shutdown(stcb);
+			if (asoc->alternate) {
+				netp = asoc->alternate;
+			} else {
+				netp = asoc->primary_destination;
+			}
 			sctp_send_shutdown_ack(stcb, netp);
-			sctp_stop_timers_for_shutdown(stcb);
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
 			    stcb->sctp_ep, stcb, netp);
 			return;
@@ -5093,7 +4863,7 @@
 			} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, net,
-				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+				    SCTP_FROM_SCTP_INDATA + SCTP_LOC_34);
 			}
 		}
 	}
@@ -5134,7 +4904,7 @@
 		asoc->advanced_peer_ack_point = cum_ack;
 	}
 	/* C2. try to further move advancedPeerAckPoint ahead */
-	if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+	if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
 		struct sctp_tmit_chunk *lchk;
 		uint32_t old_adv_peer_ack_point;
 
@@ -5371,7 +5141,8 @@
 	asoc->cumulative_tsn = new_cum_tsn;
 	if (gap >= m_size) {
 		if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) {
-			struct mbuf *oper;
+			struct mbuf *op_err;
+			char msg[SCTP_DIAG_INFO_LEN];
 
 			/*
 			 * out of range (of single byte chunks in the rwnd I
@@ -5378,26 +5149,12 @@
 			 * give out). This must be an attacker.
 			 */
 			*abort_flag = 1;
-			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
-			    0, M_DONTWAIT, 1, MT_DATA);
-			if (oper) {
-				struct sctp_paramhdr *ph;
-				uint32_t *ippp;
-
-				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-				    (sizeof(uint32_t) * 3);
-				ph = mtod(oper, struct sctp_paramhdr *);
-				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-				ph->param_length = htons(SCTP_BUF_LEN(oper));
-				ippp = (uint32_t *) (ph + 1);
-				*ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_33);
-				ippp++;
-				*ippp = asoc->highest_tsn_inside_map;
-				ippp++;
-				*ippp = new_cum_tsn;
-			}
-			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
-			sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+			snprintf(msg, sizeof(msg),
+			    "New cum ack %8.8x too high, highest TSN %8.8x",
+			    new_cum_tsn, asoc->highest_tsn_inside_map);
+			op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+			stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_35;
+			sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 			return;
 		}
 		SCTP_STAT_INCR(sctps_fwdtsn_map_over);

Modified: trunk/sys/netinet/sctp_indata.h
===================================================================
--- trunk/sys/netinet/sctp_indata.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_indata.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_indata.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_indata.h 294147 2016-01-16 12:47:28Z tuexen $");
 
 #ifndef _NETINET_SCTP_INDATA_H_
 #define _NETINET_SCTP_INDATA_H_
@@ -48,7 +48,7 @@
     struct mbuf *dm);
 
 
-#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+#define sctp_build_readq_entry_mac(_ctl, in_it, context, net, tsn, ppid, stream_no, stream_seq, flags, dm) do { \
 	if (_ctl) { \
 		atomic_add_int(&((net)->ref_count), 1); \
 		(_ctl)->sinfo_stream = stream_no; \
@@ -55,7 +55,7 @@
 		(_ctl)->sinfo_ssn = stream_seq; \
 		(_ctl)->sinfo_flags = (flags << 8); \
 		(_ctl)->sinfo_ppid = ppid; \
-		(_ctl)->sinfo_context = a; \
+		(_ctl)->sinfo_context = context; \
 		(_ctl)->sinfo_timetolive = 0; \
 		(_ctl)->sinfo_tsn = tsn; \
 		(_ctl)->sinfo_cumtsn = tsn; \
@@ -112,11 +112,9 @@
      sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *, int *);
 
 int
-sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+sctp_process_data(struct mbuf **, int, int *, int,
     struct sctp_inpcb *, struct sctp_tcb *,
-    struct sctp_nets *, uint32_t *,
-    uint8_t, uint32_t,
-    uint32_t, uint16_t);
+    struct sctp_nets *, uint32_t *);
 
 void sctp_slide_mapping_arrays(struct sctp_tcb *stcb);
 

Modified: trunk/sys/netinet/sctp_input.c
===================================================================
--- trunk/sys/netinet/sctp_input.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_input.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_input.c 240580 2012-09-17 00:47:35Z eadler $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_input.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
@@ -48,7 +48,9 @@
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
+#endif
 #include <sys/smp.h>
 
 
@@ -81,10 +83,11 @@
 
 /* INIT handler */
 static void
-sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_handle_init(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
-    struct sctp_tcb *stcb, int *abort_no_unlock,
-    uint8_t use_mflowid, uint32_t mflowid,
+    struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_init *init;
@@ -91,15 +94,15 @@
 	struct mbuf *op_err;
 
 	SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
-	    stcb);
+	    (void *)stcb);
 	if (stcb == NULL) {
 		SCTP_INP_RLOCK(inp);
 	}
 	/* validate length */
 	if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -109,9 +112,9 @@
 	init = &cp->init;
 	if (init->initiate_tag == 0) {
 		/* protocol error... send abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -119,9 +122,9 @@
 	}
 	if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
 		/* invalid parameter... send abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -129,9 +132,9 @@
 	}
 	if (init->num_inbound_streams == 0) {
 		/* protocol error... send abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -139,9 +142,9 @@
 	}
 	if (init->num_outbound_streams == 0) {
 		/* protocol error... send abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -150,8 +153,10 @@
 	if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
 	    offset + ntohs(cp->ch.chunk_length))) {
 		/* auth parameter(s) error... send abort */
-		sctp_abort_association(inp, stcb, m, iphlen, sh, NULL,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+		    "Problem with AUTH parameters");
+		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
@@ -179,8 +184,10 @@
 		 * state :-)
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) {
-			sctp_send_abort(m, iphlen, sh, 0, NULL,
-			    use_mflowid, mflowid,
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    "No listener");
+			sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
+			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 		}
 		goto outnow;
@@ -192,8 +199,9 @@
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
 	} else {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
-		sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp,
-		    use_mflowid, mflowid,
+		sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset,
+		    src, dst, sh, cp,
+		    mflowtype, mflowid,
 		    vrf_id, port,
 		    ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
 	}
@@ -310,6 +318,13 @@
 			if (chk->rec.data.stream_number >= newcnt) {
 				TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 				asoc->send_queue_cnt--;
+				if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+					asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+				} else {
+					panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+				}
 				if (chk->data != NULL) {
 					sctp_free_bufspace(stcb, asoc, chk, 1);
 					sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
@@ -343,6 +358,7 @@
 					sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED);
 					/* sa_ignore FREED_MEMORY */
 				}
+				outs->state = SCTP_STREAM_CLOSED;
 			}
 		}
 		/* cut back the count */
@@ -349,8 +365,12 @@
 		asoc->pre_open_streams = newcnt;
 	}
 	SCTP_TCB_SEND_UNLOCK(stcb);
-	asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
-
+	asoc->streamoutcnt = asoc->pre_open_streams;
+	if (asoc->strmout) {
+		for (i = 0; i < asoc->streamoutcnt; i++) {
+			asoc->strmout[i].state = SCTP_STREAM_OPEN;
+		}
+	}
 	/* EY - nr_sack: initialize highest tsn in nr_mapping_array */
 	asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
@@ -381,9 +401,10 @@
 		}
 		SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
 	}
-	asoc->streamincnt = ntohs(init->num_outbound_streams);
-	if (asoc->streamincnt > MAX_SCTP_STREAMS) {
-		asoc->streamincnt = MAX_SCTP_STREAMS;
+	if (asoc->max_inbound_streams > ntohs(init->num_outbound_streams)) {
+		asoc->streamincnt = ntohs(init->num_outbound_streams);
+	} else {
+		asoc->streamincnt = asoc->max_inbound_streams;
 	}
 	SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
 	    sizeof(struct sctp_stream_in), SCTP_M_STRMI);
@@ -395,11 +416,6 @@
 	for (i = 0; i < asoc->streamincnt; i++) {
 		asoc->strmin[i].stream_no = i;
 		asoc->strmin[i].last_sequence_delivered = 0xffff;
-		/*
-		 * U-stream ranges will be set when the cookie is unpacked.
-		 * Or for the INIT sender they are un set (if pr-sctp not
-		 * supported) when the INIT-ACK arrives.
-		 */
 		TAILQ_INIT(&asoc->strmin[i].inqueue);
 		asoc->strmin[i].delivery_started = 0;
 	}
@@ -420,10 +436,11 @@
  * INIT-ACK message processing/consumption returns value < 0 on error
  */
 static int
-sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
     struct sctp_nets *net, int *abort_no_unlock,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id)
 {
 	struct sctp_association *asoc;
@@ -434,7 +451,6 @@
 
 	/* First verify that we have no illegal param's */
 	abort_flag = 0;
-	op_err = NULL;
 
 	op_err = sctp_arethere_unrecognized_parameters(m,
 	    (offset + sizeof(struct sctp_init_chunk)),
@@ -455,20 +471,22 @@
 	initack_limit = offset + ntohs(cp->ch.chunk_length);
 	/* load all addresses */
 	if ((retval = sctp_load_addresses_from_init(stcb, m,
-	    (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
-	    NULL))) {
-		/* Huh, we should abort */
+	    (offset + sizeof(struct sctp_init_chunk)), initack_limit,
+	    src, dst, NULL))) {
+		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+		    "Problem with address parameters");
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "Load addresses from INIT causes an abort %d\n",
 		    retval);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, NULL,
-		    use_mflowid, mflowid,
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
 	}
 	/* if the peer doesn't support asconf, flush the asconf queue */
-	if (asoc->peer_supports_asconf == 0) {
+	if (asoc->asconf_supported == 0) {
 		struct sctp_asconf_addr *param, *nparam;
 
 		TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) {
@@ -501,12 +519,11 @@
 	 * primary.
 	 */
 	sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
-	    asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+	    asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
 
 	/* calculate the RTO */
 	net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy,
 	    SCTP_RTT_FROM_NON_DATA);
-
 	retval = sctp_send_cookie_echo(m, offset, stcb, net);
 	if (retval < 0) {
 		/*
@@ -515,30 +532,25 @@
 		 * abandon the peer, its broke.
 		 */
 		if (retval == -3) {
+			uint16_t len;
+
+			len = (uint16_t) (sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
 			/* We abort with an error of missing mandatory param */
-			op_err =
-			    sctp_generate_invmanparam(SCTP_CAUSE_MISSING_PARAM);
-			if (op_err) {
-				/*
-				 * Expand beyond to include the mandatory
-				 * param cookie
-				 */
-				struct sctp_inv_mandatory_param *mp;
+			op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+			if (op_err != NULL) {
+				struct sctp_error_missing_param *cause;
 
-				SCTP_BUF_LEN(op_err) =
-				    sizeof(struct sctp_inv_mandatory_param);
-				mp = mtod(op_err,
-				    struct sctp_inv_mandatory_param *);
+				SCTP_BUF_LEN(op_err) = len;
+				cause = mtod(op_err, struct sctp_error_missing_param *);
 				/* Subtract the reserved param */
-				mp->length =
-				    htons(sizeof(struct sctp_inv_mandatory_param) - 2);
-				mp->num_param = htonl(1);
-				mp->param = htons(SCTP_STATE_COOKIE);
-				mp->resv = 0;
+				cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
+				cause->cause.length = htons(len);
+				cause->num_missing_params = htonl(1);
+				cause->type[0] = htons(SCTP_STATE_COOKIE);
 			}
 			sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
-			    sh, op_err,
-			    use_mflowid, mflowid,
+			    src, dst, sh, op_err,
+			    mflowtype, mflowid,
 			    vrf_id, net->port);
 			*abort_no_unlock = 1;
 		}
@@ -551,21 +563,12 @@
 sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
-	struct sockaddr_storage store;
+	union sctp_sockstore store;
 	struct sctp_nets *r_net, *f_net;
 	struct timeval tv;
 	int req_prim = 0;
 	uint16_t old_error_counter;
 
-#ifdef INET
-	struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
-	struct sockaddr_in6 *sin6;
-
-#endif
-
 	if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
 		/* Invalid length */
 		return;
@@ -575,12 +578,11 @@
 #ifdef INET
 	case AF_INET:
 		if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
-			sin = (struct sockaddr_in *)&store;
-			sin->sin_family = cp->heartbeat.hb_info.addr_family;
-			sin->sin_len = cp->heartbeat.hb_info.addr_len;
-			sin->sin_port = stcb->rport;
-			memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
-			    sizeof(sin->sin_addr));
+			store.sin.sin_family = cp->heartbeat.hb_info.addr_family;
+			store.sin.sin_len = cp->heartbeat.hb_info.addr_len;
+			store.sin.sin_port = stcb->rport;
+			memcpy(&store.sin.sin_addr, cp->heartbeat.hb_info.address,
+			    sizeof(store.sin.sin_addr));
 		} else {
 			return;
 		}
@@ -589,12 +591,10 @@
 #ifdef INET6
 	case AF_INET6:
 		if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
-			sin6 = (struct sockaddr_in6 *)&store;
-			sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
-			sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
-			sin6->sin6_port = stcb->rport;
-			memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
-			    sizeof(sin6->sin6_addr));
+			store.sin6.sin6_family = cp->heartbeat.hb_info.addr_family;
+			store.sin6.sin6_len = cp->heartbeat.hb_info.addr_len;
+			store.sin6.sin6_port = stcb->rport;
+			memcpy(&store.sin6.sin6_addr, cp->heartbeat.hb_info.address, sizeof(struct in6_addr));
 		} else {
 			return;
 		}
@@ -603,7 +603,7 @@
 	default:
 		return;
 	}
-	r_net = sctp_findnet(stcb, (struct sockaddr *)&store);
+	r_net = sctp_findnet(stcb, &store.sa);
 	if (r_net == NULL) {
 		SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
 		return;
@@ -634,7 +634,8 @@
 		}
 		sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 		    stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
-		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+		    r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
 	}
 	old_error_counter = r_net->error_count;
@@ -655,7 +656,8 @@
 		stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
 	}
 	if (old_error_counter > 0) {
-		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+		    stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
 	}
 	if (r_net == stcb->asoc.primary_destination) {
@@ -674,7 +676,9 @@
 		    sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_PRIM_DELETED)) {
 
-			sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+			sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+			    stcb->sctp_ep, stcb, NULL,
+			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
 			if (sctp_is_mobility_feature_on(stcb->sctp_ep,
 			    SCTP_MOBILITY_FASTHANDOFF)) {
 				sctp_assoc_immediate_retrans(stcb,
@@ -745,7 +749,7 @@
 	 * return 0 means we want you to proceed with the abort non-zero
 	 * means no abort processing
 	 */
-	if (stcb->asoc.peer_supports_auth == 0) {
+	if (stcb->asoc.auth_supported == 0) {
 		SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n");
 		return (0);
 	}
@@ -758,7 +762,7 @@
 sctp_handle_abort(struct sctp_abort_chunk *abort,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -775,10 +779,10 @@
 		 * Need to check the cause codes for our two magic nat
 		 * aborts which don't kill the assoc necessarily.
 		 */
-		struct sctp_missing_nat_state *natc;
+		struct sctp_gen_error_cause *cause;
 
-		natc = (struct sctp_missing_nat_state *)(abort + 1);
-		error = ntohs(natc->cause);
+		cause = (struct sctp_gen_error_cause *)(abort + 1);
+		error = ntohs(cause->code);
 		if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) {
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
 			    abort->ch.chunk_flags);
@@ -796,7 +800,8 @@
 		error = 0;
 	}
 	/* stop any receive timers */
-	sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+	sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
 	/* notify user of the abort and clean up... */
 	sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
 	/* free the tcb */
@@ -808,7 +813,7 @@
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	sctp_print_out_track_log(stcb);
 #endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	so = SCTP_INP_SO(stcb->sctp_ep);
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
@@ -818,8 +823,8 @@
 #endif
 	stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
 	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
-	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
@@ -860,8 +865,9 @@
 {
 	struct sctp_association *asoc;
 	int some_on_streamwheel;
+	int old_state;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -878,12 +884,12 @@
 	if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
 		/* Shutdown NOT the expected size */
 		return;
-	} else {
-		sctp_update_acked(stcb, cp, abort_flag);
-		if (*abort_flag) {
-			return;
-		}
 	}
+	old_state = SCTP_GET_STATE(asoc);
+	sctp_update_acked(stcb, cp, abort_flag);
+	if (*abort_flag) {
+		return;
+	}
 	if (asoc->control_pdapi) {
 		/*
 		 * With a normal shutdown we assume the end of last record.
@@ -893,7 +899,7 @@
 		asoc->control_pdapi->pdapi_aborted = 1;
 		asoc->control_pdapi = NULL;
 		SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(stcb->sctp_ep);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -906,8 +912,10 @@
 			return;
 		}
 #endif
-		sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		if (stcb->sctp_socket) {
+			sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+		}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
@@ -933,7 +941,8 @@
 		 * stop the shutdown timer, since we WILL move to
 		 * SHUTDOWN-ACK-SENT.
 		 */
-		sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+		sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+		    net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
 	}
 	/* Now is there unsent data on a stream somewhere? */
 	some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
@@ -946,17 +955,21 @@
 	} else {
 		/* no outstanding data to send, so move on... */
 		/* send SHUTDOWN-ACK */
-		sctp_send_shutdown_ack(stcb, net);
 		/* move to SHUTDOWN-ACK-SENT state */
 		if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
 		    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 		}
-		SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
 		SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
-		sctp_stop_timers_for_shutdown(stcb);
-		sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
-		    stcb, net);
+		if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+			sctp_stop_timers_for_shutdown(stcb);
+			sctp_send_shutdown_ack(stcb, net);
+			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+			    stcb->sctp_ep, stcb, net);
+		} else if (old_state == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+			sctp_send_shutdown_ack(stcb, net);
+		}
 	}
 }
 
@@ -967,7 +980,7 @@
 {
 	struct sctp_association *asoc;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 	so = SCTP_INP_SO(stcb->sctp_ep);
@@ -1001,7 +1014,7 @@
 		asoc->control_pdapi->pdapi_aborted = 1;
 		asoc->control_pdapi = NULL;
 		SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
@@ -1014,18 +1027,20 @@
 		}
 #endif
 		sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
-	/* are the queues empty? */
+#ifdef INVARIANTS
 	if (!TAILQ_EMPTY(&asoc->send_queue) ||
 	    !TAILQ_EMPTY(&asoc->sent_queue) ||
 	    !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
-		sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
+		panic("Queues are not empty when handling SHUTDOWN-ACK");
 	}
+#endif
 	/* stop the timer */
-	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net,
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
 	/* send SHUTDOWN-COMPLETE */
 	sctp_send_shutdown_complete(stcb, net, 0);
 	/* notify upper layer protocol */
@@ -1038,7 +1053,7 @@
 	}
 	SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
 	/* free the TCB but first save off the ep */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
 	SCTP_SOCKET_LOCK(so, 1);
@@ -1046,8 +1061,8 @@
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
-	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 }
@@ -1070,7 +1085,7 @@
 		sctp_asconf_cleanup(stcb, net);
 		break;
 	case SCTP_FORWARD_CUM_TSN:
-		stcb->asoc.peer_supports_prsctp = 0;
+		stcb->asoc.prsctp_supported = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1084,6 +1099,7 @@
  * Skip past the param header and then we will find the param that caused the
  * problem.  There are a number of param's in a ASCONF OR the prsctp param
  * these will turn of specific features.
+ * XXX: Is this the right thing to do?
  */
 static void
 sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
@@ -1094,7 +1110,7 @@
 	switch (ntohs(pbad->param_type)) {
 		/* pr-sctp draft */
 	case SCTP_PRSCTP_SUPPORTED:
-		stcb->asoc.peer_supports_prsctp = 0;
+		stcb->asoc.prsctp_supported = 0;
 		break;
 	case SCTP_SUPPORTED_CHUNK_EXT:
 		break;
@@ -1105,7 +1121,7 @@
 	case SCTP_ADD_IP_ADDRESS:
 	case SCTP_DEL_IP_ADDRESS:
 	case SCTP_SET_PRIM_ADDR:
-		stcb->asoc.peer_supports_asconf = 0;
+		stcb->asoc.asconf_supported = 0;
 		break;
 	case SCTP_SUCCESS_REPORT:
 	case SCTP_ERROR_CAUSE_IND:
@@ -1112,7 +1128,7 @@
 		SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "Turning off ASCONF to this strange peer\n");
-		stcb->asoc.peer_supports_asconf = 0;
+		stcb->asoc.asconf_supported = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1133,7 +1149,7 @@
 	struct sctp_association *asoc;
 	int adjust;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -1196,7 +1212,7 @@
 				    asoc->max_init_times) {
 					sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
 					/* now free the asoc */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(stcb->sctp_ep);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
@@ -1205,8 +1221,8 @@
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 					(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
-					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					return (-1);
@@ -1279,10 +1295,11 @@
 }
 
 static int
-sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
     struct sctp_nets *net, int *abort_no_unlock,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id)
 {
 	struct sctp_init_ack *init_ack;
@@ -1298,10 +1315,10 @@
 	}
 	if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
 		/* Invalid length */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
-		    op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
@@ -1310,10 +1327,10 @@
 	/* validate parameters */
 	if (init_ack->initiate_tag == 0) {
 		/* protocol error... send an abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
-		    op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
@@ -1320,10 +1337,10 @@
 	}
 	if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
 		/* protocol error... send an abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
-		    op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
@@ -1330,10 +1347,10 @@
 	}
 	if (init_ack->num_inbound_streams == 0) {
 		/* protocol error... send an abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
-		    op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
@@ -1340,10 +1357,10 @@
 	}
 	if (init_ack->num_outbound_streams == 0) {
 		/* protocol error... send an abort */
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
-		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
-		    op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
@@ -1366,9 +1383,9 @@
 			sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 			    stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
 		}
-		if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
+		if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb,
 		    net, abort_no_unlock,
-		    use_mflowid, mflowid,
+		    mflowtype, mflowid,
 		    vrf_id) < 0) {
 			/* error in parsing parameters */
 			return (-1);
@@ -1420,11 +1437,12 @@
 
 static struct sctp_tcb *
 sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port);
 
 
@@ -1436,11 +1454,12 @@
  */
 static struct sctp_tcb *
 sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_association *asoc;
@@ -1448,12 +1467,16 @@
 	struct sctp_init_ack_chunk *initack_cp, initack_buf;
 	struct sctp_nets *net;
 	struct mbuf *op_err;
-	struct sctp_paramhdr *ph;
 	int init_offset, initack_offset, i;
 	int retval;
 	int spec_flag = 0;
 	uint32_t how_indx;
 
+#if defined(SCTP_DETAILED_STR_STATS)
+	int j;
+
+#endif
+
 	net = *netp;
 	/* I know that the TCB is non-NULL from the caller */
 	asoc = &stcb->asoc;
@@ -1467,19 +1490,9 @@
 	if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
 		/* SHUTDOWN came in after sending INIT-ACK */
 		sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
-		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (op_err == NULL) {
-			/* FOOBAR */
-			return (NULL);
-		}
-		/* Set the len */
-		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
-		ph = mtod(op_err, struct sctp_paramhdr *);
-		ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
-		ph->param_length = htons(sizeof(struct sctp_paramhdr));
-		sctp_send_operr_to(m, sh, cookie->peers_vtag, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
+		sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, net->port);
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 2;
@@ -1543,8 +1556,7 @@
 			return (NULL);
 
 		}
-		switch SCTP_GET_STATE
-			(asoc) {
+		switch (SCTP_GET_STATE(asoc)) {
 		case SCTP_STATE_COOKIE_WAIT:
 		case SCTP_STATE_COOKIE_ECHOED:
 			/*
@@ -1561,9 +1573,12 @@
 				return (NULL);
 			}
 			/* we have already processed the INIT so no problem */
-			sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
-			    net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
-			sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+			sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp,
+			    stcb, net,
+			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+			sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp,
+			    stcb, net,
+			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
 			/* update current state */
 			if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
 				SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
@@ -1581,7 +1596,7 @@
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 			    (inp->sctp_socket->so_qlimit == 0)
 			    ) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *so;
 
 #endif
@@ -1593,7 +1608,7 @@
 				 */
 				stcb->sctp_ep->sctp_flags |=
 				    SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				so = SCTP_INP_SO(stcb->sctp_ep);
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
@@ -1606,7 +1621,7 @@
 				}
 #endif
 				soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			}
@@ -1634,7 +1649,7 @@
 			 * have simply lost the COOKIE-ACK
 			 */
 			break;
-			}	/* end switch */
+		}		/* end switch */
 		sctp_stop_all_cookie_timers(stcb);
 		/*
 		 * We ignore the return code here.. not sure if we should
@@ -1643,7 +1658,7 @@
 		 */
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
-		    initack_offset, sh, init_src)) {
+		    initack_offset, src, dst, init_src)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 4;
 			return (NULL);
@@ -1685,27 +1700,9 @@
 		 * Now we have colliding state. We must send an abort here
 		 * with colliding state indication.
 		 */
-		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (op_err == NULL) {
-			/* FOOBAR */
-			return (NULL);
-		}
-		/* pre-reserve some space */
-#ifdef INET6
-		SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
-#else
-		SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
-#endif
-		SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
-		SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
-		/* Set the len */
-		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
-		ph = mtod(op_err, struct sctp_paramhdr *);
-		ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE);
-		ph->param_length = htons(sizeof(struct sctp_paramhdr));
-		sctp_send_abort(m, iphlen, sh, 0, op_err,
-		    use_mflowid, mflowid,
+		op_err = sctp_generate_cause(SCTP_CAUSE_NAT_COLLIDING_STATE, "");
+		sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
+		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, port);
 		return (NULL);
 	}
@@ -1741,7 +1738,8 @@
 		}
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 8;
-		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
 		sctp_stop_all_cookie_timers(stcb);
 		/*
 		 * since we did not send a HB make sure we don't double
@@ -1787,7 +1785,7 @@
 		}
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
-		    initack_offset, sh, init_src)) {
+		    initack_offset, src, dst, init_src)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 10;
 			return (NULL);
@@ -1799,13 +1797,13 @@
 			if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 			    (inp->sctp_socket->so_qlimit == 0)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *so;
 
 #endif
 				stcb->sctp_ep->sctp_flags |=
 				    SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				so = SCTP_INP_SO(stcb->sctp_ep);
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
@@ -1818,7 +1816,7 @@
 				}
 #endif
 				soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			}
@@ -1862,16 +1860,22 @@
 	    cookie->tie_tag_peer_vtag != 0) {
 		struct sctpasochead *head;
 
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		struct socket *so;
+
+#endif
+
 		if (asoc->peer_supports_nat) {
 			/*
-			 * This is a gross gross hack. just call the
+			 * This is a gross gross hack. Just call the
 			 * cookie_new code since we are allowing a duplicate
 			 * association. I hope this works...
 			 */
-			return (sctp_process_cookie_new(m, iphlen, offset, sh, cookie, cookie_len,
+			return (sctp_process_cookie_new(m, iphlen, offset, src, dst,
+			    sh, cookie, cookie_len,
 			    inp, netp, init_src, notification,
 			    auth_skipped, auth_offset, auth_len,
-			    use_mflowid, mflowid,
+			    mflowtype, mflowid,
 			    vrf_id, port));
 		}
 		/*
@@ -1880,8 +1884,10 @@
 		/* temp code */
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 12;
-		sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
-		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+		sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
 
 		/* notify upper layer */
 		*notification = SCTP_NOTIFY_ASSOC_RESTART;
@@ -1925,6 +1931,10 @@
 			    asoc->mapping_array_size);
 		}
 		SCTP_TCB_UNLOCK(stcb);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		so = SCTP_INP_SO(stcb->sctp_ep);
+		SCTP_SOCKET_LOCK(so, 1);
+#endif
 		SCTP_INP_INFO_WLOCK();
 		SCTP_INP_WLOCK(stcb->sctp_ep);
 		SCTP_TCB_LOCK(stcb);
@@ -1932,10 +1942,20 @@
 		/* send up all the data */
 		SCTP_TCB_SEND_LOCK(stcb);
 
-		sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_NOT_LOCKED);
+		sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED);
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+			stcb->asoc.strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+				asoc->strmout[i].abandoned_sent[j] = 0;
+				asoc->strmout[i].abandoned_unsent[j] = 0;
+			}
+#else
+			asoc->strmout[i].abandoned_sent[0] = 0;
+			asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
 			stcb->asoc.strmout[i].stream_no = i;
-			stcb->asoc.strmout[i].next_sequence_sent = 0;
+			stcb->asoc.strmout[i].next_sequence_send = 0;
 			stcb->asoc.strmout[i].last_msg_incomplete = 0;
 		}
 		/* process the INIT-ACK info (my info) */
@@ -1953,11 +1973,15 @@
 		 */
 		LIST_INSERT_HEAD(head, stcb, sctp_asocs);
 
-		/* process the INIT info (peer's info) */
 		SCTP_TCB_SEND_UNLOCK(stcb);
 		SCTP_INP_WUNLOCK(stcb->sctp_ep);
 		SCTP_INP_INFO_WUNLOCK();
-
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+		asoc->total_flight = 0;
+		asoc->total_flight_count = 0;
+		/* process the INIT info (peer's info) */
 		retval = sctp_process_init(init_cp, stcb);
 		if (retval < 0) {
 			if (how_indx < sizeof(asoc->cookie_how))
@@ -1973,7 +1997,7 @@
 
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
-		    initack_offset, sh, init_src)) {
+		    initack_offset, src, dst, init_src)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 14;
 
@@ -2004,18 +2028,18 @@
  */
 static struct sctp_tcb *
 sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_tcb *stcb;
 	struct sctp_init_chunk *init_cp, init_buf;
 	struct sctp_init_ack_chunk *initack_cp, initack_buf;
-	struct sockaddr_storage sa_store;
-	struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+	union sctp_sockstore store;
 	struct sctp_association *asoc;
 	int init_offset, initack_offset, initack_limit;
 	int retval;
@@ -2022,15 +2046,7 @@
 	int error = 0;
 	uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
 
-#ifdef INET
-	struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
-	struct sockaddr_in6 *sin6;
-
-#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 	so = SCTP_INP_SO(inp);
@@ -2092,6 +2108,7 @@
 	 */
 	stcb = sctp_aloc_assoc(inp, init_src, &error,
 	    ntohl(initack_cp->init.initiate_tag), vrf_id,
+	    ntohs(initack_cp->init.num_outbound_streams),
 	    (struct thread *)NULL
 	    );
 	if (stcb == NULL) {
@@ -2100,11 +2117,10 @@
 		/* memory problem? */
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "process_cookie_new: no room for another TCB!\n");
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
-
+		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 		sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
-		    sh, op_err,
-		    use_mflowid, mflowid,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 		return (NULL);
 	}
@@ -2114,13 +2130,13 @@
 
 	asoc = &stcb->asoc;
 	/* get scope variables out of cookie */
-	asoc->ipv4_local_scope = cookie->ipv4_scope;
-	asoc->site_scope = cookie->site_scope;
-	asoc->local_scope = cookie->local_scope;
-	asoc->loopback_scope = cookie->loopback_scope;
+	asoc->scope.ipv4_local_scope = cookie->ipv4_scope;
+	asoc->scope.site_scope = cookie->site_scope;
+	asoc->scope.local_scope = cookie->local_scope;
+	asoc->scope.loopback_scope = cookie->loopback_scope;
 
-	if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) ||
-	    (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) {
+	if ((asoc->scope.ipv4_addr_legal != cookie->ipv4_addr_legal) ||
+	    (asoc->scope.ipv6_addr_legal != cookie->ipv6_addr_legal)) {
 		struct mbuf *op_err;
 
 		/*
@@ -2129,19 +2145,19 @@
 		 * association.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
-		op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 		sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
-		    sh, op_err,
-		    use_mflowid, mflowid,
+		    src, dst, sh, op_err,
+		    mflowtype, mflowid,
 		    vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
-		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2166,13 +2182,14 @@
 		retval = 0;
 	if (retval < 0) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2180,16 +2197,17 @@
 	}
 	/* load all addresses */
 	if (sctp_load_addresses_from_init(stcb, m,
-	    init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
-	    init_src)) {
+	    init_offset + sizeof(struct sctp_init_chunk), initack_offset,
+	    src, dst, init_src)) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2212,13 +2230,14 @@
 			SCTPDBG(SCTP_DEBUG_AUTH1,
 			    "COOKIE-ECHO: AUTH failed\n");
 			atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_SOCKET_LOCK(so, 1);
 			SCTP_TCB_LOCK(stcb);
 #endif
-			(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+			(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_21);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2254,34 +2273,32 @@
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
 		/* source addr is IPv4 */
-		sin = (struct sockaddr_in *)initack_src;
-		memset(sin, 0, sizeof(*sin));
-		sin->sin_family = AF_INET;
-		sin->sin_len = sizeof(struct sockaddr_in);
-		sin->sin_addr.s_addr = cookie->laddress[0];
+		memset(&store.sin, 0, sizeof(struct sockaddr_in));
+		store.sin.sin_family = AF_INET;
+		store.sin.sin_len = sizeof(struct sockaddr_in);
+		store.sin.sin_addr.s_addr = cookie->laddress[0];
 		break;
 #endif
 #ifdef INET6
 	case SCTP_IPV6_ADDRESS:
 		/* source addr is IPv6 */
-		sin6 = (struct sockaddr_in6 *)initack_src;
-		memset(sin6, 0, sizeof(*sin6));
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_len = sizeof(struct sockaddr_in6);
-		sin6->sin6_scope_id = cookie->scope_id;
-		memcpy(&sin6->sin6_addr, cookie->laddress,
-		    sizeof(sin6->sin6_addr));
+		memset(&store.sin6, 0, sizeof(struct sockaddr_in6));
+		store.sin6.sin6_family = AF_INET6;
+		store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+		store.sin6.sin6_scope_id = cookie->scope_id;
+		memcpy(&store.sin6.sin6_addr, cookie->laddress, sizeof(struct in6_addr));
 		break;
 #endif
 	default:
 		atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2302,7 +2319,7 @@
 		 * a bit of protection is worth having..
 		 */
 		stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
@@ -2314,7 +2331,7 @@
 		}
 #endif
 		soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
@@ -2334,12 +2351,18 @@
 	    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
 		sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
 	}
-	/* calculate the RTT */
 	(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
-	if ((netp) && (*netp)) {
+	if ((netp != NULL) && (*netp != NULL)) {
+		/* calculate the RTT and set the encaps port */
 		(*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
 		    &cookie->time_entered, sctp_align_unsafe_makecopy,
 		    SCTP_RTT_FROM_NON_DATA);
+#if defined(INET) || defined(INET6)
+		if (((*netp)->port == 0) && (port != 0)) {
+			sctp_pathmtu_adjustment(stcb, (*netp)->mtu - sizeof(struct udphdr));
+		}
+		(*netp)->port = port;
+#endif
 	}
 	/* respond with a COOKIE-ACK */
 	sctp_send_cookie_ack(stcb);
@@ -2351,7 +2374,7 @@
 	sctp_check_address_list(stcb, m,
 	    initack_offset + sizeof(struct sctp_init_ack_chunk),
 	    initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
-	    initack_src, cookie->local_scope, cookie->site_scope,
+	    &store.sa, cookie->local_scope, cookie->site_scope,
 	    cookie->ipv4_scope, cookie->loopback_scope);
 
 
@@ -2377,11 +2400,12 @@
  */
 static struct mbuf *
 sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
     struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
     struct sctp_tcb **locked_tcb,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_state_cookie *cookie;
@@ -2393,13 +2417,10 @@
 	uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
 	uint8_t *sig;
 	uint8_t cookie_ok = 0;
-	unsigned int size_of_pkt, sig_offset, cookie_offset;
+	unsigned int sig_offset, cookie_offset;
 	unsigned int cookie_len;
 	struct timeval now;
 	struct timeval time_expires;
-	struct sockaddr_storage dest_store;
-	struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
-	struct ip *iph;
 	int notification = 0;
 	struct sctp_nets *netl;
 	int had_a_existing_tcb = 0;
@@ -2420,53 +2441,12 @@
 	if (inp_p == NULL) {
 		return (NULL);
 	}
-	/* First get the destination address setup too. */
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			/* its IPv4 */
-			struct sockaddr_in *lsin;
-
-			lsin = (struct sockaddr_in *)(localep_sa);
-			memset(lsin, 0, sizeof(*lsin));
-			lsin->sin_family = AF_INET;
-			lsin->sin_len = sizeof(*lsin);
-			lsin->sin_port = sh->dest_port;
-			lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
-			size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
-			break;
-		}
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		{
-			/* its IPv6 */
-			struct ip6_hdr *ip6;
-			struct sockaddr_in6 *lsin6;
-
-			lsin6 = (struct sockaddr_in6 *)(localep_sa);
-			memset(lsin6, 0, sizeof(*lsin6));
-			lsin6->sin6_family = AF_INET6;
-			lsin6->sin6_len = sizeof(struct sockaddr_in6);
-			ip6 = mtod(m, struct ip6_hdr *);
-			lsin6->sin6_port = sh->dest_port;
-			lsin6->sin6_addr = ip6->ip6_dst;
-			size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
-			break;
-		}
-#endif
-	default:
-		return (NULL);
-	}
-
 	cookie = &cp->cookie;
 	cookie_offset = offset + sizeof(struct sctp_chunkhdr);
 	cookie_len = ntohs(cp->ch.chunk_length);
 
-	if ((cookie->peerport != sh->src_port) &&
-	    (cookie->myport != sh->dest_port) &&
+	if ((cookie->peerport != sh->src_port) ||
+	    (cookie->myport != sh->dest_port) ||
 	    (cookie->my_vtag != sh->v_tag)) {
 		/*
 		 * invalid ports or bad tag.  Note that we always leave the
@@ -2477,11 +2457,10 @@
 		 */
 		return (NULL);
 	}
-	if (cookie_len > size_of_pkt ||
-	    cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+	if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
 	    sizeof(struct sctp_init_chunk) +
 	    sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
-		/* cookie too long!  or too small */
+		/* cookie too small */
 		return (NULL);
 	}
 	/*
@@ -2489,12 +2468,7 @@
 	 * calculated in the sctp_hmac_m() call).
 	 */
 	sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
-	if (sig_offset > size_of_pkt) {
-		/* packet not correct size! */
-		/* XXX this may already be accounted for earlier... */
-		return (NULL);
-	}
-	m_sig = m_split(m, sig_offset, M_DONTWAIT);
+	m_sig = m_split(m, sig_offset, M_NOWAIT);
 	if (m_sig == NULL) {
 		/* out of memory or ?? */
 		return (NULL);
@@ -2501,13 +2475,7 @@
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		struct mbuf *mat;
-
-		for (mat = m_sig; mat; mat = SCTP_BUF_NEXT(mat)) {
-			if (SCTP_BUF_IS_EXTENDED(mat)) {
-				sctp_log_mb(mat, SCTP_MBUF_SPLIT);
-			}
-		}
+		sctp_log_mbc(m_sig, SCTP_MBUF_SPLIT);
 	}
 #endif
 
@@ -2596,20 +2564,20 @@
 	if (timevalcmp(&now, &time_expires, >)) {
 		/* cookie is stale! */
 		struct mbuf *op_err;
-		struct sctp_stale_cookie_msg *scm;
+		struct sctp_error_stale_cookie *cause;
 		uint32_t tim;
 
-		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
-		    0, M_DONTWAIT, 1, MT_DATA);
+		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_stale_cookie),
+		    0, M_NOWAIT, 1, MT_DATA);
 		if (op_err == NULL) {
 			/* FOOBAR */
 			return (NULL);
 		}
 		/* Set the len */
-		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
-		scm = mtod(op_err, struct sctp_stale_cookie_msg *);
-		scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
-		scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_stale_cookie);
+		cause = mtod(op_err, struct sctp_error_stale_cookie *);
+		cause->cause.code = htons(SCTP_CAUSE_STALE_COOKIE);
+		cause->cause.length = htons((sizeof(struct sctp_paramhdr) +
 		    (sizeof(uint32_t))));
 		/* seconds to usec */
 		tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
@@ -2616,9 +2584,9 @@
 		/* add in usec */
 		if (tim == 0)
 			tim = now.tv_usec - cookie->time_entered.tv_usec;
-		scm->time_usec = htonl(tim);
-		sctp_send_operr_to(m, sh, cookie->peers_vtag, op_err,
-		    use_mflowid, mflowid,
+		cause->stale_time = htonl(tim);
+		sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+		    mflowtype, mflowid, l_inp->fibnum,
 		    vrf_id, port);
 		return (NULL);
 	}
@@ -2659,9 +2627,9 @@
 		/* This should not happen */
 		return (NULL);
 	}
-	if ((*stcb == NULL) && to) {
+	if (*stcb == NULL) {
 		/* Yep, lets check */
-		*stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+		*stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL);
 		if (*stcb == NULL) {
 			/*
 			 * We should have only got back the same inp. If we
@@ -2693,29 +2661,28 @@
 				SCTP_INP_INCR_REF((*stcb)->sctp_ep);
 				if ((*stcb)->sctp_ep != l_inp) {
 					SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
-					    (*stcb)->sctp_ep, l_inp);
+					    (void *)(*stcb)->sctp_ep, (void *)l_inp);
 				}
 			}
 		}
 	}
-	if (to == NULL) {
-		return (NULL);
-	}
 	cookie_len -= SCTP_SIGNATURE_SIZE;
 	if (*stcb == NULL) {
 		/* this is the "normal" case... get a new TCB */
-		*stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
-		    cookie_len, *inp_p, netp, to, &notification,
+		*stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh,
+		    cookie, cookie_len, *inp_p,
+		    netp, to, &notification,
 		    auth_skipped, auth_offset, auth_len,
-		    use_mflowid, mflowid,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 	} else {
 		/* this is abnormal... cookie-echo on existing TCB */
 		had_a_existing_tcb = 1;
-		*stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+		*stcb = sctp_process_cookie_existing(m, iphlen, offset,
+		    src, dst, sh,
 		    cookie, cookie_len, *inp_p, *stcb, netp, to,
 		    &notification, auth_skipped, auth_offset, auth_len,
-		    use_mflowid, mflowid,
+		    mflowtype, mflowid,
 		    vrf_id, port);
 	}
 
@@ -2723,11 +2690,9 @@
 		/* still no TCB... must be bad cookie-echo */
 		return (NULL);
 	}
-	if ((*netp != NULL) && (use_mflowid != 0)) {
+	if (*netp != NULL) {
+		(*netp)->flowtype = mflowtype;
 		(*netp)->flowid = mflowid;
-#ifdef INVARIANTS
-		(*netp)->flowidset = 1;
-#endif
 	}
 	/*
 	 * Ok, we built an association so confirm the address we sent the
@@ -2789,18 +2754,18 @@
 			if (so == NULL) {
 				struct mbuf *op_err;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *pcb_so;
 
 #endif
 				/* Too many sockets */
 				SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
-				op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+				op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 				sctp_abort_association(*inp_p, NULL, m, iphlen,
-				    sh, op_err,
-				    use_mflowid, mflowid,
+				    src, dst, sh, op_err,
+				    mflowtype, mflowid,
 				    vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				pcb_so = SCTP_INP_SO(*inp_p);
 				atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK((*stcb));
@@ -2808,8 +2773,9 @@
 				SCTP_TCB_LOCK((*stcb));
 				atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 #endif
-				(void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+				(void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(pcb_so, 1);
 #endif
 				return (NULL);
@@ -2831,11 +2797,19 @@
 			inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
 			inp->sctp_socket = so;
 			inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+			inp->max_cwnd = (*inp_p)->max_cwnd;
 			inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off;
-			inp->sctp_ecn_enable = (*inp_p)->sctp_ecn_enable;
+			inp->ecn_supported = (*inp_p)->ecn_supported;
+			inp->prsctp_supported = (*inp_p)->prsctp_supported;
+			inp->auth_supported = (*inp_p)->auth_supported;
+			inp->asconf_supported = (*inp_p)->asconf_supported;
+			inp->reconfig_supported = (*inp_p)->reconfig_supported;
+			inp->nrsack_supported = (*inp_p)->nrsack_supported;
+			inp->pktdrop_supported = (*inp_p)->pktdrop_supported;
 			inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
 			inp->sctp_context = (*inp_p)->sctp_context;
 			inp->local_strreset_support = (*inp_p)->local_strreset_support;
+			inp->fibnum = (*inp_p)->fibnum;
 			inp->inp_starting_point_for_iterator = NULL;
 			/*
 			 * copy in the authentication parameters from the
@@ -2899,13 +2873,13 @@
 			 * Pull it from the incomplete queue and wake the
 			 * guy
 			 */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK((*stcb));
 			SCTP_SOCKET_LOCK(so, 1);
 #endif
 			soisconnected(so);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_TCB_LOCK((*stcb));
 			atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_SOCKET_UNLOCK(so, 1);
@@ -2932,9 +2906,9 @@
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_cookie_ack: handling COOKIE-ACK\n");
-	if (stcb == NULL)
+	if ((stcb == NULL) || (net == NULL)) {
 		return;
-
+	}
 	asoc = &stcb->asoc;
 
 	sctp_stop_all_cookie_timers(stcb);
@@ -2961,12 +2935,12 @@
 		sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 		if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 		    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			struct socket *so;
 
 #endif
 			stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			so = SCTP_INP_SO(stcb->sctp_ep);
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK(stcb);
@@ -2977,7 +2951,7 @@
 			if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) {
 				soisconnected(stcb->sctp_socket);
 			}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		}
@@ -3009,7 +2983,7 @@
 		 * in flight)
 		 */
 		if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
-		    (stcb->asoc.peer_supports_asconf) &&
+		    (stcb->asoc.asconf_supported == 1) &&
 		    (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
 #ifdef SCTP_TIMER_BASED_ASCONF
 			sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
@@ -3170,7 +3144,6 @@
 	uint32_t cwr_tsn;
 
 	cwr_tsn = ntohl(cp->tsn);
-
 	override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE;
 	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
 		if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
@@ -3186,10 +3159,8 @@
 			stcb->asoc.ecn_echo_cnt_onq--;
 			TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
 			    sctp_next);
-			if (chk->data) {
-				sctp_m_freem(chk->data);
-				chk->data = NULL;
-			}
+			sctp_m_freem(chk->data);
+			chk->data = NULL;
 			stcb->asoc.ctrl_queue_cnt--;
 			sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 			if (override == 0) {
@@ -3205,7 +3176,7 @@
 {
 	struct sctp_association *asoc;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -3227,20 +3198,22 @@
 	/* notify upper layer protocol */
 	if (stcb->sctp_socket) {
 		sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
-		/* are the queues empty? they should be */
-		if (!TAILQ_EMPTY(&asoc->send_queue) ||
-		    !TAILQ_EMPTY(&asoc->sent_queue) ||
-		    !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
-			sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
-		}
 	}
+#ifdef INVARIANTS
+	if (!TAILQ_EMPTY(&asoc->send_queue) ||
+	    !TAILQ_EMPTY(&asoc->sent_queue) ||
+	    !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+		panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
+	}
+#endif
 	/* stop the timer */
-	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net,
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
 	SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
 	/* free the TCB */
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_shutdown_complete: calls free-asoc\n");
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	so = SCTP_INP_SO(stcb->sctp_ep);
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
@@ -3248,8 +3221,9 @@
 	SCTP_TCB_LOCK(stcb);
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	return;
@@ -3356,7 +3330,8 @@
 
 				/* restart the timer */
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
-				    stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+				    stcb, tp1->whoTo,
+				    SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, tp1->whoTo);
 
@@ -3424,7 +3399,8 @@
 			 * this, otherwise we let the timer fire.
 			 */
 			sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
-			    stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+			    stcb, net,
+			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
 			sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
 		}
 		break;
@@ -3494,9 +3470,9 @@
 }
 
 void
-sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
 {
-	int i;
+	uint32_t i;
 	uint16_t temp;
 
 	/*
@@ -3522,36 +3498,57 @@
 }
 
 static void
-sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
 {
-	int i;
+	uint32_t i;
+	uint16_t temp;
 
-	if (number_entries == 0) {
+	if (number_entries > 0) {
+		for (i = 0; i < number_entries; i++) {
+			temp = ntohs(list[i]);
+			if (temp >= stcb->asoc.streamoutcnt) {
+				/* no such stream */
+				continue;
+			}
+			stcb->asoc.strmout[temp].next_sequence_send = 0;
+		}
+	} else {
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
-			stcb->asoc.strmout[i].next_sequence_sent = 0;
+			stcb->asoc.strmout[i].next_sequence_send = 0;
 		}
-	} else if (number_entries) {
+	}
+	sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_reset_clear_pending(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
+{
+	uint32_t i;
+	uint16_t temp;
+
+	if (number_entries > 0) {
 		for (i = 0; i < number_entries; i++) {
-			uint16_t temp;
-
 			temp = ntohs(list[i]);
 			if (temp >= stcb->asoc.streamoutcnt) {
 				/* no such stream */
 				continue;
 			}
-			stcb->asoc.strmout[temp].next_sequence_sent = 0;
+			stcb->asoc.strmout[temp].state = SCTP_STREAM_OPEN;
 		}
+	} else {
+		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+			stcb->asoc.strmout[i].state = SCTP_STREAM_OPEN;
+		}
 	}
-	sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
 }
 
 
-struct sctp_stream_reset_out_request *
+struct sctp_stream_reset_request *
 sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
 {
 	struct sctp_association *asoc;
-	struct sctp_stream_reset_out_req *req;
-	struct sctp_stream_reset_out_request *r;
+	struct sctp_chunkhdr *ch;
+	struct sctp_stream_reset_request *r;
 	struct sctp_tmit_chunk *chk;
 	int len, clen;
 
@@ -3573,8 +3570,8 @@
 		*bchk = chk;
 	}
 	clen = chk->send_size;
-	req = mtod(chk->data, struct sctp_stream_reset_out_req *);
-	r = &req->sr_req;
+	ch = mtod(chk->data, struct sctp_chunkhdr *);
+	r = (struct sctp_stream_reset_request *)(ch + 1);
 	if (ntohl(r->request_seq) == seq) {
 		/* found it */
 		return (r);
@@ -3582,7 +3579,7 @@
 	len = SCTP_SIZE32(ntohs(r->ph.param_length));
 	if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
 		/* move to the next one, there can only be a max of two */
-		r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+		r = (struct sctp_stream_reset_request *)((caddr_t)r + len);
 		if (ntohl(r->request_seq) == seq) {
 			return (r);
 		}
@@ -3602,7 +3599,8 @@
 	}
 	asoc = &stcb->asoc;
 
-	sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+	sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb,
+	    chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28);
 	TAILQ_REMOVE(&asoc->control_send_queue,
 	    chk,
 	    sctp_next);
@@ -3626,8 +3624,10 @@
 	int lparm_len;
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *chk;
-	struct sctp_stream_reset_out_request *srparam;
-	int number_entries;
+	struct sctp_stream_reset_request *req_param;
+	struct sctp_stream_reset_out_request *req_out_param;
+	struct sctp_stream_reset_in_request *req_in_param;
+	uint32_t number_entries;
 
 	if (asoc->stream_reset_outstanding == 0) {
 		/* duplicate */
@@ -3634,12 +3634,15 @@
 		return (0);
 	}
 	if (seq == stcb->asoc.str_reset_seq_out) {
-		srparam = sctp_find_stream_reset(stcb, seq, &chk);
-		if (srparam) {
+		req_param = sctp_find_stream_reset(stcb, seq, &chk);
+		if (req_param != NULL) {
 			stcb->asoc.str_reset_seq_out++;
-			type = ntohs(srparam->ph.param_type);
-			lparm_len = ntohs(srparam->ph.param_length);
+			type = ntohs(req_param->ph.param_type);
+			lparm_len = ntohs(req_param->ph.param_length);
 			if (type == SCTP_STR_RESET_OUT_REQUEST) {
+				int no_clear = 0;
+
+				req_out_param = (struct sctp_stream_reset_out_request *)req_param;
 				number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
 				asoc->stream_reset_out_is_outstanding = 0;
 				if (asoc->stream_reset_outstanding)
@@ -3646,23 +3649,35 @@
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					/* do it */
-					sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+					sctp_reset_out_streams(stcb, number_entries, req_out_param->list_of_streams);
 				} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
-					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
+				} else if (action == SCTP_STREAM_RESET_RESULT_IN_PROGRESS) {
+					/*
+					 * Set it up so we don't stop
+					 * retransmitting
+					 */
+					asoc->stream_reset_outstanding++;
+					stcb->asoc.str_reset_seq_out--;
+					asoc->stream_reset_out_is_outstanding = 1;
+					no_clear = 1;
 				} else {
-					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				}
+				if (no_clear == 0) {
+					sctp_reset_clear_pending(stcb, number_entries, req_out_param->list_of_streams);
+				}
 			} else if (type == SCTP_STR_RESET_IN_REQUEST) {
-				/* Answered my request */
+				req_in_param = (struct sctp_stream_reset_in_request *)req_param;
 				number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
 				if (asoc->stream_reset_outstanding)
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb,
-					    number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+					    number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				} else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb,
-					    number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+					    number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				}
 			} else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) {
 				/* Ok we now may have more streams */
@@ -3678,7 +3693,12 @@
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					/* Put the new streams into effect */
-					stcb->asoc.streamoutcnt += num_stream;
+					int i;
+
+					for (i = asoc->streamoutcnt; i < (asoc->streamoutcnt + num_stream); i++) {
+						asoc->strmout[i].state = SCTP_STREAM_OPEN;
+					}
+					asoc->streamoutcnt += num_stream;
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
 				} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
@@ -3711,6 +3731,9 @@
 					/* huh ? */
 					return (0);
 				}
+				if (ntohs(respin->ph.param_length) < sizeof(struct sctp_stream_reset_response_tsn)) {
+					return (0);
+				}
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					resp = (struct sctp_stream_reset_response_tsn *)respin;
 					asoc->stream_reset_outstanding--;
@@ -3752,6 +3775,9 @@
 			}
 		}
 	}
+	if (asoc->stream_reset_outstanding == 0) {
+		sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
+	}
 	return (0);
 }
 
@@ -3782,22 +3808,33 @@
 		} else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
 			len = ntohs(req->ph.param_length);
 			number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
-			for (i = 0; i < number_entries; i++) {
-				temp = ntohs(req->list_of_streams[i]);
-				req->list_of_streams[i] = temp;
+			if (number_entries) {
+				for (i = 0; i < number_entries; i++) {
+					temp = ntohs(req->list_of_streams[i]);
+					if (temp >= stcb->asoc.streamoutcnt) {
+						asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+						goto bad_boy;
+					}
+					req->list_of_streams[i] = temp;
+				}
+				for (i = 0; i < number_entries; i++) {
+					if (stcb->asoc.strmout[req->list_of_streams[i]].state == SCTP_STREAM_OPEN) {
+						stcb->asoc.strmout[req->list_of_streams[i]].state = SCTP_STREAM_RESET_PENDING;
+					}
+				}
+			} else {
+				/* Its all */
+				for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+					if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN)
+						stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+				}
 			}
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
-			sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
-			    asoc->str_reset_seq_out,
-			    seq, (asoc->sending_seq - 1));
-			asoc->stream_reset_out_is_outstanding = 1;
-			asoc->str_reset = chk;
-			sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
-			stcb->asoc.stream_reset_outstanding++;
 		} else {
 			/* Can't do it, since we have sent one out */
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
 		}
+bad_boy:
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		asoc->str_reset_seq_in++;
 	} else if (asoc->str_reset_seq_in - 1 == seq) {
@@ -3807,6 +3844,7 @@
 	} else {
 		sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 	}
+	sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
 }
 
 static int
@@ -3925,12 +3963,12 @@
 				sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 				return;
 			}
+			liste->seq = seq;
 			liste->tsn = tsn;
 			liste->number_entries = number_entries;
-			memcpy(&liste->req, req,
-			    (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
+			memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t));
 			TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
-			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
+			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_IN_PROGRESS;
 		}
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		asoc->str_reset_seq_in++;
@@ -4067,7 +4105,7 @@
 			mychk += num_stream;
 			if (mychk < 0x10000) {
 				stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
-				if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, 1, num_stream, 0, 1)) {
+				if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, num_stream, 0, 1)) {
 					stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 				}
 			} else {
@@ -4098,9 +4136,9 @@
 #endif
 	static int
 	    sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
-        struct sctp_stream_reset_out_req *sr_req)
+        struct sctp_chunkhdr *ch_req)
 {
-	int chk_length, param_len, ptype;
+	uint16_t remaining_length, param_len, ptype;
 	struct sctp_paramhdr pstore;
 	uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
 	uint32_t seq = 0;
@@ -4113,7 +4151,7 @@
 	int num_param = 0;
 
 	/* now it may be a reset or a reset-response */
-	chk_length = ntohs(sr_req->ch.chunk_length);
+	remaining_length = ntohs(ch_req->chunk_length) - sizeof(struct sctp_chunkhdr);
 
 	/* setup for adding the response */
 	sctp_alloc_a_chunk(stcb, chk);
@@ -4120,13 +4158,15 @@
 	if (chk == NULL) {
 		return (ret_code);
 	}
+	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->no_fr_allowed = 0;
 	chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->book_size_scale = 0;
-	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 strres_nochunk:
 		if (chk->data) {
@@ -4149,20 +4189,27 @@
 	ch->chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 	offset += sizeof(struct sctp_chunkhdr);
-	while ((size_t)chk_length >= sizeof(struct sctp_stream_reset_tsn_request)) {
+	while (remaining_length >= sizeof(struct sctp_paramhdr)) {
 		ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *) & pstore);
-		if (ph == NULL)
+		if (ph == NULL) {
+			/* TSNH */
 			break;
+		}
 		param_len = ntohs(ph->param_length);
-		if (param_len < (int)sizeof(struct sctp_stream_reset_tsn_request)) {
-			/* bad param */
+		if ((param_len > remaining_length) ||
+		    (param_len < (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)))) {
+			/* bad parameter length */
 			break;
 		}
-		ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, (int)sizeof(cstore)),
+		ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, sizeof(cstore)),
 		    (uint8_t *) & cstore);
+		if (ph == NULL) {
+			/* TSNH */
+			break;
+		}
 		ptype = ntohs(ph->param_type);
 		num_param++;
-		if (param_len > (int)sizeof(cstore)) {
+		if (param_len > sizeof(cstore)) {
 			trunc = 1;
 		} else {
 			trunc = 0;
@@ -4174,6 +4221,9 @@
 		if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
 			struct sctp_stream_reset_out_request *req_out;
 
+			if (param_len < sizeof(struct sctp_stream_reset_out_request)) {
+				break;
+			}
 			req_out = (struct sctp_stream_reset_out_request *)ph;
 			num_req++;
 			if (stcb->asoc.stream_reset_outstanding) {
@@ -4187,6 +4237,9 @@
 		} else if (ptype == SCTP_STR_RESET_ADD_OUT_STREAMS) {
 			struct sctp_stream_reset_add_strm *str_add;
 
+			if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
+				break;
+			}
 			str_add = (struct sctp_stream_reset_add_strm *)ph;
 			num_req++;
 			sctp_handle_str_reset_add_strm(stcb, chk, str_add);
@@ -4193,6 +4246,9 @@
 		} else if (ptype == SCTP_STR_RESET_ADD_IN_STREAMS) {
 			struct sctp_stream_reset_add_strm *str_add;
 
+			if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
+				break;
+			}
 			str_add = (struct sctp_stream_reset_add_strm *)ph;
 			num_req++;
 			sctp_handle_str_reset_add_out_strm(stcb, chk, str_add);
@@ -4217,6 +4273,9 @@
 			struct sctp_stream_reset_response *resp;
 			uint32_t result;
 
+			if (param_len < sizeof(struct sctp_stream_reset_response)) {
+				break;
+			}
 			resp = (struct sctp_stream_reset_response *)ph;
 			seq = ntohl(resp->response_seq);
 			result = ntohl(resp->result);
@@ -4228,7 +4287,11 @@
 			break;
 		}
 		offset += SCTP_SIZE32(param_len);
-		chk_length -= SCTP_SIZE32(param_len);
+		if (remaining_length >= SCTP_SIZE32(param_len)) {
+			remaining_length -= SCTP_SIZE32(param_len);
+		} else {
+			remaining_length = 0;
+		}
 	}
 	if (num_req == 0) {
 		/* we have no response free the stuff */
@@ -4411,12 +4474,15 @@
 #endif
 	static struct sctp_tcb *
 	         sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+             struct sockaddr *src, struct sockaddr *dst,
              struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
              struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
-             uint8_t use_mflowid, uint32_t mflowid,
+             uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
              uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_association *asoc;
+	struct mbuf *op_err;
+	char msg[SCTP_DIAG_INFO_LEN];
 	uint32_t vtag_in;
 	int num_chunks = 0;	/* number of control chunks processed */
 	uint32_t chk_length;
@@ -4436,13 +4502,13 @@
 	int auth_skipped = 0;
 	int asconf_cnt = 0;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
-	    iphlen, *offset, length, stcb);
+	    iphlen, *offset, length, (void *)stcb);
 
 	/* validate chunk header length... */
 	if (ntohs(ch->chunk_length) < sizeof(*ch)) {
@@ -4480,7 +4546,7 @@
 		 */
 		if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
 		    (stcb == NULL) &&
-		    !SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+		    (inp->auth_supported == 1)) {
 			/* save this chunk for later processing */
 			auth_skipped = 1;
 			auth_offset = *offset;
@@ -4527,7 +4593,9 @@
 				if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
 					break;
 				stcb = sctp_findassociation_ep_asconf(m,
-				    *offset, sh, &inp, netp, vrf_id);
+				    *offset,
+				    dst,
+				    sh, &inp, netp, vrf_id);
 				if (stcb != NULL)
 					break;
 				asconf_offset += SCTP_SIZE32(asconf_len);
@@ -4568,9 +4636,12 @@
 			}
 		}
 		if (stcb == NULL) {
+			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
 			/* no association, so it's out of the blue... */
-			sctp_handle_ootb(m, iphlen, *offset, sh, inp,
-			    use_mflowid, mflowid,
+			sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err,
+			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			*offset = length;
 			if (locked_tcb) {
@@ -4583,8 +4654,10 @@
 		if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
 		    (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
 		    (ch->chunk_type == SCTP_PACKET_DROPPED)) {
-			if ((vtag_in == asoc->my_vtag) ||
-			    ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+			/* Take the T-bit always into account. */
+			if ((((ch->chunk_flags & SCTP_HAD_NO_TCB) == 0) &&
+			    (vtag_in == asoc->my_vtag)) ||
+			    (((ch->chunk_flags & SCTP_HAD_NO_TCB) == SCTP_HAD_NO_TCB) &&
 			    (vtag_in == asoc->peer_vtag))) {
 				/* this is valid */
 			} else {
@@ -4607,8 +4680,12 @@
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
-				sctp_handle_ootb(m, iphlen, *offset, sh, inp,
-				    use_mflowid, mflowid,
+				snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+				op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+				    msg);
+				sctp_handle_ootb(m, iphlen, *offset, src, dst,
+				    sh, inp, op_err,
+				    mflowtype, mflowid, fibnum,
 				    vrf_id, port);
 				return (NULL);
 			}
@@ -4736,7 +4813,7 @@
 
 		/* check to see if this chunk required auth, but isn't */
 		if ((stcb != NULL) &&
-		    !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+		    (stcb->asoc.auth_supported == 1) &&
 		    sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
 		    !stcb->asoc.authenticated) {
 			/* "silently" ignore */
@@ -4749,29 +4826,27 @@
 			/* The INIT chunk must be the only chunk. */
 			if ((num_chunks > 1) ||
 			    (length - *offset > (int)SCTP_SIZE32(chk_length))) {
-				sctp_abort_association(inp, stcb, m,
-				    iphlen, sh, NULL,
-				    use_mflowid, mflowid,
-				    vrf_id, port);
+				/* RFC 4960 requires that no ABORT is sent */
 				*offset = length;
+				if (locked_tcb) {
+					SCTP_TCB_UNLOCK(locked_tcb);
+				}
 				return (NULL);
 			}
 			/* Honor our resource limit. */
 			if (chk_length > SCTP_LARGEST_INIT_ACCEPTED) {
-				struct mbuf *op_err;
-
-				op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
-				sctp_abort_association(inp, stcb, m,
-				    iphlen, sh, op_err,
-				    use_mflowid, mflowid,
+				op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
+				sctp_abort_association(inp, stcb, m, iphlen,
+				    src, dst, sh, op_err,
+				    mflowtype, mflowid,
 				    vrf_id, port);
 				*offset = length;
 				return (NULL);
 			}
-			sctp_handle_init(m, iphlen, *offset, sh,
+			sctp_handle_init(m, iphlen, *offset, src, dst, sh,
 			    (struct sctp_init_chunk *)ch, inp,
-			    stcb, &abort_no_unlock,
-			    use_mflowid, mflowid,
+			    stcb, *netp, &abort_no_unlock,
+			    mflowtype, mflowid,
 			    vrf_id, port);
 			*offset = length;
 			if ((!abort_no_unlock) && (locked_tcb)) {
@@ -4788,13 +4863,13 @@
 				if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 					;
 				} else {
-					if (locked_tcb != stcb) {
+					if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
 						/* Very unlikely */
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					*offset = length;
 					if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 						so = SCTP_INP_SO(inp);
 						atomic_add_int(&stcb->asoc.refcnt, 1);
 						SCTP_TCB_UNLOCK(stcb);
@@ -4802,8 +4877,9 @@
 						SCTP_TCB_LOCK(stcb);
 						atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-						(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+						(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+						    SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 						SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					}
@@ -4820,11 +4896,12 @@
 				return (NULL);
 			}
 			if ((netp) && (*netp)) {
-				ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
+				ret = sctp_handle_init_ack(m, iphlen, *offset,
+				    src, dst, sh,
 				    (struct sctp_init_ack_chunk *)ch,
 				    stcb, *netp,
 				    &abort_no_unlock,
-				    use_mflowid, mflowid,
+				    mflowtype, mflowid,
 				    vrf_id);
 			} else {
 				ret = -1;
@@ -4943,8 +5020,7 @@
 					SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n");
 					break;
 				}
-				if ((stcb->asoc.sctp_nr_sack_on_off == 0) ||
-				    (stcb->asoc.peer_supports_nr_sack == 0)) {
+				if (stcb->asoc.nrsack_supported == 0) {
 					goto unknown_chunk;
 				}
 				if (chk_length < sizeof(struct sctp_nr_sack_chunk)) {
@@ -5057,7 +5133,7 @@
 			break;
 		case SCTP_ABORT_ASSOCIATION:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
-			    stcb);
+			    (void *)stcb);
 			if ((stcb) && netp && *netp)
 				sctp_handle_abort((struct sctp_abort_chunk *)ch,
 				    stcb, *netp);
@@ -5066,7 +5142,7 @@
 			break;
 		case SCTP_SHUTDOWN:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
-			    stcb);
+			    (void *)stcb);
 			if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
 				*offset = length;
 				if (locked_tcb) {
@@ -5086,7 +5162,7 @@
 			}
 			break;
 		case SCTP_SHUTDOWN_ACK:
-			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb);
+			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", (void *)stcb);
 			if ((stcb) && (netp) && (*netp))
 				sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
 			*offset = length;
@@ -5102,7 +5178,7 @@
 			break;
 		case SCTP_COOKIE_ECHO:
 			SCTPDBG(SCTP_DEBUG_INPUT3,
-			    "SCTP_COOKIE-ECHO, stcb %p\n", stcb);
+			    "SCTP_COOKIE-ECHO, stcb %p\n", (void *)stcb);
 			if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 				;
 			} else {
@@ -5127,12 +5203,10 @@
 			if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 				    (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
-					struct mbuf *op_err;
-
-					op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
-					sctp_abort_association(inp, stcb, m,
-					    iphlen, sh, op_err,
-					    use_mflowid, mflowid,
+					op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
+					sctp_abort_association(inp, stcb, m, iphlen,
+					    src, dst, sh, op_err,
+					    mflowtype, mflowid,
 					    vrf_id, port);
 				}
 				*offset = length;
@@ -5158,7 +5232,9 @@
 				if (netp) {
 					ret_buf =
 					    sctp_handle_cookie_echo(m, iphlen,
-					    *offset, sh,
+					    *offset,
+					    src, dst,
+					    sh,
 					    (struct sctp_cookie_echo_chunk *)ch,
 					    &inp, &stcb, netp,
 					    auth_skipped,
@@ -5165,7 +5241,7 @@
 					    auth_offset,
 					    auth_len,
 					    &locked_tcb,
-					    use_mflowid,
+					    mflowtype,
 					    mflowid,
 					    vrf_id,
 					    port);
@@ -5202,7 +5278,7 @@
 			}
 			break;
 		case SCTP_COOKIE_ACK:
-			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb);
+			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", (void *)stcb);
 			if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
@@ -5214,7 +5290,7 @@
 				if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 					;
 				} else if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(inp);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
@@ -5222,8 +5298,9 @@
 					SCTP_TCB_LOCK(stcb);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					*offset = length;
@@ -5255,6 +5332,9 @@
 				return (NULL);
 			}
 			if (stcb) {
+				if (stcb->asoc.ecn_supported == 0) {
+					goto unknown_chunk;
+				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
@@ -5280,6 +5360,9 @@
 				return (NULL);
 			}
 			if (stcb) {
+				if (stcb->asoc.ecn_supported == 0) {
+					goto unknown_chunk;
+				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
@@ -5292,7 +5375,7 @@
 			}
 			break;
 		case SCTP_SHUTDOWN_COMPLETE:
-			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb);
+			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", (void *)stcb);
 			/* must be first and only chunk */
 			if ((num_chunks > 1) ||
 			    (length - *offset > (int)SCTP_SIZE32(chk_length))) {
@@ -5313,6 +5396,9 @@
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
 			/* He's alive so give him credit */
 			if (stcb) {
+				if (stcb->asoc.asconf_supported == 0) {
+					goto unknown_chunk;
+				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
@@ -5321,7 +5407,7 @@
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
-				sctp_handle_asconf(m, *offset,
+				sctp_handle_asconf(m, *offset, src,
 				    (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
 				asconf_cnt++;
 			}
@@ -5337,6 +5423,9 @@
 				return (NULL);
 			}
 			if ((stcb) && netp && *netp) {
+				if (stcb->asoc.asconf_supported == 0) {
+					goto unknown_chunk;
+				}
 				/* He's alive so give him credit */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5366,6 +5455,9 @@
 			if (stcb) {
 				int abort_flag = 0;
 
+				if (stcb->asoc.prsctp_supported == 0) {
+					goto unknown_chunk;
+				}
 				stcb->asoc.overall_error_count = 0;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5377,7 +5469,7 @@
 				*fwd_tsn_seen = 1;
 				if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 					/* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(inp);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
@@ -5385,8 +5477,9 @@
 					SCTP_TCB_LOCK(stcb);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_31);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					*offset = length;
@@ -5420,32 +5513,10 @@
 				*offset = length;
 				return (NULL);
 			}
-			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
-				/* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-				so = SCTP_INP_SO(inp);
-				atomic_add_int(&stcb->asoc.refcnt, 1);
-				SCTP_TCB_UNLOCK(stcb);
-				SCTP_SOCKET_LOCK(so, 1);
-				SCTP_TCB_LOCK(stcb);
-				atomic_subtract_int(&stcb->asoc.refcnt, 1);
-#endif
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-				SCTP_SOCKET_UNLOCK(so, 1);
-#endif
-				*offset = length;
-				return (NULL);
+			if (stcb->asoc.reconfig_supported == 0) {
+				goto unknown_chunk;
 			}
-			if (stcb->asoc.peer_supports_strreset == 0) {
-				/*
-				 * hmm, peer should have announced this, but
-				 * we will turn it on since he is sending us
-				 * a stream reset.
-				 */
-				stcb->asoc.peer_supports_strreset = 1;
-			}
-			if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) {
+			if (sctp_handle_stream_reset(stcb, m, *offset, ch)) {
 				/* stop processing */
 				*offset = length;
 				return (NULL);
@@ -5463,6 +5534,9 @@
 				return (NULL);
 			}
 			if (ch && (stcb) && netp && (*netp)) {
+				if (stcb->asoc.pktdrop_supported == 0) {
+					goto unknown_chunk;
+				}
 				sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
 				    stcb, *netp,
 				    min(chk_length, (sizeof(chunk_buf) - 4)));
@@ -5469,12 +5543,8 @@
 
 			}
 			break;
-
 		case SCTP_AUTHENTICATION:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
-			if (SCTP_BASE_SYSCTL(sctp_auth_disable))
-				goto unknown_chunk;
-
 			if (stcb == NULL) {
 				/* save the first AUTH for later processing */
 				if (auth_skipped == 0) {
@@ -5485,6 +5555,9 @@
 				/* skip this chunk (temporarily) */
 				goto next_chunk;
 			}
+			if (stcb->asoc.auth_supported == 0) {
+				goto unknown_chunk;
+			}
 			if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
 			    (chk_length > (sizeof(struct sctp_auth_chunk) +
 			    SCTP_AUTH_DIGEST_LEN_MAX))) {
@@ -5515,43 +5588,27 @@
 	unknown_chunk:
 			/* it's an unknown chunk! */
 			if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
-				struct mbuf *mm;
-				struct sctp_paramhdr *phd;
+				struct sctp_gen_error_cause *cause;
+				int len;
 
-				mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (mm) {
-					phd = mtod(mm, struct sctp_paramhdr *);
-					/*
-					 * We cheat and use param type since
-					 * we did not bother to define a
-					 * error cause struct. They are the
-					 * same basic format with different
-					 * names.
-					 */
-					phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
-					phd->param_length = htons(chk_length + sizeof(*phd));
-					SCTP_BUF_LEN(mm) = sizeof(*phd);
-					SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
-					if (SCTP_BUF_NEXT(mm)) {
-						if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(mm), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
-							sctp_m_freem(mm);
-						} else {
+				op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+				    0, M_NOWAIT, 1, MT_DATA);
+				if (op_err != NULL) {
+					len = min(SCTP_SIZE32(chk_length), (uint32_t) (length - *offset));
+					cause = mtod(op_err, struct sctp_gen_error_cause *);
+					cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+					cause->length = htons(len + sizeof(struct sctp_gen_error_cause));
+					SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+					SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, len, M_NOWAIT);
+					if (SCTP_BUF_NEXT(op_err) != NULL) {
 #ifdef SCTP_MBUF_LOGGING
-							if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-								struct mbuf *mat;
-
-								for (mat = SCTP_BUF_NEXT(mm); mat; mat = SCTP_BUF_NEXT(mat)) {
-									if (SCTP_BUF_IS_EXTENDED(mat)) {
-										sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-									}
-								}
-							}
+						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+							sctp_log_mbc(SCTP_BUF_NEXT(op_err), SCTP_MBUF_ICOPY);
+						}
 #endif
-							sctp_queue_op_err(stcb, mm);
-						}
+						sctp_queue_op_err(stcb, op_err);
 					} else {
-						sctp_m_freem(mm);
+						sctp_m_freem(op_err);
 					}
 				}
 			}
@@ -5589,49 +5646,29 @@
 }
 
 
-#ifdef INVARIANTS
-#ifdef __GNUC__
-__attribute__((noinline))
-#endif
-	void
-	     sctp_validate_no_locks(struct sctp_inpcb *inp)
-{
-	struct sctp_tcb *lstcb;
-
-	LIST_FOREACH(lstcb, &inp->sctp_asoc_list, sctp_tcblist) {
-		if (mtx_owned(&lstcb->tcb_mtx)) {
-			panic("Own lock on stcb at return from input");
-		}
-	}
-	if (mtx_owned(&inp->inp_create_mtx)) {
-		panic("Own create lock on inp");
-	}
-	if (mtx_owned(&inp->inp_mtx)) {
-		panic("Own inp lock on inp");
-	}
-}
-
-#endif
-
 /*
  * common input chunk processing (v4 and v6)
  */
 void
-sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
-    int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
-    struct sctp_inpcb *inp, struct sctp_tcb *stcb,
-    struct sctp_nets *net, uint8_t ecn_bits,
-    uint8_t use_mflowid, uint32_t mflowid,
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length,
+    struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, struct sctp_chunkhdr *ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+    uint8_t compute_crc,
+#endif
+    uint8_t ecn_bits,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
-	/*
-	 * Control chunk processing
-	 */
 	uint32_t high_tsn;
 	int fwd_tsn_seen = 0, data_processed = 0;
-	struct mbuf *m = *mm;
+	struct mbuf *m = *mm, *op_err;
+	char msg[SCTP_DIAG_INFO_LEN];
 	int un_sent;
 	int cnt_ctrl_ready = 0;
+	struct sctp_inpcb *inp = NULL, *inp_decr = NULL;
+	struct sctp_tcb *stcb = NULL;
+	struct sctp_nets *net = NULL;
 
 	SCTP_STAT_INCR(sctps_recvdatagrams);
 #ifdef SCTP_AUDITING_ENABLED
@@ -5638,15 +5675,148 @@
 	sctp_audit_log(0xE0, 1);
 	sctp_auditing(0, inp, stcb, net);
 #endif
+#if !defined(SCTP_WITH_NO_CSUM)
+	if (compute_crc != 0) {
+		uint32_t check, calc_check;
 
+		check = sh->checksum;
+		sh->checksum = 0;
+		calc_check = sctp_calculate_cksum(m, iphlen);
+		sh->checksum = check;
+		if (calc_check != check) {
+			SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x  m:%p mlen:%d iphlen:%d\n",
+			    calc_check, check, (void *)m, length, iphlen);
+			stcb = sctp_findassociation_addr(m, offset, src, dst,
+			    sh, ch, &inp, &net, vrf_id);
+#if defined(INET) || defined(INET6)
+			if ((ch->chunk_type != SCTP_INITIATION) &&
+			    (net != NULL) && (net->port != port)) {
+				if (net->port == 0) {
+					/* UDP encapsulation turned on. */
+					net->mtu -= sizeof(struct udphdr);
+					if (stcb->asoc.smallest_mtu > net->mtu) {
+						sctp_pathmtu_adjustment(stcb, net->mtu);
+					}
+				} else if (port == 0) {
+					/* UDP encapsulation turned off. */
+					net->mtu += sizeof(struct udphdr);
+					/* XXX Update smallest_mtu */
+				}
+				net->port = port;
+			}
+#endif
+			if (net != NULL) {
+				net->flowtype = mflowtype;
+				net->flowid = mflowid;
+			}
+			if ((inp != NULL) && (stcb != NULL)) {
+				sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
+				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+			} else if ((inp != NULL) && (stcb == NULL)) {
+				inp_decr = inp;
+			}
+			SCTP_STAT_INCR(sctps_badsum);
+			SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+			goto out;
+		}
+	}
+#endif
+	/* Destination port of 0 is illegal, based on RFC4960. */
+	if (sh->dest_port == 0) {
+		SCTP_STAT_INCR(sctps_hdrops);
+		goto out;
+	}
+	stcb = sctp_findassociation_addr(m, offset, src, dst,
+	    sh, ch, &inp, &net, vrf_id);
+#if defined(INET) || defined(INET6)
+	if ((ch->chunk_type != SCTP_INITIATION) &&
+	    (net != NULL) && (net->port != port)) {
+		if (net->port == 0) {
+			/* UDP encapsulation turned on. */
+			net->mtu -= sizeof(struct udphdr);
+			if (stcb->asoc.smallest_mtu > net->mtu) {
+				sctp_pathmtu_adjustment(stcb, net->mtu);
+			}
+		} else if (port == 0) {
+			/* UDP encapsulation turned off. */
+			net->mtu += sizeof(struct udphdr);
+			/* XXX Update smallest_mtu */
+		}
+		net->port = port;
+	}
+#endif
+	if (net != NULL) {
+		net->flowtype = mflowtype;
+		net->flowid = mflowid;
+	}
+	if (inp == NULL) {
+		SCTP_STAT_INCR(sctps_noport);
+		if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
+			goto out;
+		}
+		if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+			sctp_send_shutdown_complete2(src, dst, sh,
+			    mflowtype, mflowid, fibnum,
+			    vrf_id, port);
+			goto out;
+		}
+		if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+			goto out;
+		}
+		if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
+			if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
+			    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
+			    (ch->chunk_type != SCTP_INIT))) {
+				op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+				    "Out of the blue");
+				sctp_send_abort(m, iphlen, src, dst,
+				    sh, 0, op_err,
+				    mflowtype, mflowid, fibnum,
+				    vrf_id, port);
+			}
+		}
+		goto out;
+	} else if (stcb == NULL) {
+		inp_decr = inp;
+	}
+#ifdef IPSEC
+	/*-
+	 * I very much doubt any of the IPSEC stuff will work but I have no
+	 * idea, so I will leave it in place.
+	 */
+	if (inp != NULL) {
+		switch (dst->sa_family) {
+#ifdef INET
+		case AF_INET:
+			if (ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+				IPSECSTAT_INC(ips_in_polvio);
+				SCTP_STAT_INCR(sctps_hdrops);
+				goto out;
+			}
+			break;
+#endif
+#ifdef INET6
+		case AF_INET6:
+			if (ipsec6_in_reject(m, &inp->ip_inp.inp)) {
+				IPSEC6STAT_INC(ips_in_polvio);
+				SCTP_STAT_INCR(sctps_hdrops);
+				goto out;
+			}
+			break;
+#endif
+		default:
+			break;
+		}
+	}
+#endif
 	SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n",
-	    m, iphlen, offset, length, stcb);
+	    (void *)m, iphlen, offset, length, (void *)stcb);
 	if (stcb) {
 		/* always clear this before beginning a packet */
 		stcb->asoc.authenticated = 0;
 		stcb->asoc.seen_a_sack_this_pkt = 0;
 		SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
-		    stcb, stcb->asoc.state);
+		    (void *)stcb, stcb->asoc.state);
 
 		if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
 		    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
@@ -5657,18 +5827,23 @@
 			 * NOT respond to any packet.. its OOTB.
 			 */
 			SCTP_TCB_UNLOCK(stcb);
-			sctp_handle_ootb(m, iphlen, offset, sh, inp,
-			    use_mflowid, mflowid,
+			stcb = NULL;
+			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
+			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
-			goto out_now;
+			goto out;
 		}
 	}
 	if (IS_SCTP_CONTROL(ch)) {
 		/* process the control portion of the SCTP packet */
 		/* sa_ignore NO_NULL_CHK */
-		stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
+		stcb = sctp_process_control(m, iphlen, &offset, length,
+		    src, dst, sh, ch,
 		    inp, stcb, &net, &fwd_tsn_seen,
-		    use_mflowid, mflowid,
+		    mflowtype, mflowid, fibnum,
 		    vrf_id, port);
 		if (stcb) {
 			/*
@@ -5676,12 +5851,23 @@
 			 * it changes our INP.
 			 */
 			inp = stcb->sctp_ep;
-			if ((net) && (port)) {
+#if defined(INET) || defined(INET6)
+			if ((ch->chunk_type != SCTP_INITIATION) &&
+			    (net != NULL) && (net->port != port)) {
 				if (net->port == 0) {
-					sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+					/* UDP encapsulation turned on. */
+					net->mtu -= sizeof(struct udphdr);
+					if (stcb->asoc.smallest_mtu > net->mtu) {
+						sctp_pathmtu_adjustment(stcb, net->mtu);
+					}
+				} else if (port == 0) {
+					/* UDP encapsulation turned off. */
+					net->mtu += sizeof(struct udphdr);
+					/* XXX Update smallest_mtu */
 				}
 				net->port = port;
 			}
+#endif
 		}
 	} else {
 		/*
@@ -5695,25 +5881,26 @@
 		 * chunks
 		 */
 		if ((stcb != NULL) &&
-		    !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+		    (stcb->asoc.auth_supported == 1) &&
 		    sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
 			/* "silently" ignore */
 			SCTP_STAT_INCR(sctps_recvauthmissing);
-			SCTP_TCB_UNLOCK(stcb);
-			goto out_now;
+			goto out;
 		}
 		if (stcb == NULL) {
 			/* out of the blue DATA chunk */
-			sctp_handle_ootb(m, iphlen, offset, sh, inp,
-			    use_mflowid, mflowid,
+			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
+			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+			    mflowtype, mflowid, fibnum,
 			    vrf_id, port);
-			goto out_now;
+			goto out;
 		}
 		if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
 			/* v_tag mismatch! */
 			SCTP_STAT_INCR(sctps_badvtag);
-			SCTP_TCB_UNLOCK(stcb);
-			goto out_now;
+			goto out;
 		}
 	}
 
@@ -5723,7 +5910,7 @@
 		 * packet while processing control, or we're done with this
 		 * packet (done or skip rest of data), so we drop it...
 		 */
-		goto out_now;
+		goto out;
 	}
 	/*
 	 * DATA chunk processing
@@ -5736,7 +5923,7 @@
 	 */
 	if ((length > offset) &&
 	    (stcb != NULL) &&
-	    !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+	    (stcb->asoc.auth_supported == 1) &&
 	    sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
 	    !stcb->asoc.authenticated) {
 		/* "silently" ignore */
@@ -5774,11 +5961,13 @@
 			/*
 			 * We consider OOTB any data sent during asoc setup.
 			 */
-			sctp_handle_ootb(m, iphlen, offset, sh, inp,
-			    use_mflowid, mflowid,
+			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
+			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
-			SCTP_TCB_UNLOCK(stcb);
-			goto out_now;
+			goto out;
 			/* sa_ignore NOTREACHED */
 			break;
 		case SCTP_STATE_EMPTY:	/* should not happen */
@@ -5786,8 +5975,7 @@
 		case SCTP_STATE_SHUTDOWN_RECEIVED:	/* This is a peer error */
 		case SCTP_STATE_SHUTDOWN_ACK_SENT:
 		default:
-			SCTP_TCB_UNLOCK(stcb);
-			goto out_now;
+			goto out;
 			/* sa_ignore NOTREACHED */
 			break;
 		case SCTP_STATE_OPEN:
@@ -5795,16 +5983,15 @@
 			break;
 		}
 		/* plow through the data chunks while length > offset */
-		retval = sctp_process_data(mm, iphlen, &offset, length, sh,
-		    inp, stcb, net, &high_tsn,
-		    use_mflowid, mflowid,
-		    vrf_id, port);
+		retval = sctp_process_data(mm, iphlen, &offset, length,
+		    inp, stcb, net, &high_tsn);
 		if (retval == 2) {
 			/*
 			 * The association aborted, NO UNLOCK needed since
 			 * the association is destroyed.
 			 */
-			goto out_now;
+			stcb = NULL;
+			goto out;
 		}
 		data_processed = 1;
 		/*
@@ -5814,7 +6001,7 @@
 	}
 	/* take care of ecn */
 	if ((data_processed == 1) &&
-	    (stcb->asoc.ecn_allowed == 1) &&
+	    (stcb->asoc.ecn_supported == 1) &&
 	    ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) {
 		/* Yep, we need to add a ECNE */
 		sctp_send_ecn_echo(stcb, net, high_tsn);
@@ -5849,7 +6036,7 @@
 	if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
 		cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq;
 	}
-	if (cnt_ctrl_ready ||
+	if (cnt_ctrl_ready || stcb->asoc.trigger_reset ||
 	    ((un_sent) &&
 	    (stcb->asoc.peers_rwnd > 0 ||
 	    (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
@@ -5861,11 +6048,16 @@
 	sctp_audit_log(0xE0, 3);
 	sctp_auditing(2, inp, stcb, net);
 #endif
-	SCTP_TCB_UNLOCK(stcb);
-out_now:
-#ifdef INVARIANTS
-	sctp_validate_no_locks(inp);
-#endif
+out:
+	if (stcb != NULL) {
+		SCTP_TCB_UNLOCK(stcb);
+	}
+	if (inp_decr != NULL) {
+		/* reduce ref-count */
+		SCTP_INP_WLOCK(inp_decr);
+		SCTP_INP_DECR_REF(inp_decr);
+		SCTP_INP_WUNLOCK(inp_decr);
+	}
 	return;
 }
 
@@ -5874,9 +6066,9 @@
 sctp_print_mbuf_chain(struct mbuf *m)
 {
 	for (; m; m = SCTP_BUF_NEXT(m)) {
-		SCTP_PRINTF("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m));
+		SCTP_PRINTF("%p: m_len = %ld\n", (void *)m, SCTP_BUF_LEN(m));
 		if (SCTP_BUF_IS_EXTENDED(m))
-			SCTP_PRINTF("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m));
+			SCTP_PRINTF("%p: extend_size = %d\n", (void *)m, SCTP_BUF_EXTEND_SIZE(m));
 	}
 }
 
@@ -5890,259 +6082,111 @@
 	int iphlen;
 	uint32_t vrf_id = 0;
 	uint8_t ecn_bits;
+	struct sockaddr_in src, dst;
 	struct ip *ip;
 	struct sctphdr *sh;
-	struct sctp_inpcb *inp = NULL;
-	struct sctp_nets *net;
-	struct sctp_tcb *stcb = NULL;
 	struct sctp_chunkhdr *ch;
-	int refcount_up = 0;
-	int length, mlen, offset;
-	uint32_t mflowid;
-	uint8_t use_mflowid;
+	int length, offset;
 
 #if !defined(SCTP_WITH_NO_CSUM)
-	uint32_t check, calc_check;
+	uint8_t compute_crc;
 
 #endif
+	uint32_t mflowid;
+	uint8_t mflowtype;
+	uint16_t fibnum;
 
+	iphlen = off;
 	if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
 		SCTP_RELEASE_PKT(i_pak);
 		return;
 	}
-	mlen = SCTP_HEADER_LEN(i_pak);
-	iphlen = off;
 	m = SCTP_HEADER_TO_CHAIN(i_pak);
-
-	net = NULL;
-	SCTP_STAT_INCR(sctps_recvpackets);
-	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
-
-
 #ifdef SCTP_MBUF_LOGGING
 	/* Log in any input mbufs */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		struct mbuf *mat;
-
-		for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
-			if (SCTP_BUF_IS_EXTENDED(mat)) {
-				sctp_log_mb(mat, SCTP_MBUF_INPUT);
-			}
-		}
+		sctp_log_mbc(m, SCTP_MBUF_INPUT);
 	}
 #endif
-#ifdef  SCTP_PACKET_LOGGING
-	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
-		sctp_packet_log(m, mlen);
+#ifdef SCTP_PACKET_LOGGING
+	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+		sctp_packet_log(m);
+	}
 #endif
-	if (m->m_flags & M_FLOWID) {
-		mflowid = m->m_pkthdr.flowid;
-		use_mflowid = 1;
-	} else {
-		mflowid = 0;
-		use_mflowid = 0;
-	}
-	/*
-	 * Must take out the iphlen, since mlen expects this (only effect lb
-	 * case)
-	 */
-	mlen -= iphlen;
-
-	/*
-	 * Get IP, SCTP, and first chunk header together in first mbuf.
-	 */
-	ip = mtod(m, struct ip *);
-	offset = iphlen + sizeof(*sh) + sizeof(*ch);
+	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+	    "sctp_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+	    m->m_pkthdr.len,
+	    if_name(m->m_pkthdr.rcvif),
+	    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+	mflowid = m->m_pkthdr.flowid;
+	mflowtype = M_HASHTYPE_GET(m);
+	fibnum = M_GETFIB(m);
+	SCTP_STAT_INCR(sctps_recvpackets);
+	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+	/* Get IP, SCTP, and first chunk header together in the first mbuf. */
+	offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	if (SCTP_BUF_LEN(m) < offset) {
-		if ((m = m_pullup(m, offset)) == 0) {
+		if ((m = m_pullup(m, offset)) == NULL) {
 			SCTP_STAT_INCR(sctps_hdrops);
 			return;
 		}
-		ip = mtod(m, struct ip *);
 	}
-	/* validate mbuf chain length with IP payload length */
-	if (mlen < (SCTP_GET_IPV4_LENGTH(ip) - iphlen)) {
+	ip = mtod(m, struct ip *);
+	sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+	ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
+	offset -= sizeof(struct sctp_chunkhdr);
+	memset(&src, 0, sizeof(struct sockaddr_in));
+	src.sin_family = AF_INET;
+	src.sin_len = sizeof(struct sockaddr_in);
+	src.sin_port = sh->src_port;
+	src.sin_addr = ip->ip_src;
+	memset(&dst, 0, sizeof(struct sockaddr_in));
+	dst.sin_family = AF_INET;
+	dst.sin_len = sizeof(struct sockaddr_in);
+	dst.sin_port = sh->dest_port;
+	dst.sin_addr = ip->ip_dst;
+	length = ntohs(ip->ip_len);
+	/* Validate mbuf chain length with IP payload length. */
+	if (SCTP_HEADER_LEN(m) != length) {
+		SCTPDBG(SCTP_DEBUG_INPUT1,
+		    "sctp_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m));
 		SCTP_STAT_INCR(sctps_hdrops);
-		goto bad;
+		goto out;
 	}
-	sh = (struct sctphdr *)((caddr_t)ip + iphlen);
-	ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
-	SCTPDBG(SCTP_DEBUG_INPUT1,
-	    "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
-
 	/* SCTP does not allow broadcasts or multicasts */
-	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
-		goto bad;
+	if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr))) {
+		goto out;
 	}
-	if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
-		/*
-		 * We only look at broadcast if its a front state, All
-		 * others we will not have a tcb for anyway.
-		 */
-		goto bad;
+	if (SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) {
+		goto out;
 	}
-	/* validate SCTP checksum */
-	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
-	    "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
-	    m->m_pkthdr.len,
-	    if_name(m->m_pkthdr.rcvif),
-	    m->m_pkthdr.csum_flags);
+	ecn_bits = ip->ip_tos;
 #if defined(SCTP_WITH_NO_CSUM)
 	SCTP_STAT_INCR(sctps_recvnocrc);
 #else
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
 		SCTP_STAT_INCR(sctps_recvhwcrc);
-		goto sctp_skip_csum_4;
+		compute_crc = 0;
+	} else {
+		SCTP_STAT_INCR(sctps_recvswcrc);
+		compute_crc = 1;
 	}
-	check = sh->checksum;	/* save incoming checksum */
-	sh->checksum = 0;	/* prepare for calc */
-	calc_check = sctp_calculate_cksum(m, iphlen);
-	sh->checksum = check;
-	SCTP_STAT_INCR(sctps_recvswcrc);
-	if (calc_check != check) {
-		SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x  m:%p mlen:%d iphlen:%d\n",
-		    calc_check, check, m, mlen, iphlen);
-
-		stcb = sctp_findassociation_addr(m,
-		    offset - sizeof(*ch),
-		    sh, ch, &inp, &net,
-		    vrf_id);
-		if ((net) && (port)) {
-			if (net->port == 0) {
-				sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
-			}
-			net->port = port;
-		}
-		if ((net != NULL) && (use_mflowid != 0)) {
-			net->flowid = mflowid;
-#ifdef INVARIANTS
-			net->flowidset = 1;
 #endif
-		}
-		if ((inp) && (stcb)) {
-			sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
-			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
-		} else if ((inp != NULL) && (stcb == NULL)) {
-			refcount_up = 1;
-		}
-		SCTP_STAT_INCR(sctps_badsum);
-		SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
-		goto bad;
-	}
-sctp_skip_csum_4:
+	sctp_common_input_processing(&m, iphlen, offset, length,
+	    (struct sockaddr *)&src,
+	    (struct sockaddr *)&dst,
+	    sh, ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+	    compute_crc,
 #endif
-	/* destination port of 0 is illegal, based on RFC2960. */
-	if (sh->dest_port == 0) {
-		SCTP_STAT_INCR(sctps_hdrops);
-		goto bad;
-	}
-	/*
-	 * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
-	 * IP/SCTP/first chunk header...
-	 */
-	stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
-	    sh, ch, &inp, &net, vrf_id);
-	if ((net) && (port)) {
-		if (net->port == 0) {
-			sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
-		}
-		net->port = port;
-	}
-	if ((net != NULL) && (use_mflowid != 0)) {
-		net->flowid = mflowid;
-#ifdef INVARIANTS
-		net->flowidset = 1;
-#endif
-	}
-	/* inp's ref-count increased && stcb locked */
-	if (inp == NULL) {
-		struct sctp_init_chunk *init_chk, chunk_buf;
-
-		SCTP_STAT_INCR(sctps_noport);
-		if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0)
-			goto bad;
-		SCTPDBG(SCTP_DEBUG_INPUT1,
-		    "Sending a ABORT from packet entry!\n");
-		if (ch->chunk_type == SCTP_INITIATION) {
-			/*
-			 * we do a trick here to get the INIT tag, dig in
-			 * and get the tag from the INIT and put it in the
-			 * common header.
-			 */
-			init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
-			    iphlen + sizeof(*sh), sizeof(*init_chk),
-			    (uint8_t *) & chunk_buf);
-			if (init_chk != NULL)
-				sh->v_tag = init_chk->init.initiate_tag;
-		}
-		if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
-			sctp_send_shutdown_complete2(m, sh,
-			    use_mflowid, mflowid,
-			    vrf_id, port);
-			goto bad;
-		}
-		if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
-			goto bad;
-		}
-		if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
-			if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
-			    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
-			    (ch->chunk_type != SCTP_INIT))) {
-				sctp_send_abort(m, iphlen, sh, 0, NULL,
-				    use_mflowid, mflowid,
-				    vrf_id, port);
-			}
-		}
-		goto bad;
-	} else if (stcb == NULL) {
-		refcount_up = 1;
-	}
-#ifdef IPSEC
-	/*
-	 * I very much doubt any of the IPSEC stuff will work but I have no
-	 * idea, so I will leave it in place.
-	 */
-	if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
-		MODULE_GLOBAL(ipsec4stat).in_polvio++;
-		SCTP_STAT_INCR(sctps_hdrops);
-		goto bad;
-	}
-#endif				/* IPSEC */
-
-	/*
-	 * common chunk processing
-	 */
-	length = ip->ip_len + iphlen;
-	offset -= sizeof(struct sctp_chunkhdr);
-
-	ecn_bits = ip->ip_tos;
-
-	/* sa_ignore NO_NULL_CHK */
-	sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
-	    inp, stcb, net, ecn_bits,
-	    use_mflowid, mflowid,
+	    ecn_bits,
+	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
-	/* inp's ref-count reduced && stcb unlocked */
+out:
 	if (m) {
 		sctp_m_freem(m);
 	}
-	if ((inp) && (refcount_up)) {
-		/* reduce ref-count */
-		SCTP_INP_DECR_REF(inp);
-	}
 	return;
-bad:
-	if (stcb) {
-		SCTP_TCB_UNLOCK(stcb);
-	}
-	if ((inp) && (refcount_up)) {
-		/* reduce ref-count */
-		SCTP_INP_DECR_REF(inp);
-	}
-	if (m) {
-		sctp_m_freem(m);
-	}
-	return;
 }
 
 #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
@@ -6161,7 +6205,7 @@
 	uint32_t flowid, tag;
 
 	if (mp_ncpus > 1) {
-		if (m->m_flags & M_FLOWID) {
+		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 			flowid = m->m_pkthdr.flowid;
 		} else {
 			/*
@@ -6168,20 +6212,19 @@
 			 * No flow id built by lower layers fix it so we
 			 * create one.
 			 */
-			ip = mtod(m, struct ip *);
-			offset = off + sizeof(*sh);
+			offset = off + sizeof(struct sctphdr);
 			if (SCTP_BUF_LEN(m) < offset) {
-				if ((m = m_pullup(m, offset)) == 0) {
+				if ((m = m_pullup(m, offset)) == NULL) {
 					SCTP_STAT_INCR(sctps_hdrops);
 					return;
 				}
-				ip = mtod(m, struct ip *);
 			}
+			ip = mtod(m, struct ip *);
 			sh = (struct sctphdr *)((caddr_t)ip + off);
 			tag = htonl(sh->v_tag);
 			flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port);
 			m->m_pkthdr.flowid = flowid;
-			m->m_flags |= M_FLOWID;
+			M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 		}
 		cpu_to_use = sctp_cpuarry[flowid % mp_ncpus];
 		sctp_queue_to_mcore(m, off, cpu_to_use);

Modified: trunk/sys/netinet/sctp_input.h
===================================================================
--- trunk/sys/netinet/sctp_input.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_input.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_input.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_input.h 284633 2015-06-20 08:25:27Z tuexen $");
 
 #ifndef _NETINET_SCTP_INPUT_H_
 #define _NETINET_SCTP_INPUT_H_
@@ -40,18 +40,21 @@
 #if defined(_KERNEL) || defined(__Userspace__)
 void
 sctp_common_input_processing(struct mbuf **, int, int, int,
+    struct sockaddr *, struct sockaddr *,
     struct sctphdr *, struct sctp_chunkhdr *,
-    struct sctp_inpcb *, struct sctp_tcb *,
-    struct sctp_nets *, uint8_t,
-    uint8_t, uint32_t,
+#if !defined(SCTP_WITH_NO_CSUM)
+    uint8_t,
+#endif
+    uint8_t,
+    uint8_t, uint32_t, uint16_t,
     uint32_t, uint16_t);
 
-struct sctp_stream_reset_out_request *
+struct sctp_stream_reset_request *
 sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
     struct sctp_tmit_chunk **bchk);
 
 void 
-sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries,
+sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries,
     uint16_t * list);
 
 

Modified: trunk/sys/netinet/sctp_lock_bsd.h
===================================================================
--- trunk/sys/netinet/sctp_lock_bsd.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_lock_bsd.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_lock_bsd.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_lock_bsd.h 235828 2012-05-23 11:26:28Z tuexen $");
 
 #ifndef _NETINET_SCTP_LOCK_BSD_H_
 #define _NETINET_SCTP_LOCK_BSD_H_

Modified: trunk/sys/netinet/sctp_os.h
===================================================================
--- trunk/sys/netinet/sctp_os.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_os.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_os.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_os.h 235828 2012-05-23 11:26:28Z tuexen $");
 
 #ifndef _NETINET_SCTP_OS_H_
 #define _NETINET_SCTP_OS_H_

Modified: trunk/sys/netinet/sctp_os_bsd.h
===================================================================
--- trunk/sys/netinet/sctp_os_bsd.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_os_bsd.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_os_bsd.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_os_bsd.h 314327 2017-02-27 08:27:38Z avg $");
 
 #ifndef _NETINET_SCTP_OS_BSD_H_
 #define _NETINET_SCTP_OS_BSD_H_
@@ -96,7 +96,6 @@
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
-#include <netinet/icmp6.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
@@ -105,6 +104,9 @@
 
 #include <netinet/ip_options.h>
 
+#include <crypto/sha1.h>
+#include <crypto/sha2/sha256.h>
+
 #ifndef in6pcb
 #define in6pcb		inpcb
 #endif
@@ -150,46 +152,31 @@
 #define V_system_base_info VNET(system_base_info)
 #define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m
 #define SCTP_BASE_STATS V_system_base_info.sctpstat
-#define SCTP_BASE_STATS_SYSCTL VNET_NAME(system_base_info.sctpstat)
-#define SCTP_BASE_STAT(__m)     V_system_base_info.sctpstat.__m
-#define SCTP_BASE_SYSCTL(__m) VNET_NAME(system_base_info.sctpsysctl.__m)
+#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
+#define SCTP_BASE_SYSCTL(__m) V_system_base_info.sctpsysctl.__m
 #define SCTP_BASE_VAR(__m) V_system_base_info.__m
 
-/*
- *
- */
-#define USER_ADDR_NULL	(NULL)	/* FIX ME: temp */
-
 #define SCTP_PRINTF(params...)	printf(params)
 #if defined(SCTP_DEBUG)
 #define SCTPDBG(level, params...)					\
 {									\
-    do {								\
-	if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) {			\
-	    SCTP_PRINTF(params);						\
-	}								\
-    } while (0);							\
+	do {								\
+		if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) {		\
+			SCTP_PRINTF(params);				\
+		}							\
+	} while (0);							\
 }
 #define SCTPDBG_ADDR(level, addr)					\
 {									\
-    do {								\
-	if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) {			\
-	    sctp_print_address(addr);					\
-	}								\
-    } while (0);							\
+	do {								\
+		if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) {		\
+			sctp_print_address(addr);			\
+		}							\
+	} while (0);							\
 }
-#define SCTPDBG_PKT(level, iph, sh)					\
-{									\
-    do {								\
-	    if (SCTP_BASE_SYSCTL(sctp_debug_on) & level) {		\
-		    sctp_print_address_pkt(iph, sh);			\
-	    }								\
-    } while (0);							\
-}
 #else
 #define SCTPDBG(level, params...)
 #define SCTPDBG_ADDR(level, addr)
-#define SCTPDBG_PKT(level, iph, sh)
 #endif
 
 #ifdef SCTP_LTRACE_CHUNKS
@@ -201,11 +188,11 @@
 #ifdef SCTP_LTRACE_ERRORS
 #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
-        	SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+		SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
 		            m, inp, stcb, net, file, __LINE__, err);
 #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
-        	SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+		SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
 		            inp, stcb, net, file, __LINE__, err);
 #else
 #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
@@ -239,16 +226,16 @@
  * general memory allocation
  */
 #define SCTP_MALLOC(var, type, size, name) \
-    do { \
-	var = (type)malloc(size, name, M_NOWAIT); \
-    } while (0)
+	do { \
+		var = (type)malloc(size, name, M_NOWAIT); \
+	} while (0)
 
 #define SCTP_FREE(var, type)	free(var, type)
 
 #define SCTP_MALLOC_SONAME(var, type, size) \
-    do { \
-	var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
-    } while (0)
+	do { \
+		var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
+	} while (0)
 
 #define SCTP_FREE_SONAME(var)	free(var, M_SONAME)
 
@@ -335,11 +322,11 @@
 /*      MTU              */
 /*************************/
 #define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
-#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0))
 #define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
 #define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
                                               if (rt != NULL) \
-                                                 rt->rt_rmx.rmx_mtu = mtu; \
+                                                 rt->rt_mtu = mtu; \
                                            } while(0)
 
 /* (de-)register interface event notifications */
@@ -366,7 +353,7 @@
  */
 #define SCTP_HEADER_TO_CHAIN(m) (m)
 #define SCTP_DETACH_HEADER_FROM_CHAIN(m)
-#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len)
 #define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
 #define SCTP_RELEASE_HEADER(m)
 #define SCTP_RELEASE_PKT(m)	sctp_m_freem(m)
@@ -395,10 +382,6 @@
  * its a NOP.
  */
 
-/* Macro's for getting length from V6/V4 header */
-#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
-#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
-
 /* get the v6 hop limit */
 #define SCTP_GET_HLIM(inp, ro)	in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
 
@@ -429,13 +412,8 @@
 typedef struct route sctp_route_t;
 typedef struct rtentry sctp_rtentry_t;
 
-/*
- * XXX multi-FIB support was backed out in r179783 and it seems clear that the
- * VRF support as currently in FreeBSD is not ready to support multi-FIB.
- * It might be best to implement multi-FIB support for both v4 and v6 indepedent
- * of VRFs and leave those to a real MPLS stack.
- */
-#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+#define SCTP_RTALLOC(ro, vrf_id, fibnum) \
+	rtalloc_ign_fib((struct route *)ro, 0UL, fibnum)
 
 /* Future zero copy wakeup/send  function */
 #define SCTP_ZERO_COPY_EVENT(inp, so)
@@ -443,6 +421,11 @@
 #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
 
 /*
+ * SCTP protocol specific mbuf flags.
+ */
+#define	M_NOTIFICATION		M_PROTO1	/* SCTP notification */
+
+/*
  * IP output routines
  */
 #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
@@ -453,6 +436,7 @@
 	    local_stcb->sctp_ep && \
 	    local_stcb->sctp_ep->sctp_socket) \
 		o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \
+	m_clrprotoflags(o_pak); \
 	result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
 }
 
@@ -459,6 +443,7 @@
 #define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
 { \
 	struct sctp_tcb *local_stcb = stcb; \
+	m_clrprotoflags(o_pak); \
 	if (local_stcb && local_stcb->sctp_ep) \
 		result = ip6_output(o_pak, \
 				    ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
@@ -475,23 +460,18 @@
 /*
  * SCTP AUTH
  */
-#define HAVE_SHA2
-
 #define SCTP_READ_RANDOM(buf, len)	read_random(buf, len)
 
-#ifdef USE_SCTP_SHA1
-#include <netinet/sctp_sha1.h>
-#else
-#include <crypto/sha1.h>
 /* map standard crypto API names */
-#define SHA1_Init	SHA1Init
-#define SHA1_Update	SHA1Update
-#define SHA1_Final(x,y)	SHA1Final((caddr_t)x, y)
-#endif
+#define SCTP_SHA1_CTX		SHA1_CTX
+#define SCTP_SHA1_INIT		SHA1Init
+#define SCTP_SHA1_UPDATE	SHA1Update
+#define SCTP_SHA1_FINAL(x,y)	SHA1Final((caddr_t)x, y)
 
-#if defined(HAVE_SHA2)
-#include <crypto/sha2/sha2.h>
-#endif
+#define SCTP_SHA256_CTX		SHA256_CTX
+#define SCTP_SHA256_INIT	SHA256_Init
+#define SCTP_SHA256_UPDATE	SHA256_Update
+#define SCTP_SHA256_FINAL(x,y)	SHA256_Final((caddr_t)x, y)
 
 #endif
 

Modified: trunk/sys/netinet/sctp_output.c
===================================================================
--- trunk/sys/netinet/sctp_output.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_output.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_output.c 240305 2012-09-10 11:38:02Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_output.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
@@ -51,7 +51,9 @@
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
+#endif
 #include <netinet/udp_var.h>
 #include <machine/in_cksum.h>
 
@@ -1864,15 +1866,10 @@
 
 int
 sctp_is_address_in_scope(struct sctp_ifa *ifa,
-    int ipv4_addr_legal,
-    int ipv6_addr_legal,
-    int loopback_scope,
-    int ipv4_local_scope,
-    int local_scope SCTP_UNUSED,/* XXX */
-    int site_scope,
+    struct sctp_scoping *scope,
     int do_update)
 {
-	if ((loopback_scope == 0) &&
+	if ((scope->loopback_scope == 0) &&
 	    (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
 		/*
 		 * skip loopback if not in scope *
@@ -1882,15 +1879,15 @@
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
-		if (ipv4_addr_legal) {
+		if (scope->ipv4_addr_legal) {
 			struct sockaddr_in *sin;
 
-			sin = (struct sockaddr_in *)&ifa->address.sin;
+			sin = &ifa->address.sin;
 			if (sin->sin_addr.s_addr == 0) {
 				/* not in scope , unspecified */
 				return (0);
 			}
-			if ((ipv4_local_scope == 0) &&
+			if ((scope->ipv4_local_scope == 0) &&
 			    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 				/* private address not in scope */
 				return (0);
@@ -1902,7 +1899,7 @@
 #endif
 #ifdef INET6
 	case AF_INET6:
-		if (ipv6_addr_legal) {
+		if (scope->ipv6_addr_legal) {
 			struct sockaddr_in6 *sin6;
 
 			/*
@@ -1916,7 +1913,7 @@
 				return (0);
 			}
 			/* ok to use deprecated addresses? */
-			sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+			sin6 = &ifa->address.sin6;
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				/* skip unspecifed addresses */
 				return (0);
@@ -1925,7 +1922,7 @@
 			    (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
 				return (0);
 			}
-			if ((site_scope == 0) &&
+			if ((scope->site_scope == 0) &&
 			    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
 				return (0);
 			}
@@ -1941,27 +1938,31 @@
 }
 
 static struct mbuf *
-sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa)
+sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
 {
+#if defined(INET) || defined(INET6)
 	struct sctp_paramhdr *parmh;
 	struct mbuf *mret;
-	int len;
+	uint16_t plen;
 
+#endif
+
 	switch (ifa->address.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
-		len = sizeof(struct sctp_ipv4addr_param);
+		plen = (uint16_t) sizeof(struct sctp_ipv4addr_param);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
-		len = sizeof(struct sctp_ipv6addr_param);
+		plen = (uint16_t) sizeof(struct sctp_ipv6addr_param);
 		break;
 #endif
 	default:
 		return (m);
 	}
-	if (M_TRAILINGSPACE(m) >= len) {
+#if defined(INET) || defined(INET6)
+	if (M_TRAILINGSPACE(m) >= plen) {
 		/* easy side we just drop it on the end */
 		parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
 		mret = m;
@@ -1971,7 +1972,7 @@
 		while (SCTP_BUF_NEXT(mret) != NULL) {
 			mret = SCTP_BUF_NEXT(mret);
 		}
-		SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+		SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA);
 		if (SCTP_BUF_NEXT(mret) == NULL) {
 			/* We are hosed, can't add more addresses */
 			return (m);
@@ -1987,12 +1988,12 @@
 			struct sctp_ipv4addr_param *ipv4p;
 			struct sockaddr_in *sin;
 
-			sin = (struct sockaddr_in *)&ifa->address.sin;
+			sin = &ifa->address.sin;
 			ipv4p = (struct sctp_ipv4addr_param *)parmh;
 			parmh->param_type = htons(SCTP_IPV4_ADDRESS);
-			parmh->param_length = htons(len);
+			parmh->param_length = htons(plen);
 			ipv4p->addr = sin->sin_addr.s_addr;
-			SCTP_BUF_LEN(mret) += len;
+			SCTP_BUF_LEN(mret) += plen;
 			break;
 		}
 #endif
@@ -2002,15 +2003,15 @@
 			struct sctp_ipv6addr_param *ipv6p;
 			struct sockaddr_in6 *sin6;
 
-			sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+			sin6 = &ifa->address.sin6;
 			ipv6p = (struct sctp_ipv6addr_param *)parmh;
 			parmh->param_type = htons(SCTP_IPV6_ADDRESS);
-			parmh->param_length = htons(len);
+			parmh->param_length = htons(plen);
 			memcpy(ipv6p->addr, &sin6->sin6_addr,
 			    sizeof(ipv6p->addr));
 			/* clear embedded scope in the address */
 			in6_clearscope((struct in6_addr *)ipv6p->addr);
-			SCTP_BUF_LEN(mret) += len;
+			SCTP_BUF_LEN(mret) += plen;
 			break;
 		}
 #endif
@@ -2017,7 +2018,11 @@
 	default:
 		return (m);
 	}
+	if (len != NULL) {
+		*len += plen;
+	}
 	return (mret);
+#endif
 }
 
 
@@ -2024,7 +2029,8 @@
 struct mbuf *
 sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_scoping *scope,
-    struct mbuf *m_at, int cnt_inits_to)
+    struct mbuf *m_at, int cnt_inits_to,
+    uint16_t * padding_len, uint16_t * chunk_len)
 {
 	struct sctp_vrf *vrf = NULL;
 	int cnt, limit_out = 0, total_count;
@@ -2057,16 +2063,24 @@
 				continue;
 			}
 			LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+#ifdef INET
+				if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
+				    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+				    &sctp_ifap->address.sin.sin_addr) != 0)) {
+					continue;
+				}
+#endif
+#ifdef INET6
+				if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
+				    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+				    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
+					continue;
+				}
+#endif
 				if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 					continue;
 				}
-				if (sctp_is_address_in_scope(sctp_ifap,
-				    scope->ipv4_addr_legal,
-				    scope->ipv6_addr_legal,
-				    scope->loopback_scope,
-				    scope->ipv4_local_scope,
-				    scope->local_scope,
-				    scope->site_scope, 1) == 0) {
+				if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) {
 					continue;
 				}
 				cnt++;
@@ -2092,19 +2106,36 @@
 					continue;
 				}
 				LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+#ifdef INET
+					if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
+					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+					    &sctp_ifap->address.sin.sin_addr) != 0)) {
+						continue;
+					}
+#endif
+#ifdef INET6
+					if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
+					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+					    &sctp_ifap->address.sin6.sin6_addr) != 0)) {
+						continue;
+					}
+#endif
 					if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
 						continue;
 					}
 					if (sctp_is_address_in_scope(sctp_ifap,
-					    scope->ipv4_addr_legal,
-					    scope->ipv6_addr_legal,
-					    scope->loopback_scope,
-					    scope->ipv4_local_scope,
-					    scope->local_scope,
-					    scope->site_scope, 0) == 0) {
+					    scope, 0) == 0) {
 						continue;
 					}
-					m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap);
+					if ((chunk_len != NULL) &&
+					    (padding_len != NULL) &&
+					    (*padding_len > 0)) {
+						memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
+						SCTP_BUF_LEN(m_at) += *padding_len;
+						*chunk_len += *padding_len;
+						*padding_len = 0;
+					}
+					m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len);
 					if (limit_out) {
 						cnt++;
 						total_count++;
@@ -2146,12 +2177,7 @@
 				continue;
 			}
 			if (sctp_is_address_in_scope(laddr->ifa,
-			    scope->ipv4_addr_legal,
-			    scope->ipv6_addr_legal,
-			    scope->loopback_scope,
-			    scope->ipv4_local_scope,
-			    scope->local_scope,
-			    scope->site_scope, 1) == 0) {
+			    scope, 1) == 0) {
 				continue;
 			}
 			cnt++;
@@ -2171,15 +2197,18 @@
 					continue;
 				}
 				if (sctp_is_address_in_scope(laddr->ifa,
-				    scope->ipv4_addr_legal,
-				    scope->ipv6_addr_legal,
-				    scope->loopback_scope,
-				    scope->ipv4_local_scope,
-				    scope->local_scope,
-				    scope->site_scope, 0) == 0) {
+				    scope, 0) == 0) {
 					continue;
 				}
-				m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa);
+				if ((chunk_len != NULL) &&
+				    (padding_len != NULL) &&
+				    (*padding_len > 0)) {
+					memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
+					SCTP_BUF_LEN(m_at) += *padding_len;
+					*chunk_len += *padding_len;
+					*padding_len = 0;
+				}
+				m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len);
 				cnt++;
 				if (cnt >= SCTP_ADDRESS_LIMIT) {
 					break;
@@ -2389,7 +2418,7 @@
 	LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
 		if (laddr->ifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
-			    __FUNCTION__);
+			    __func__);
 			continue;
 		}
 		if (laddr->ifa == ifa) {
@@ -2411,7 +2440,7 @@
 	LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 		if (laddr->ifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
-			    __FUNCTION__);
+			    __func__);
 			continue;
 		}
 		if ((laddr->ifa == ifa) && laddr->action == 0)
@@ -2455,6 +2484,20 @@
 	if (sctp_ifn) {
 		/* is a preferred one on the interface we route out? */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin.sin_addr) != 0)) {
+				continue;
+			}
+#endif
+#ifdef INET6
+			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+				continue;
+			}
+#endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
@@ -2578,6 +2621,20 @@
 	if (sctp_ifn) {
 		/* first try for a preferred address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin.sin_addr) != 0)) {
+				continue;
+			}
+#endif
+#ifdef INET6
+			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+				continue;
+			}
+#endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
@@ -2598,6 +2655,20 @@
 		}
 		/* next try for an acceptable address on the ep */
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin.sin_addr) != 0)) {
+				continue;
+			}
+#endif
+#ifdef INET6
+			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+				continue;
+			}
+#endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
 				continue;
 			if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
@@ -2702,6 +2773,7 @@
 
 static struct sctp_ifa *
 sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
+    struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
@@ -2723,6 +2795,20 @@
 	}
 #endif				/* INET6 */
 	LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+#ifdef INET
+		if ((ifa->address.sa.sa_family == AF_INET) &&
+		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin.sin_addr) != 0)) {
+			continue;
+		}
+#endif
+#ifdef INET6
+		if ((ifa->address.sa.sa_family == AF_INET6) &&
+		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin6.sin6_addr) != 0)) {
+			continue;
+		}
+#endif
 		if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0))
 			continue;
@@ -2782,13 +2868,7 @@
 		}
 #endif
 		if (stcb) {
-			if (sctp_is_address_in_scope(ifa,
-			    stcb->asoc.ipv4_addr_legal,
-			    stcb->asoc.ipv6_addr_legal,
-			    stcb->asoc.loopback_scope,
-			    stcb->asoc.ipv4_local_scope,
-			    stcb->asoc.local_scope,
-			    stcb->asoc.site_scope, 0) == 0) {
+			if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
@@ -2814,6 +2894,7 @@
 
 static int
 sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
+    struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     int non_asoc_addr_ok,
     uint8_t dest_is_loop,
@@ -2824,6 +2905,21 @@
 	int num_eligible_addr = 0;
 
 	LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+#ifdef INET
+		if ((ifa->address.sa.sa_family == AF_INET) &&
+		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin.sin_addr) != 0)) {
+			continue;
+		}
+#endif
+#ifdef INET6
+		if ((ifa->address.sa.sa_family == AF_INET6) &&
+		    (stcb != NULL) &&
+		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+		    &ifa->address.sin6.sin6_addr) != 0)) {
+			continue;
+		}
+#endif
 		if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			continue;
@@ -2834,13 +2930,7 @@
 			continue;
 		}
 		if (stcb) {
-			if (sctp_is_address_in_scope(ifa,
-			    stcb->asoc.ipv4_addr_legal,
-			    stcb->asoc.ipv6_addr_legal,
-			    stcb->asoc.loopback_scope,
-			    stcb->asoc.ipv4_local_scope,
-			    stcb->asoc.local_scope,
-			    stcb->asoc.site_scope, 0) == 0) {
+			if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
 				continue;
 			}
 			if (((non_asoc_addr_ok == 0) &&
@@ -2861,7 +2951,8 @@
 }
 
 static struct sctp_ifa *
-sctp_choose_boundall(struct sctp_tcb *stcb,
+sctp_choose_boundall(struct sctp_inpcb *inp,
+    struct sctp_tcb *stcb,
     struct sctp_nets *net,
     sctp_route_t * ro,
     uint32_t vrf_id,
@@ -2916,7 +3007,7 @@
 		cur_addr_num = net->indx_of_eligible_next_to_use;
 	}
 	num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
-	    stcb,
+	    inp, stcb,
 	    non_asoc_addr_ok,
 	    dest_is_loop,
 	    dest_is_priv, fam);
@@ -2943,7 +3034,7 @@
 	 */
 	SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
 
-	sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+	sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 	    dest_is_priv, cur_addr_num, fam, ro);
 
 	/* if sctp_ifa is NULL something changed??, fall to plan b. */
@@ -2974,7 +3065,7 @@
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
 			continue;
 		}
-		num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, stcb, non_asoc_addr_ok,
+		num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok,
 		    dest_is_loop, dest_is_priv, fam);
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "Found ifn:%p %d preferred source addresses\n",
@@ -2986,7 +3077,7 @@
 		}
 		SCTPDBG(SCTP_DEBUG_OUTPUT2,
 		    "num preferred:%d on interface:%p cur_addr_num:%d\n",
-		    num_preferred, sctp_ifn, cur_addr_num);
+		    num_preferred, (void *)sctp_ifn, cur_addr_num);
 
 		/*
 		 * Ok we have num_eligible_addr set with how many we can
@@ -2996,7 +3087,7 @@
 		if (cur_addr_num >= num_preferred) {
 			cur_addr_num = 0;
 		}
-		sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+		sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
 		    dest_is_priv, cur_addr_num, fam, ro);
 		if (sifa == NULL)
 			continue;
@@ -3023,7 +3114,23 @@
 		goto plan_d;
 	}
 	LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
-		SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", sctp_ifa);
+		SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa);
+#ifdef INET
+		if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+		    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+		    &sctp_ifa->address.sin.sin_addr) != 0)) {
+			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
+			continue;
+		}
+#endif
+#ifdef INET6
+		if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+		    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+		    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
+			continue;
+		}
+#endif
 		if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 		    (non_asoc_addr_ok == 0)) {
 			SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n");
@@ -3036,13 +3143,7 @@
 			continue;
 		}
 		if (stcb) {
-			if (sctp_is_address_in_scope(sifa,
-			    stcb->asoc.ipv4_addr_legal,
-			    stcb->asoc.ipv6_addr_legal,
-			    stcb->asoc.loopback_scope,
-			    stcb->asoc.ipv4_local_scope,
-			    stcb->asoc.local_scope,
-			    stcb->asoc.site_scope, 0) == 0) {
+			if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n");
 				sifa = NULL;
 				continue;
@@ -3060,8 +3161,6 @@
 				sifa = NULL;
 				continue;
 			}
-		} else {
-			SCTP_PRINTF("Stcb is null - no print\n");
 		}
 		atomic_add_int(&sifa->refcount, 1);
 		goto out;
@@ -3073,7 +3172,7 @@
 	 * out and see if we can find an acceptable address somewhere
 	 * amongst all interfaces.
 	 */
-	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", looked_at);
+	SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at);
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 			/* wrong base scope */
@@ -3080,6 +3179,20 @@
 			continue;
 		}
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+			if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+			    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin.sin_addr) != 0)) {
+				continue;
+			}
+#endif
+#ifdef INET6
+			if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+			    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+			    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+				continue;
+			}
+#endif
 			if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 			    (non_asoc_addr_ok == 0))
 				continue;
@@ -3089,13 +3202,7 @@
 			if (sifa == NULL)
 				continue;
 			if (stcb) {
-				if (sctp_is_address_in_scope(sifa,
-				    stcb->asoc.ipv4_addr_legal,
-				    stcb->asoc.ipv6_addr_legal,
-				    stcb->asoc.loopback_scope,
-				    stcb->asoc.ipv4_local_scope,
-				    stcb->asoc.local_scope,
-				    stcb->asoc.site_scope, 0) == 0) {
+				if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
 					sifa = NULL;
 					continue;
 				}
@@ -3116,12 +3223,14 @@
 		}
 	}
 #ifdef INET
-	if ((retried == 0) && (stcb->asoc.ipv4_local_scope == 0)) {
-		stcb->asoc.ipv4_local_scope = 1;
-		retried = 1;
-		goto again_with_private_addresses_allowed;
-	} else if (retried == 1) {
-		stcb->asoc.ipv4_local_scope = 0;
+	if (stcb) {
+		if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
+			stcb->asoc.scope.ipv4_local_scope = 1;
+			retried = 1;
+			goto again_with_private_addresses_allowed;
+		} else if (retried == 1) {
+			stcb->asoc.scope.ipv4_local_scope = 0;
+		}
 	}
 #endif
 out:
@@ -3136,6 +3245,20 @@
 				LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 					struct sctp_ifa *tmp_sifa;
 
+#ifdef INET
+					if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+					    (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+					    &sctp_ifa->address.sin.sin_addr) != 0)) {
+						continue;
+					}
+#endif
+#ifdef INET6
+					if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+					    (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+					    &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+						continue;
+					}
+#endif
 					if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
 					    (non_asoc_addr_ok == 0))
 						continue;
@@ -3150,12 +3273,7 @@
 					}
 					if (stcb) {
 						if (sctp_is_address_in_scope(tmp_sifa,
-						    stcb->asoc.ipv4_addr_legal,
-						    stcb->asoc.ipv6_addr_legal,
-						    stcb->asoc.loopback_scope,
-						    stcb->asoc.ipv4_local_scope,
-						    stcb->asoc.local_scope,
-						    stcb->asoc.site_scope, 0) == 0) {
+						    &stcb->asoc.scope, 0) == 0) {
 							continue;
 						}
 						if (((non_asoc_addr_ok == 0) &&
@@ -3274,7 +3392,7 @@
 		/*
 		 * Need a route to cache.
 		 */
-		SCTP_RTALLOC(ro, vrf_id);
+		SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 	}
 	if (ro->ro_rt == NULL) {
 		return (NULL);
@@ -3326,7 +3444,7 @@
 		/*
 		 * Bound all case
 		 */
-		answer = sctp_choose_boundall(stcb, net, ro, vrf_id,
+		answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
 		    dest_is_priv, dest_is_loop,
 		    non_asoc_addr_ok, fam);
 		SCTP_IPI_ADDR_RUNLOCK();
@@ -3372,7 +3490,7 @@
 			return (found);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
-		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			return (found);
 		}
@@ -3387,11 +3505,11 @@
 		    (cmh.cmsg_type == SCTP_PRINFO) ||
 		    (cmh.cmsg_type == SCTP_AUTHINFO))))) {
 			if (c_type == cmh.cmsg_type) {
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < cpsize) {
 					return (found);
 				}
 				/* It is exactly what we want. Copy it out. */
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), cpsize, (caddr_t)data);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), cpsize, (caddr_t)data);
 				return (1);
 			} else {
 				struct sctp_sndrcvinfo *sndrcvinfo;
@@ -3405,10 +3523,10 @@
 				}
 				switch (cmh.cmsg_type) {
 				case SCTP_SNDINFO:
-					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_sndinfo)) {
+					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_sndinfo)) {
 						return (found);
 					}
-					m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
+					m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
 					sndrcvinfo->sinfo_stream = sndinfo.snd_sid;
 					sndrcvinfo->sinfo_flags = sndinfo.snd_flags;
 					sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid;
@@ -3416,18 +3534,22 @@
 					sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id;
 					break;
 				case SCTP_PRINFO:
-					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_prinfo)) {
+					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_prinfo)) {
 						return (found);
 					}
-					m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
-					sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
+					m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
+					if (prinfo.pr_policy != SCTP_PR_SCTP_NONE) {
+						sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
+					} else {
+						sndrcvinfo->sinfo_timetolive = 0;
+					}
 					sndrcvinfo->sinfo_flags |= prinfo.pr_policy;
 					break;
 				case SCTP_AUTHINFO:
-					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_authinfo)) {
+					if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_authinfo)) {
 						return (found);
 					}
-					m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
+					m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
 					sndrcvinfo->sinfo_keynumber_valid = 1;
 					sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber;
 					break;
@@ -3467,7 +3589,7 @@
 			return (1);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
-		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			*error = EINVAL;
 			return (1);
@@ -3480,11 +3602,11 @@
 		if (cmh.cmsg_level == IPPROTO_SCTP) {
 			switch (cmh.cmsg_type) {
 			case SCTP_INIT:
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_initmsg)) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) {
 					*error = EINVAL;
 					return (1);
 				}
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
 				if (initmsg.sinit_max_attempts)
 					stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
 				if (initmsg.sinit_num_ostreams)
@@ -3497,6 +3619,11 @@
 					struct sctp_stream_out *tmp_str;
 					unsigned int i;
 
+#if defined(SCTP_DETAILED_STR_STATS)
+					int j;
+
+#endif
+
 					/* Default is NOT correct */
 					SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
 					    stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
@@ -3514,10 +3641,21 @@
 						stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt;
 					}
 					for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
-						stcb->asoc.strmout[i].next_sequence_sent = 0;
 						TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+						stcb->asoc.strmout[i].chunks_on_queues = 0;
+						stcb->asoc.strmout[i].next_sequence_send = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+						for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+							stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+							stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+						}
+#else
+						stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+						stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
 						stcb->asoc.strmout[i].stream_no = i;
 						stcb->asoc.strmout[i].last_msg_incomplete = 0;
+						stcb->asoc.strmout[i].state = SCTP_STREAM_OPENING;
 						stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
 					}
 				}
@@ -3524,7 +3662,7 @@
 				break;
 #ifdef INET
 			case SCTP_DSTADDRV4:
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
@@ -3532,7 +3670,7 @@
 				sin.sin_family = AF_INET;
 				sin.sin_len = sizeof(struct sockaddr_in);
 				sin.sin_port = stcb->rport;
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 				if ((sin.sin_addr.s_addr == INADDR_ANY) ||
 				    (sin.sin_addr.s_addr == INADDR_BROADCAST) ||
 				    IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
@@ -3548,7 +3686,7 @@
 #endif
 #ifdef INET6
 			case SCTP_DSTADDRV6:
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
 					*error = EINVAL;
 					return (1);
 				}
@@ -3556,7 +3694,7 @@
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_len = sizeof(struct sockaddr_in6);
 				sin6.sin6_port = stcb->rport;
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 				if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
 				    IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
 					*error = EINVAL;
@@ -3596,7 +3734,7 @@
 
 static struct sctp_tcb *
 sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
-    in_port_t port,
+    uint16_t port,
     struct mbuf *control,
     struct sctp_nets **net_p,
     int *error)
@@ -3624,7 +3762,7 @@
 			return (NULL);
 		}
 		m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
-		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+		if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
 			/* We dont't have a complete CMSG header. */
 			*error = EINVAL;
 			return (NULL);
@@ -3638,7 +3776,7 @@
 			switch (cmh.cmsg_type) {
 #ifdef INET
 			case SCTP_DSTADDRV4:
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
 					*error = EINVAL;
 					return (NULL);
 				}
@@ -3646,13 +3784,13 @@
 				sin.sin_family = AF_INET;
 				sin.sin_len = sizeof(struct sockaddr_in);
 				sin.sin_port = port;
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
 				addr = (struct sockaddr *)&sin;
 				break;
 #endif
 #ifdef INET6
 			case SCTP_DSTADDRV6:
-				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+				if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
 					*error = EINVAL;
 					return (NULL);
 				}
@@ -3660,7 +3798,7 @@
 				sin6.sin6_family = AF_INET6;
 				sin6.sin6_len = sizeof(struct sockaddr_in6);
 				sin6.sin6_port = port;
-				m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+				m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
 #ifdef INET
 				if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
 					in6_sin6_2_sin(&sin, &sin6);
@@ -3697,14 +3835,13 @@
 	int sig_offset;
 	uint16_t cookie_sz;
 
-	mret = NULL;
 	mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
 	    sizeof(struct sctp_paramhdr)), 0,
-	    M_DONTWAIT, 1, MT_DATA);
+	    M_NOWAIT, 1, MT_DATA);
 	if (mret == NULL) {
 		return (NULL);
 	}
-	copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+	copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT);
 	if (copy_init == NULL) {
 		sctp_m_freem(mret);
 		return (NULL);
@@ -3711,17 +3848,11 @@
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		struct mbuf *mat;
-
-		for (mat = copy_init; mat; mat = SCTP_BUF_NEXT(mat)) {
-			if (SCTP_BUF_IS_EXTENDED(mat)) {
-				sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-			}
-		}
+		sctp_log_mbc(copy_init, SCTP_MBUF_ICOPY);
 	}
 #endif
 	copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
-	    M_DONTWAIT);
+	    M_NOWAIT);
 	if (copy_initack == NULL) {
 		sctp_m_freem(mret);
 		sctp_m_freem(copy_init);
@@ -3729,13 +3860,7 @@
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		struct mbuf *mat;
-
-		for (mat = copy_initack; mat; mat = SCTP_BUF_NEXT(mat)) {
-			if (SCTP_BUF_IS_EXTENDED(mat)) {
-				sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-			}
-		}
+		sctp_log_mbc(copy_initack, SCTP_MBUF_ICOPY);
 	}
 #endif
 	/* easy side we just drop it on the end */
@@ -3771,7 +3896,7 @@
 			break;
 		}
 	}
-	sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+	sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_NOWAIT, 1, MT_DATA);
 	if (sig == NULL) {
 		/* no space, so free the entire chain */
 		sctp_m_freem(mret);
@@ -3793,7 +3918,7 @@
 static uint8_t
 sctp_get_ect(struct sctp_tcb *stcb)
 {
-	if ((stcb != NULL) && (stcb->asoc.ecn_allowed == 1)) {
+	if ((stcb != NULL) && (stcb->asoc.ecn_supported == 1)) {
 		return (SCTP_ECT0_BIT);
 	} else {
 		return (0);
@@ -3813,7 +3938,7 @@
 		SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa);
 		if (net->dest_state & SCTP_ADDR_CONFIRMED) {
 			if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
-				SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
+				SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net);
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
 				    stcb, 0,
 				    (void *)net,
@@ -3864,7 +3989,7 @@
     uint32_t v_tag,
     uint16_t port,
     union sctp_sockstore *over_addr,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     int so_locked SCTP_UNUSED
 #else
@@ -3890,8 +4015,11 @@
 	struct sctphdr *sctphdr;
 	int packet_length;
 	int ret;
+
+#if defined(INET) || defined(INET6)
 	uint32_t vrf_id;
 
+#endif
 #if defined(INET) || defined(INET6)
 	struct mbuf *o_pak;
 	sctp_route_t *ro = NULL;
@@ -3900,7 +4028,7 @@
 #endif
 	uint8_t tos_value;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so = NULL;
 
 #endif
@@ -3910,12 +4038,13 @@
 		sctp_m_freem(m);
 		return (EFAULT);
 	}
+#if defined(INET) || defined(INET6)
 	if (stcb) {
 		vrf_id = stcb->asoc.vrf_id;
 	} else {
 		vrf_id = inp->def_vrf_id;
 	}
-
+#endif
 	/* fill in the HMAC digest for any AUTH chunk in the packet */
 	if ((auth != NULL) && (stcb != NULL)) {
 		sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
@@ -3936,11 +4065,11 @@
 			sctp_route_t iproute;
 			int len;
 
-			len = sizeof(struct ip) + sizeof(struct sctphdr);
+			len = SCTP_MIN_V4_OVERHEAD;
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
-			newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -3951,18 +4080,11 @@
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
-#ifdef INVARIANTS
-				if (net->flowidset == 0) {
-					panic("Flow ID not set");
-				}
-#endif
 				m->m_pkthdr.flowid = net->flowid;
-				m->m_flags |= M_FLOWID;
+				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
-				if (use_mflowid != 0) {
-					m->m_pkthdr.flowid = mflowid;
-					m->m_flags |= M_FLOWID;
-				}
+				m->m_pkthdr.flowid = mflowid;
+				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 			ip = mtod(m, struct ip *);
@@ -3981,15 +4103,15 @@
 				tos_value |= sctp_get_ect(stcb);
 			}
 			if ((nofragment_flag) && (port == 0)) {
-				ip->ip_off = IP_DF;
-			} else
-				ip->ip_off = 0;
-
+				ip->ip_off = htons(IP_DF);
+			} else {
+				ip->ip_off = htons(0);
+			}
 			/* FreeBSD has a function for ip_id's */
 			ip->ip_id = ip_newid();
 
 			ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
-			ip->ip_len = packet_length;
+			ip->ip_len = htons(packet_length);
 			ip->ip_tos = tos_value;
 			if (port) {
 				ip->ip_p = IPPROTO_UDP;
@@ -4052,7 +4174,7 @@
 					sctp_free_ifa(_lsrc);
 				} else {
 					ip->ip_src = over_addr->sin.sin_addr;
-					SCTP_RTALLOC(ro, vrf_id);
+					SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 				}
 			}
 			if (port) {
@@ -4107,7 +4229,7 @@
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
 			    (uint32_t) (ntohl(ip->ip_dst.s_addr)));
 			SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
-			    ro->ro_rt);
+			    (void *)ro->ro_rt);
 
 			if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
 				/* failed to prepend data, give up */
@@ -4115,10 +4237,6 @@
 				sctp_m_freem(m);
 				return (ENOMEM);
 			}
-#ifdef  SCTP_PACKET_LOGGING
-			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
-				sctp_packet_log(m, packet_length);
-#endif
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
@@ -4135,12 +4253,16 @@
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				m->m_pkthdr.csum_flags = CSUM_SCTP;
-				m->m_pkthdr.csum_data = 0;
+				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 			}
+#ifdef SCTP_PACKET_LOGGING
+			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+				sctp_packet_log(o_pak);
+#endif
 			/* send it out.  table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				so = SCTP_INP_SO(inp);
 				SCTP_SOCKET_UNLOCK(so, 0);
@@ -4147,7 +4269,7 @@
 			}
 #endif
 			SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
@@ -4225,11 +4347,11 @@
 				flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
 			}
 			flowlabel &= 0x000fffff;
-			len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
+			len = SCTP_MIN_OVERHEAD;
 			if (port) {
 				len += sizeof(struct udphdr);
 			}
-			newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+			newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
 			if (newm == NULL) {
 				sctp_m_freem(m);
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -4240,18 +4362,11 @@
 			SCTP_BUF_NEXT(newm) = m;
 			m = newm;
 			if (net != NULL) {
-#ifdef INVARIANTS
-				if (net->flowidset == 0) {
-					panic("Flow ID not set");
-				}
-#endif
 				m->m_pkthdr.flowid = net->flowid;
-				m->m_flags |= M_FLOWID;
+				M_HASHTYPE_SET(m, net->flowtype);
 			} else {
-				if (use_mflowid != 0) {
-					m->m_pkthdr.flowid = mflowid;
-					m->m_flags |= M_FLOWID;
-				}
+				m->m_pkthdr.flowid = mflowid;
+				M_HASHTYPE_SET(m, mflowtype);
 			}
 			packet_length = sctp_calculate_len(m);
 
@@ -4373,7 +4488,7 @@
 					sctp_free_ifa(_lsrc);
 				} else {
 					lsa6->sin6_addr = over_addr->sin6.sin6_addr;
-					SCTP_RTALLOC(ro, vrf_id);
+					SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
 				}
 				(void)sa6_recoverscope(sin6);
 			}
@@ -4464,10 +4579,6 @@
 				SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
 				return (ENOMEM);
 			}
-#ifdef  SCTP_PACKET_LOGGING
-			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
-				sctp_packet_log(m, packet_length);
-#endif
 			SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
 			if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
@@ -4484,19 +4595,23 @@
 				SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 				m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
-				m->m_pkthdr.csum_data = 0;
+				m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 				SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 			}
 			/* send it out. table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				so = SCTP_INP_SO(inp);
 				SCTP_SOCKET_UNLOCK(so, 0);
 			}
 #endif
+#ifdef SCTP_PACKET_LOGGING
+			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+				sctp_packet_log(o_pak);
+#endif
 			SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
@@ -4575,21 +4690,18 @@
 #endif
 )
 {
-	struct mbuf *m, *m_at, *mp_last;
+	struct mbuf *m, *m_last;
 	struct sctp_nets *net;
 	struct sctp_init_chunk *init;
 	struct sctp_supported_addr_param *sup_addr;
 	struct sctp_adaptation_layer_indication *ali;
-	struct sctp_ecn_supported_param *ecn;
-	struct sctp_prsctp_supported_param *prsctp;
 	struct sctp_supported_chunk_types_param *pr_supported;
+	struct sctp_paramhdr *ph;
 	int cnt_inits_to = 0;
-	int padval, ret;
-	int num_ext;
-	int p_len;
+	int ret;
+	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
 	/* INIT's always go to the primary (and usually ONLY address) */
-	mp_last = NULL;
 	net = stcb->asoc.primary_destination;
 	if (net == NULL) {
 		net = TAILQ_FIRST(&stcb->asoc.nets);
@@ -4606,15 +4718,12 @@
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
 #ifdef INET6
-	if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) {
+	if (net->ro._l_addr.sa.sa_family == AF_INET6) {
 		/*
 		 * special hook, if we are sending to link local it will not
 		 * show up in our private address count.
 		 */
-		struct sockaddr_in6 *sin6l;
-
-		sin6l = &net->ro._l_addr.sin6;
-		if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr))
+		if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr))
 			cnt_inits_to = 1;
 	}
 #endif
@@ -4626,20 +4735,15 @@
 	/* start the INIT timer */
 	sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
 
-	m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+	m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
 		return;
 	}
-	SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
-	/*
-	 * assume peer supports asconf in order to be able to queue local
-	 * address changes while an INIT is in flight and before the assoc
-	 * is established.
-	 */
-	stcb->asoc.peer_supports_asconf = 1;
-	/* Now lets put the SCTP header in place */
+	chunk_len = (uint16_t) sizeof(struct sctp_init_chunk);
+	padding_len = 0;
+	/* Now lets put the chunk header in place */
 	init = mtod(m, struct sctp_init_chunk *);
 	/* now the chunk header */
 	init->ch.chunk_type = SCTP_INITIATION;
@@ -4651,190 +4755,187 @@
 	/* set up some of the credits. */
 	init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0,
 	    SCTP_MINIMAL_RWND));
-
 	init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
 	init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
 	init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
-	/* now the address restriction */
-	/* XXX Should we take the address family of the socket into account? */
-	sup_addr = (struct sctp_supported_addr_param *)((caddr_t)init +
-	    sizeof(*init));
-	sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
-#ifdef INET6
-#ifdef INET
-	/* we support 2 types: IPv4/IPv6 */
-	sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + 2 * sizeof(uint16_t));
-	sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
-	sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS);
-#else
-	/* we support 1 type: IPv6 */
-	sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + sizeof(uint16_t));
-	sup_addr->addr_type[0] = htons(SCTP_IPV6_ADDRESS);
-	sup_addr->addr_type[1] = htons(0);	/* this is the padding */
-#endif
-#else
-	/* we support 1 type: IPv4 */
-	sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + sizeof(uint16_t));
-	sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
-	sup_addr->addr_type[1] = htons(0);	/* this is the padding */
-#endif
-	SCTP_BUF_LEN(m) += sizeof(struct sctp_supported_addr_param);
-	/* adaptation layer indication parameter */
-	ali = (struct sctp_adaptation_layer_indication *)((caddr_t)sup_addr + sizeof(struct sctp_supported_addr_param));
-	ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
-	ali->ph.param_length = htons(sizeof(*ali));
-	ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
-	SCTP_BUF_LEN(m) += sizeof(*ali);
-	ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
 
+	/* Adaptation layer indication parameter */
+	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
+		parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
+		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
+		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+		ali->ph.param_length = htons(parameter_len);
+		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
+		chunk_len += parameter_len;
+	}
+	/* ECN parameter */
+	if (stcb->asoc.ecn_supported == 1) {
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+		ph->param_type = htons(SCTP_ECN_CAPABLE);
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
+	}
+	/* PR-SCTP supported parameter */
+	if (stcb->asoc.prsctp_supported == 1) {
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
+	}
+	/* Add NAT friendly parameter. */
 	if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
-		/* Add NAT friendly parameter */
-		struct sctp_paramhdr *ph;
-
-		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
-		ph->param_length = htons(sizeof(struct sctp_paramhdr));
-		SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
-		ecn = (struct sctp_ecn_supported_param *)((caddr_t)ph + sizeof(*ph));
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
 	}
-	/* now any cookie time extensions */
-	if (stcb->asoc.cookie_preserve_req) {
-		struct sctp_cookie_perserve_param *cookie_preserve;
-
-		cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn);
-		cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
-		cookie_preserve->ph.param_length = htons(
-		    sizeof(*cookie_preserve));
-		cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
-		SCTP_BUF_LEN(m) += sizeof(*cookie_preserve);
-		ecn = (struct sctp_ecn_supported_param *)(
-		    (caddr_t)cookie_preserve + sizeof(*cookie_preserve));
-		stcb->asoc.cookie_preserve_req = 0;
+	/* And now tell the peer which extensions we support */
+	num_ext = 0;
+	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+	if (stcb->asoc.prsctp_supported == 1) {
+		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
 	}
-	/* ECN parameter */
-	if (stcb->asoc.ecn_allowed == 1) {
-		ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
-		ecn->ph.param_length = htons(sizeof(*ecn));
-		SCTP_BUF_LEN(m) += sizeof(*ecn);
-		prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
-		    sizeof(*ecn));
-	} else {
-		prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
-	}
-	/* And now tell the peer we do pr-sctp */
-	prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
-	prsctp->ph.param_length = htons(sizeof(*prsctp));
-	SCTP_BUF_LEN(m) += sizeof(*prsctp);
-
-	/* And now tell the peer we do all the extensions */
-	pr_supported = (struct sctp_supported_chunk_types_param *)
-	    ((caddr_t)prsctp + sizeof(*prsctp));
-	pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
-	num_ext = 0;
-	pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
-	pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
-	pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
-	pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
-	pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
-	if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+	if (stcb->asoc.auth_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
 	}
-	if (stcb->asoc.sctp_nr_sack_on_off == 1) {
+	if (stcb->asoc.asconf_supported == 1) {
+		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+	}
+	if (stcb->asoc.reconfig_supported == 1) {
+		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+	}
+	if (stcb->asoc.nrsack_supported == 1) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
 	}
-	p_len = sizeof(*pr_supported) + num_ext;
-	pr_supported->ph.param_length = htons(p_len);
-	bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
-	SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-
-
+	if (stcb->asoc.pktdrop_supported == 1) {
+		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+	}
+	if (num_ext > 0) {
+		parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+		pr_supported->ph.param_length = htons(parameter_len);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		chunk_len += parameter_len;
+	}
 	/* add authentication parameters */
-	if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
-		struct sctp_auth_random *randp;
-		struct sctp_auth_hmac_algo *hmacs;
-		struct sctp_auth_chunk_list *chunks;
-
+	if (stcb->asoc.auth_supported) {
 		/* attach RANDOM parameter, if available */
 		if (stcb->asoc.authinfo.random != NULL) {
-			randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-			p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len;
+			struct sctp_auth_random *randp;
+
+			if (padding_len > 0) {
+				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+				chunk_len += padding_len;
+				padding_len = 0;
+			}
+			randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
+			parameter_len = (uint16_t) sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len;
 			/* random key already contains the header */
-			bcopy(stcb->asoc.authinfo.random->key, randp, p_len);
-			/* zero out any padding required */
-			bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
-			SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+			memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len);
+			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+			chunk_len += parameter_len;
 		}
 		/* add HMAC_ALGO parameter */
-		hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-		p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
-		    (uint8_t *) hmacs->hmac_ids);
-		if (p_len > 0) {
-			p_len += sizeof(*hmacs);
+		if (stcb->asoc.local_hmacs != NULL) {
+			struct sctp_auth_hmac_algo *hmacs;
+
+			if (padding_len > 0) {
+				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+				chunk_len += padding_len;
+				padding_len = 0;
+			}
+			hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
+			parameter_len = (uint16_t) (sizeof(struct sctp_auth_hmac_algo) +
+			    stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t));
 			hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
-			hmacs->ph.param_length = htons(p_len);
-			/* zero out any padding required */
-			bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
-			SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+			hmacs->ph.param_length = htons(parameter_len);
+			sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *) hmacs->hmac_ids);
+			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+			chunk_len += parameter_len;
 		}
 		/* add CHUNKS parameter */
-		chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-		p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks,
-		    chunks->chunk_types);
-		if (p_len > 0) {
-			p_len += sizeof(*chunks);
+		if (stcb->asoc.local_auth_chunks != NULL) {
+			struct sctp_auth_chunk_list *chunks;
+
+			if (padding_len > 0) {
+				memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+				chunk_len += padding_len;
+				padding_len = 0;
+			}
+			chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
+			parameter_len = (uint16_t) (sizeof(struct sctp_auth_chunk_list) +
+			    sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks));
 			chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
-			chunks->ph.param_length = htons(p_len);
-			/* zero out any padding required */
-			bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
-			SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+			chunks->ph.param_length = htons(parameter_len);
+			sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types);
+			padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+			chunk_len += parameter_len;
 		}
 	}
-	/* now the addresses */
-	{
-		struct sctp_scoping scp;
+	/* now any cookie time extensions */
+	if (stcb->asoc.cookie_preserve_req) {
+		struct sctp_cookie_perserve_param *cookie_preserve;
 
-		/*
-		 * To optimize this we could put the scoping stuff into a
-		 * structure and remove the individual uint8's from the
-		 * assoc structure. Then we could just sifa in the address
-		 * within the stcb. But for now this is a quick hack to get
-		 * the address stuff teased apart.
-		 */
-
-		scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal;
-		scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal;
-		scp.loopback_scope = stcb->asoc.loopback_scope;
-		scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope;
-		scp.local_scope = stcb->asoc.local_scope;
-		scp.site_scope = stcb->asoc.site_scope;
-
-		sctp_add_addresses_to_i_ia(inp, stcb, &scp, m, cnt_inits_to);
+		if (padding_len > 0) {
+			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+			chunk_len += padding_len;
+			padding_len = 0;
+		}
+		parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param);
+		cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
+		cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+		cookie_preserve->ph.param_length = htons(parameter_len);
+		cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+		stcb->asoc.cookie_preserve_req = 0;
+		chunk_len += parameter_len;
 	}
+	if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
+		uint8_t i;
 
-	/* calulate the size and update pkt header and chunk header */
-	p_len = 0;
-	for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
-		if (SCTP_BUF_NEXT(m_at) == NULL)
-			mp_last = m_at;
-		p_len += SCTP_BUF_LEN(m_at);
+		if (padding_len > 0) {
+			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+			chunk_len += padding_len;
+			padding_len = 0;
+		}
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		if (stcb->asoc.scope.ipv4_addr_legal) {
+			parameter_len += (uint16_t) sizeof(uint16_t);
+		}
+		if (stcb->asoc.scope.ipv6_addr_legal) {
+			parameter_len += (uint16_t) sizeof(uint16_t);
+		}
+		sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
+		sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+		sup_addr->ph.param_length = htons(parameter_len);
+		i = 0;
+		if (stcb->asoc.scope.ipv4_addr_legal) {
+			sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
+		}
+		if (stcb->asoc.scope.ipv6_addr_legal) {
+			sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
+		}
+		padding_len = 4 - 2 * i;
+		chunk_len += parameter_len;
 	}
-	init->ch.chunk_length = htons(p_len);
+	SCTP_BUF_LEN(m) = chunk_len;
+	/* now the addresses */
 	/*
-	 * We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return
-	 * here since the timer will drive a retranmission.
+	 * To optimize this we could put the scoping stuff into a structure
+	 * and remove the individual uint8's from the assoc structure. Then
+	 * we could just sifa in the address within the stcb. But for now
+	 * this is a quick hack to get the address stuff teased apart.
 	 */
+	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope,
+	    m, cnt_inits_to,
+	    &padding_len, &chunk_len);
 
-	/* I don't expect this to execute but we will be safe here */
-	padval = p_len % 4;
-	if ((padval) && (mp_last)) {
-		/*
-		 * The compiler worries that mp_last may not be set even
-		 * though I think it is impossible :-> however we add
-		 * mp_last here just in case.
-		 */
-		ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
-		if (ret) {
-			/* Houston we have a problem, no space */
+	init->ch.chunk_length = htons(chunk_len);
+	if (padding_len > 0) {
+		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
@@ -4971,7 +5072,6 @@
 			*nat_friendly = 1;
 			/* fall through */
 		case SCTP_PRSCTP_SUPPORTED:
-
 			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen);
 				goto invalid_size;
@@ -4979,7 +5079,7 @@
 			at += padded_size;
 			break;
 		case SCTP_ECN_CAPABLE:
-			if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+			if (padded_size != sizeof(struct sctp_paramhdr)) {
 				SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
 				goto invalid_size;
 			}
@@ -5009,13 +5109,14 @@
 				if (op_err == NULL) {
 					/* Ok need to try to get a mbuf */
 #ifdef INET6
-					l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+					l_len = SCTP_MIN_OVERHEAD;
 #else
-					l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+					l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
+					l_len += sizeof(struct sctp_chunkhdr);
 					l_len += plen;
 					l_len += sizeof(struct sctp_paramhdr);
-					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 					if (op_err) {
 						SCTP_BUF_LEN(op_err) = 0;
 						/*
@@ -5078,13 +5179,14 @@
 
 					/* Ok need to try to get an mbuf */
 #ifdef INET6
-					l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+					l_len = SCTP_MIN_OVERHEAD;
 #else
-					l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+					l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
+					l_len += sizeof(struct sctp_chunkhdr);
 					l_len += plen;
 					l_len += sizeof(struct sctp_paramhdr);
-					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+					op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 					if (op_err) {
 						SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
@@ -5153,12 +5255,13 @@
 		int l_len;
 
 #ifdef INET6
-		l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+		l_len = SCTP_MIN_OVERHEAD;
 #else
-		l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+		l_len = SCTP_MIN_V4_OVERHEAD;
 #endif
+		l_len += sizeof(struct sctp_chunkhdr);
 		l_len += (2 * sizeof(struct sctp_paramhdr));
-		op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+		op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (op_err) {
 			SCTP_BUF_LEN(op_err) = 0;
 #ifdef INET6
@@ -5192,7 +5295,7 @@
 
 static int
 sctp_are_there_new_addresses(struct sctp_association *asoc,
-    struct mbuf *in_initpkt, int offset)
+    struct mbuf *in_initpkt, int offset, struct sockaddr *src)
 {
 	/*
 	 * Given a INIT packet, look through the packet to verify that there
@@ -5207,7 +5310,7 @@
 	uint16_t ptype, plen;
 	uint8_t fnd;
 	struct sctp_nets *net;
-	struct ip *iph;
+	int check_src;
 
 #ifdef INET
 	struct sockaddr_in sin4, *sa4;
@@ -5215,7 +5318,6 @@
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6, *sa6;
-	struct ip6_hdr *ip6h;
 
 #endif
 
@@ -5229,57 +5331,63 @@
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(sin6);
 #endif
-	sa_touse = NULL;
 	/* First what about the src address of the pkt ? */
-	iph = mtod(in_initpkt, struct ip *);
-	switch (iph->ip_v) {
+	check_src = 0;
+	switch (src->sa_family) {
 #ifdef INET
-	case IPVERSION:
-		/* source addr is IPv4 */
-		sin4.sin_addr = iph->ip_src;
-		sa_touse = (struct sockaddr *)&sin4;
+	case AF_INET:
+		if (asoc->scope.ipv4_addr_legal) {
+			check_src = 1;
+		}
 		break;
 #endif
 #ifdef INET6
-	case IPV6_VERSION >> 4:
-		/* source addr is IPv6 */
-		ip6h = mtod(in_initpkt, struct ip6_hdr *);
-		sin6.sin6_addr = ip6h->ip6_src;
-		sa_touse = (struct sockaddr *)&sin6;
+	case AF_INET6:
+		if (asoc->scope.ipv6_addr_legal) {
+			check_src = 1;
+		}
 		break;
 #endif
 	default:
-		return (1);
+		/* TSNH */
+		break;
 	}
+	if (check_src) {
+		fnd = 0;
+		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+			sa = (struct sockaddr *)&net->ro._l_addr;
+			if (sa->sa_family == src->sa_family) {
+#ifdef INET
+				if (sa->sa_family == AF_INET) {
+					struct sockaddr_in *src4;
 
-	fnd = 0;
-	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
-		sa = (struct sockaddr *)&net->ro._l_addr;
-		if (sa->sa_family == sa_touse->sa_family) {
-#ifdef INET
-			if (sa->sa_family == AF_INET) {
-				sa4 = (struct sockaddr_in *)sa;
-				if (sa4->sin_addr.s_addr == sin4.sin_addr.s_addr) {
-					fnd = 1;
-					break;
+					sa4 = (struct sockaddr_in *)sa;
+					src4 = (struct sockaddr_in *)src;
+					if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
+						fnd = 1;
+						break;
+					}
 				}
-			}
 #endif
 #ifdef INET6
-			if (sa->sa_family == AF_INET6) {
-				sa6 = (struct sockaddr_in6 *)sa;
-				if (SCTP6_ARE_ADDR_EQUAL(sa6, &sin6)) {
-					fnd = 1;
-					break;
+				if (sa->sa_family == AF_INET6) {
+					struct sockaddr_in6 *src6;
+
+					sa6 = (struct sockaddr_in6 *)sa;
+					src6 = (struct sockaddr_in6 *)src;
+					if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
+						fnd = 1;
+						break;
+					}
 				}
+#endif
 			}
-#endif
 		}
+		if (fnd == 0) {
+			/* New address added! no need to look futher. */
+			return (1);
+		}
 	}
-	if (fnd == 0) {
-		/* New address added! no need to look futher. */
-		return (1);
-	}
 	/* Ok so far lets munge through the rest of the packet */
 	offset += sizeof(struct sctp_init_chunk);
 	phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
@@ -5299,9 +5407,11 @@
 				    phdr == NULL) {
 					return (1);
 				}
-				p4 = (struct sctp_ipv4addr_param *)phdr;
-				sin4.sin_addr.s_addr = p4->addr;
-				sa_touse = (struct sockaddr *)&sin4;
+				if (asoc->scope.ipv4_addr_legal) {
+					p4 = (struct sctp_ipv4addr_param *)phdr;
+					sin4.sin_addr.s_addr = p4->addr;
+					sa_touse = (struct sockaddr *)&sin4;
+				}
 				break;
 			}
 #endif
@@ -5316,10 +5426,12 @@
 				    phdr == NULL) {
 					return (1);
 				}
-				p6 = (struct sctp_ipv6addr_param *)phdr;
-				memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
-				    sizeof(p6->addr));
-				sa_touse = (struct sockaddr *)&sin6;
+				if (asoc->scope.ipv6_addr_legal) {
+					p6 = (struct sctp_ipv6addr_param *)phdr;
+					memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+					    sizeof(p6->addr));
+					sa_touse = (struct sockaddr *)&sin6;
+				}
 				break;
 			}
 #endif
@@ -5375,34 +5487,34 @@
  */
 void
 sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
-    struct mbuf *init_pkt, int iphlen, int offset,
+    struct sctp_nets *src_net, struct mbuf *init_pkt,
+    int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_init_chunk *init_chk,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port, int hold_inp_lock)
 {
 	struct sctp_association *asoc;
-	struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+	struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err;
 	struct sctp_init_ack_chunk *initack;
 	struct sctp_adaptation_layer_indication *ali;
-	struct sctp_ecn_supported_param *ecn;
-	struct sctp_prsctp_supported_param *prsctp;
 	struct sctp_supported_chunk_types_param *pr_supported;
-	union sctp_sockstore store, store1, *over_addr;
+	struct sctp_paramhdr *ph;
+	union sctp_sockstore *over_addr;
+	struct sctp_scoping scp;
 
 #ifdef INET
-	struct sockaddr_in *sin, *to_sin;
+	struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
+	struct sockaddr_in *src4 = (struct sockaddr_in *)src;
+	struct sockaddr_in *sin;
 
 #endif
 #ifdef INET6
-	struct sockaddr_in6 *sin6, *to_sin6;
+	struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
+	struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;
+	struct sockaddr_in6 *sin6;
 
 #endif
-	struct ip *iph;
-
-#ifdef INET6
-	struct ip6_hdr *ip6;
-
-#endif
 	struct sockaddr *to;
 	struct sctp_state_cookie stc;
 	struct sctp_nets *net = NULL;
@@ -5409,30 +5521,50 @@
 	uint8_t *signature = NULL;
 	int cnt_inits_to = 0;
 	uint16_t his_limit, i_want;
-	int abort_flag, padval;
-	int num_ext;
-	int p_len;
+	int abort_flag;
 	int nat_friendly = 0;
 	struct socket *so;
+	uint16_t num_ext, chunk_len, padding_len, parameter_len;
 
-	if (stcb)
+	if (stcb) {
 		asoc = &stcb->asoc;
-	else
+	} else {
 		asoc = NULL;
-	mp_last = NULL;
+	}
 	if ((asoc != NULL) &&
-	    (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
-	    (sctp_are_there_new_addresses(asoc, init_pkt, offset))) {
-		/* new addresses, out of here in non-cookie-wait states */
-		/*
-		 * Send a ABORT, we don't add the new address error clause
-		 * though we even set the T bit and copy in the 0 tag.. this
-		 * looks no different than if no listener was present.
-		 */
-		sctp_send_abort(init_pkt, iphlen, sh, 0, NULL,
-		    use_mflowid, mflowid,
-		    vrf_id, port);
-		return;
+	    (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+		if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
+			/*
+			 * new addresses, out of here in non-cookie-wait
+			 * states
+			 * 
+			 * Send an ABORT, without the new address error cause.
+			 * This looks no different than if no listener was
+			 * present.
+			 */
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    "Address added");
+			sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+			    mflowtype, mflowid, inp->fibnum,
+			    vrf_id, port);
+			return;
+		}
+		if (src_net != NULL && (src_net->port != port)) {
+			/*
+			 * change of remote encapsulation port, out of here
+			 * in non-cookie-wait states
+			 * 
+			 * Send an ABORT, without an specific error cause. This
+			 * looks no different than if no listener was
+			 * present.
+			 */
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    "Remote encapsulation port changed");
+			sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+			    mflowtype, mflowid, inp->fibnum,
+			    vrf_id, port);
+			return;
+		}
 	}
 	abort_flag = 0;
 	op_err = sctp_arethere_unrecognized_parameters(init_pkt,
@@ -5440,13 +5572,20 @@
 	    &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
 	if (abort_flag) {
 do_a_abort:
-		sctp_send_abort(init_pkt, iphlen, sh,
+		if (op_err == NULL) {
+			char msg[SCTP_DIAG_INFO_LEN];
+
+			snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
+		}
+		sctp_send_abort(init_pkt, iphlen, src, dst, sh,
 		    init_chk->init.initiate_tag, op_err,
-		    use_mflowid, mflowid,
+		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, port);
 		return;
 	}
-	m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m == NULL) {
 		/* No memory, INIT timer will re-attempt. */
 		if (op_err)
@@ -5453,8 +5592,17 @@
 			sctp_m_freem(op_err);
 		return;
 	}
-	SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
+	chunk_len = (uint16_t) sizeof(struct sctp_init_ack_chunk);
+	padding_len = 0;
 
+	/*
+	 * We might not overwrite the identification[] completely and on
+	 * some platforms time_entered will contain some padding. Therefore
+	 * zero out the cookie to avoid putting uninitialized memory on the
+	 * wire.
+	 */
+	memset(&stc, 0, sizeof(struct sctp_state_cookie));
+
 	/* the time I built cookie */
 	(void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
 
@@ -5482,84 +5630,31 @@
 	 */
 	stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		struct inpcb *in_inp;
-
-		/* Its a V6 socket */
-		in_inp = (struct inpcb *)inp;
 		stc.ipv6_addr_legal = 1;
-		/* Now look at the binding flag to see if V4 will be legal */
-		if (SCTP_IPV6_V6ONLY(in_inp) == 0) {
+		if (SCTP_IPV6_V6ONLY(inp)) {
+			stc.ipv4_addr_legal = 0;
+		} else {
 			stc.ipv4_addr_legal = 1;
-		} else {
-			/* V4 addresses are NOT legal on the association */
-			stc.ipv4_addr_legal = 0;
 		}
 	} else {
-		/* Its a V4 socket, no - V6 */
+		stc.ipv6_addr_legal = 0;
 		stc.ipv4_addr_legal = 1;
-		stc.ipv6_addr_legal = 0;
 	}
-
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
-	stc.ipv4_scope = 1;
-#else
 	stc.ipv4_scope = 0;
-#endif
-	/* now for scope setup */
-	memset((caddr_t)&store, 0, sizeof(store));
-	memset((caddr_t)&store1, 0, sizeof(store1));
-#ifdef INET
-	sin = &store.sin;
-	to_sin = &store1.sin;
-#endif
-#ifdef INET6
-	sin6 = &store.sin6;
-	to_sin6 = &store1.sin6;
-#endif
-	iph = mtod(init_pkt, struct ip *);
-	/* establish the to_addr's */
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		to_sin->sin_port = sh->dest_port;
-		to_sin->sin_family = AF_INET;
-		to_sin->sin_len = sizeof(struct sockaddr_in);
-		to_sin->sin_addr = iph->ip_dst;
-		break;
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		ip6 = mtod(init_pkt, struct ip6_hdr *);
-		to_sin6->sin6_addr = ip6->ip6_dst;
-		to_sin6->sin6_scope_id = 0;
-		to_sin6->sin6_port = sh->dest_port;
-		to_sin6->sin6_family = AF_INET6;
-		to_sin6->sin6_len = sizeof(struct sockaddr_in6);
-		break;
-#endif
-	default:
-		goto do_a_abort;
-		break;
-	}
-
 	if (net == NULL) {
-		to = (struct sockaddr *)&store;
-		switch (iph->ip_v) {
+		to = src;
+		switch (dst->sa_family) {
 #ifdef INET
-		case IPVERSION:
+		case AF_INET:
 			{
-				sin->sin_family = AF_INET;
-				sin->sin_len = sizeof(struct sockaddr_in);
-				sin->sin_port = sh->src_port;
-				sin->sin_addr = iph->ip_src;
 				/* lookup address */
-				stc.address[0] = sin->sin_addr.s_addr;
+				stc.address[0] = src4->sin_addr.s_addr;
 				stc.address[1] = 0;
 				stc.address[2] = 0;
 				stc.address[3] = 0;
 				stc.addr_type = SCTP_IPV4_ADDRESS;
 				/* local from address */
-				stc.laddress[0] = to_sin->sin_addr.s_addr;
+				stc.laddress[0] = dst4->sin_addr.s_addr;
 				stc.laddress[1] = 0;
 				stc.laddress[2] = 0;
 				stc.laddress[3] = 0;
@@ -5566,15 +5661,12 @@
 				stc.laddr_type = SCTP_IPV4_ADDRESS;
 				/* scope_id is only for v6 */
 				stc.scope_id = 0;
-#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
-				if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+				if ((IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) ||
+				    (IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) {
 					stc.ipv4_scope = 1;
 				}
-#else
-				stc.ipv4_scope = 1;
-#endif				/* SCTP_DONT_DO_PRIVADDR_SCOPE */
 				/* Must use the address in this case */
-				if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.ipv4_scope = 1;
 					stc.site_scope = 1;
@@ -5584,41 +5676,28 @@
 			}
 #endif
 #ifdef INET6
-		case IPV6_VERSION >> 4:
+		case AF_INET6:
 			{
-				ip6 = mtod(init_pkt, struct ip6_hdr *);
-				sin6->sin6_family = AF_INET6;
-				sin6->sin6_len = sizeof(struct sockaddr_in6);
-				sin6->sin6_port = sh->src_port;
-				sin6->sin6_addr = ip6->ip6_src;
-				/* lookup address */
-				memcpy(&stc.address, &sin6->sin6_addr,
-				    sizeof(struct in6_addr));
-				sin6->sin6_scope_id = 0;
 				stc.addr_type = SCTP_IPV6_ADDRESS;
-				stc.scope_id = 0;
-				if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
-					/*
-					 * FIX ME: does this have scope from
-					 * rcvif?
-					 */
-					(void)sa6_recoverscope(sin6);
-					stc.scope_id = sin6->sin6_scope_id;
-					sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+				memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
+				stc.scope_id = in6_getscope(&src6->sin6_addr);
+				if (sctp_is_address_on_local_host(src, vrf_id)) {
 					stc.loopback_scope = 1;
 					stc.local_scope = 0;
 					stc.site_scope = 1;
 					stc.ipv4_scope = 1;
-				} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+				} else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr) ||
+				    IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) {
 					/*
-					 * If the new destination is a
-					 * LINK_LOCAL we must have common
-					 * both site and local scope. Don't
-					 * set local scope though since we
-					 * must depend on the source to be
-					 * added implicitly. We cannot
-					 * assure just because we share one
-					 * link that all links are common.
+					 * If the new destination or source
+					 * is a LINK_LOCAL we must have
+					 * common both site and local scope.
+					 * Don't set local scope though
+					 * since we must depend on the
+					 * source to be added implicitly. We
+					 * cannot assure just because we
+					 * share one link that all links are
+					 * common.
 					 */
 					stc.local_scope = 0;
 					stc.site_scope = 1;
@@ -5634,22 +5713,16 @@
 					 * pull out the scope_id from
 					 * incoming pkt
 					 */
+				} else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr) ||
+				    IN6_IS_ADDR_SITELOCAL(&dst6->sin6_addr)) {
 					/*
-					 * FIX ME: does this have scope from
-					 * rcvif?
+					 * If the new destination or source
+					 * is SITE_LOCAL then we must have
+					 * site scope in common.
 					 */
-					(void)sa6_recoverscope(sin6);
-					stc.scope_id = sin6->sin6_scope_id;
-					sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
-				} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
-					/*
-					 * If the new destination is
-					 * SITE_LOCAL then we must have site
-					 * scope in common.
-					 */
 					stc.site_scope = 1;
 				}
-				memcpy(&stc.laddress, &to_sin6->sin6_addr, sizeof(struct in6_addr));
+				memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr));
 				stc.laddr_type = SCTP_IPV6_ADDRESS;
 				break;
 			}
@@ -5667,10 +5740,10 @@
 
 #endif
 
-		stc.loopback_scope = asoc->loopback_scope;
-		stc.ipv4_scope = asoc->ipv4_local_scope;
-		stc.site_scope = asoc->site_scope;
-		stc.local_scope = asoc->local_scope;
+		stc.loopback_scope = asoc->scope.loopback_scope;
+		stc.ipv4_scope = asoc->scope.ipv4_local_scope;
+		stc.site_scope = asoc->scope.site_scope;
+		stc.local_scope = asoc->scope.local_scope;
 #ifdef INET6
 		/* Why do we not consider IPv4 LL addresses? */
 		TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
@@ -5729,7 +5802,7 @@
 			if (net->src_addr_selected == 0) {
 				/*
 				 * strange case here, the INIT should have
-				 * did the selection.
+				 * done the selection.
 				 */
 				net->ro._s_addr = sctp_source_address_selection(inp,
 				    stcb, (sctp_route_t *) & net->ro,
@@ -5819,10 +5892,10 @@
 	his_limit = ntohs(init_chk->init.num_inbound_streams);
 	/* choose what I want */
 	if (asoc != NULL) {
-		if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+		if (asoc->streamoutcnt > asoc->pre_open_streams) {
 			i_want = asoc->streamoutcnt;
 		} else {
-			i_want = inp->sctp_ep.pre_open_stream_count;
+			i_want = asoc->pre_open_streams;
 		}
 	} else {
 		i_want = inp->sctp_ep.pre_open_stream_count;
@@ -5834,163 +5907,180 @@
 		/* I can have what I want :> */
 		initack->init.num_outbound_streams = htons(i_want);
 	}
-	/* tell him his limt. */
+	/* tell him his limit. */
 	initack->init.num_inbound_streams =
 	    htons(inp->sctp_ep.max_open_streams_intome);
 
 	/* adaptation layer indication parameter */
-	ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack));
-	ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
-	ali->ph.param_length = htons(sizeof(*ali));
-	ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
-	SCTP_BUF_LEN(m) += sizeof(*ali);
-	ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
-
+	if (inp->sctp_ep.adaptation_layer_indicator_provided) {
+		parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
+		ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
+		ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+		ali->ph.param_length = htons(parameter_len);
+		ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
+		chunk_len += parameter_len;
+	}
 	/* ECN parameter */
-	if (((asoc != NULL) && (asoc->ecn_allowed == 1)) ||
-	    (inp->sctp_ecn_enable == 1)) {
-		ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
-		ecn->ph.param_length = htons(sizeof(*ecn));
-		SCTP_BUF_LEN(m) += sizeof(*ecn);
-
-		prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
-		    sizeof(*ecn));
-	} else {
-		prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+	if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
+	    ((asoc == NULL) && (inp->ecn_supported == 1))) {
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+		ph->param_type = htons(SCTP_ECN_CAPABLE);
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
 	}
-	/* And now tell the peer we do  pr-sctp */
-	prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
-	prsctp->ph.param_length = htons(sizeof(*prsctp));
-	SCTP_BUF_LEN(m) += sizeof(*prsctp);
+	/* PR-SCTP supported parameter */
+	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+		ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
+	}
+	/* Add NAT friendly parameter */
 	if (nat_friendly) {
-		/* Add NAT friendly parameter */
-		struct sctp_paramhdr *ph;
-
-		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+		parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+		ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
 		ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
-		ph->param_length = htons(sizeof(struct sctp_paramhdr));
-		SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
+		ph->param_length = htons(parameter_len);
+		chunk_len += parameter_len;
 	}
-	/* And now tell the peer we do all the extensions */
-	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-	pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+	/* And now tell the peer which extensions we support */
 	num_ext = 0;
-	pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
-	pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
-	pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
-	pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
-	pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
-	if (!SCTP_BASE_SYSCTL(sctp_auth_disable))
+	pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+	if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+	    ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+		pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+	}
+	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
-	if (SCTP_BASE_SYSCTL(sctp_nr_sack_on_off))
+	}
+	if (((asoc != NULL) && (asoc->asconf_supported == 1)) ||
+	    ((asoc == NULL) && (inp->asconf_supported == 1))) {
+		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+		pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+	}
+	if (((asoc != NULL) && (asoc->reconfig_supported == 1)) ||
+	    ((asoc == NULL) && (inp->reconfig_supported == 1))) {
+		pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+	}
+	if (((asoc != NULL) && (asoc->nrsack_supported == 1)) ||
+	    ((asoc == NULL) && (inp->nrsack_supported == 1))) {
 		pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
-	p_len = sizeof(*pr_supported) + num_ext;
-	pr_supported->ph.param_length = htons(p_len);
-	bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
-	SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-
+	}
+	if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) ||
+	    ((asoc == NULL) && (inp->pktdrop_supported == 1))) {
+		pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+	}
+	if (num_ext > 0) {
+		parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+		pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+		pr_supported->ph.param_length = htons(parameter_len);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		chunk_len += parameter_len;
+	}
 	/* add authentication parameters */
-	if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+	if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+	    ((asoc == NULL) && (inp->auth_supported == 1))) {
 		struct sctp_auth_random *randp;
 		struct sctp_auth_hmac_algo *hmacs;
 		struct sctp_auth_chunk_list *chunks;
-		uint16_t random_len;
 
+		if (padding_len > 0) {
+			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+			chunk_len += padding_len;
+			padding_len = 0;
+		}
 		/* generate and add RANDOM parameter */
-		random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
-		randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+		randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
+		parameter_len = (uint16_t) sizeof(struct sctp_auth_random) +
+		    SCTP_AUTH_RANDOM_SIZE_DEFAULT;
 		randp->ph.param_type = htons(SCTP_RANDOM);
-		p_len = sizeof(*randp) + random_len;
-		randp->ph.param_length = htons(p_len);
-		SCTP_READ_RANDOM(randp->random_data, random_len);
-		/* zero out any padding required */
-		bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
-		SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+		randp->ph.param_length = htons(parameter_len);
+		SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		chunk_len += parameter_len;
 
+		if (padding_len > 0) {
+			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+			chunk_len += padding_len;
+			padding_len = 0;
+		}
 		/* add HMAC_ALGO parameter */
-		hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-		p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+		hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
+		parameter_len = (uint16_t) sizeof(struct sctp_auth_hmac_algo) +
+		    sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
 		    (uint8_t *) hmacs->hmac_ids);
-		if (p_len > 0) {
-			p_len += sizeof(*hmacs);
-			hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
-			hmacs->ph.param_length = htons(p_len);
-			/* zero out any padding required */
-			bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
-			SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+		hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+		hmacs->ph.param_length = htons(parameter_len);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		chunk_len += parameter_len;
+
+		if (padding_len > 0) {
+			memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+			chunk_len += padding_len;
+			padding_len = 0;
 		}
 		/* add CHUNKS parameter */
-		chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-		p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+		chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
+		parameter_len = (uint16_t) sizeof(struct sctp_auth_chunk_list) +
+		    sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
 		    chunks->chunk_types);
-		if (p_len > 0) {
-			p_len += sizeof(*chunks);
-			chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
-			chunks->ph.param_length = htons(p_len);
-			/* zero out any padding required */
-			bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
-			SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-		}
+		chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+		chunks->ph.param_length = htons(parameter_len);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		chunk_len += parameter_len;
 	}
-	m_at = m;
+	SCTP_BUF_LEN(m) = chunk_len;
+	m_last = m;
 	/* now the addresses */
-	{
-		struct sctp_scoping scp;
-
-		/*
-		 * To optimize this we could put the scoping stuff into a
-		 * structure and remove the individual uint8's from the stc
-		 * structure. Then we could just sifa in the address within
-		 * the stc.. but for now this is a quick hack to get the
-		 * address stuff teased apart.
-		 */
-		scp.ipv4_addr_legal = stc.ipv4_addr_legal;
-		scp.ipv6_addr_legal = stc.ipv6_addr_legal;
-		scp.loopback_scope = stc.loopback_scope;
-		scp.ipv4_local_scope = stc.ipv4_scope;
-		scp.local_scope = stc.local_scope;
-		scp.site_scope = stc.site_scope;
-		m_at = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_at, cnt_inits_to);
+	/*
+	 * To optimize this we could put the scoping stuff into a structure
+	 * and remove the individual uint8's from the stc structure. Then we
+	 * could just sifa in the address within the stc.. but for now this
+	 * is a quick hack to get the address stuff teased apart.
+	 */
+	scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+	scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+	scp.loopback_scope = stc.loopback_scope;
+	scp.ipv4_local_scope = stc.ipv4_scope;
+	scp.local_scope = stc.local_scope;
+	scp.site_scope = stc.site_scope;
+	m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last,
+	    cnt_inits_to,
+	    &padding_len, &chunk_len);
+	/* padding_len can only be positive, if no addresses have been added */
+	if (padding_len > 0) {
+		memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+		chunk_len += padding_len;
+		SCTP_BUF_LEN(m) += padding_len;
+		padding_len = 0;
 	}
-
 	/* tack on the operational error if present */
 	if (op_err) {
-		struct mbuf *ol;
-		int llen;
-
-		llen = 0;
-		ol = op_err;
-
-		while (ol) {
-			llen += SCTP_BUF_LEN(ol);
-			ol = SCTP_BUF_NEXT(ol);
+		parameter_len = 0;
+		for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+			parameter_len += SCTP_BUF_LEN(m_tmp);
 		}
-		if (llen % 4) {
-			/* must add a pad to the param */
-			uint32_t cpthis = 0;
-			int padlen;
-
-			padlen = 4 - (llen % 4);
-			m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+		padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+		SCTP_BUF_NEXT(m_last) = op_err;
+		while (SCTP_BUF_NEXT(m_last) != NULL) {
+			m_last = SCTP_BUF_NEXT(m_last);
 		}
-		while (SCTP_BUF_NEXT(m_at) != NULL) {
-			m_at = SCTP_BUF_NEXT(m_at);
-		}
-		SCTP_BUF_NEXT(m_at) = op_err;
-		while (SCTP_BUF_NEXT(m_at) != NULL) {
-			m_at = SCTP_BUF_NEXT(m_at);
-		}
+		chunk_len += parameter_len;
 	}
-	/* pre-calulate the size and update pkt header and chunk header */
-	p_len = 0;
-	for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
-		p_len += SCTP_BUF_LEN(m_tmp);
-		if (SCTP_BUF_NEXT(m_tmp) == NULL) {
-			/* m_tmp should now point to last one */
-			break;
+	if (padding_len > 0) {
+		m_last = sctp_add_pad_tombuf(m_last, padding_len);
+		if (m_last == NULL) {
+			/* Houston we have a problem, no space */
+			sctp_m_freem(m);
+			return;
 		}
+		chunk_len += padding_len;
+		padding_len = 0;
 	}
-
 	/* Now we must build a cookie */
 	m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature);
 	if (m_cookie == NULL) {
@@ -5999,21 +6089,22 @@
 		return;
 	}
 	/* Now append the cookie to the end and update the space/size */
-	SCTP_BUF_NEXT(m_tmp) = m_cookie;
-
-	for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
-		p_len += SCTP_BUF_LEN(m_tmp);
+	SCTP_BUF_NEXT(m_last) = m_cookie;
+	parameter_len = 0;
+	for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+		parameter_len += SCTP_BUF_LEN(m_tmp);
 		if (SCTP_BUF_NEXT(m_tmp) == NULL) {
-			/* m_tmp should now point to last one */
-			mp_last = m_tmp;
-			break;
+			m_last = m_tmp;
 		}
 	}
+	padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+	chunk_len += parameter_len;
+
 	/*
 	 * Place in the size, but we don't include the last pad (if any) in
 	 * the INIT-ACK.
 	 */
-	initack->ch.chunk_length = htons(p_len);
+	initack->ch.chunk_length = htons(chunk_len);
 
 	/*
 	 * Time to sign the cookie, we don't sign over the cookie signature
@@ -6027,17 +6118,14 @@
 	 * We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return
 	 * here since the timer will drive a retranmission.
 	 */
-	padval = p_len % 4;
-	if ((padval) && (mp_last)) {
-		/* see my previous comments on mp_last */
-		if (sctp_add_pad_tombuf(mp_last, (4 - padval))) {
-			/* Houston we have a problem, no space */
+	if (padding_len > 0) {
+		if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 			sctp_m_freem(m);
 			return;
 		}
 	}
 	if (stc.loopback_scope) {
-		over_addr = &store1;
+		over_addr = (union sctp_sockstore *)dst;
 	} else {
 		over_addr = NULL;
 	}
@@ -6046,7 +6134,7 @@
 	    0, 0,
 	    inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
 	    port, over_addr,
-	    use_mflowid, mflowid,
+	    mflowtype, mflowid,
 	    SCTP_SO_NOT_LOCKED);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
 }
@@ -6062,7 +6150,7 @@
 	struct sctp_tmit_chunk *chk, *nchk;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
-	if ((asoc->peer_supports_prsctp) &&
+	if ((asoc->prsctp_supported) &&
 	    (asoc->sent_queue_cnt_removeable > 0)) {
 		TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
 			/*
@@ -6175,17 +6263,15 @@
 static void
 sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp)
 {
-	sp->pr_sctp_on = 0;
 	/*
 	 * We assume that the user wants PR_SCTP_TTL if the user provides a
-	 * positive lifetime but does not specify any PR_SCTP policy. This
-	 * is a BAD assumption and causes problems at least with the
-	 * U-Vancovers MPI folks. I will change this to be no policy means
-	 * NO PR-SCTP.
+	 * positive lifetime but does not specify any PR_SCTP policy.
 	 */
 	if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
 		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
-		sp->pr_sctp_on = 1;
+	} else if (sp->timetolive > 0) {
+		sp->sinfo_flags |= SCTP_PR_SCTP_TTL;
+		sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
 	} else {
 		return;
 	}
@@ -6276,7 +6362,6 @@
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
-	sp->strseq = 0;
 	if (sp->sinfo_flags & SCTP_ADDR_OVER) {
 		sp->net = net;
 		atomic_add_int(&sp->net->ref_count, 1);
@@ -6317,10 +6402,6 @@
 	sctp_snd_sb_alloc(stcb, sp->length);
 	atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
 	TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
-	if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
-		sp->strseq = strm->next_sequence_sent;
-		strm->next_sequence_sent++;
-	}
 	stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp, 1);
 	m = NULL;
 	if (hold_stcb_lock == 0) {
@@ -6366,7 +6447,7 @@
 				if (outchain == NULL) {
 					/* This is the general case */
 			new_mbuf:
-					outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+					outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 					if (outchain == NULL) {
 						goto error_out;
 					}
@@ -6420,7 +6501,7 @@
 					/* now we need another one */
 					sizeofcpy -= len;
 				}
-				m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+				m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
 				if (m == NULL) {
 					/* We failed */
 					goto error_out;
@@ -6434,16 +6515,10 @@
 			return (outchain);
 		} else {
 			/* copy the old fashion way */
-			appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+			appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT);
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-				struct mbuf *mat;
-
-				for (mat = appendchain; mat; mat = SCTP_BUF_NEXT(mat)) {
-					if (SCTP_BUF_IS_EXTENDED(mat)) {
-						sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-					}
-				}
+				sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY);
 			}
 #endif
 		}
@@ -6494,7 +6569,7 @@
 	}
 }
 
-int
+static int
 sctp_med_chunk_output(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_association *asoc,
@@ -6527,8 +6602,8 @@
 		/* TSNH */
 		return;
 	}
-	if ((ca->m) && ca->sndlen) {
-		m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+	if (ca->sndlen > 0) {
+		m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT);
 		if (m == NULL) {
 			/* can't copy so we are done */
 			ca->cnt_failed++;
@@ -6536,13 +6611,7 @@
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-			struct mbuf *mat;
-
-			for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
-				if (SCTP_BUF_IS_EXTENDED(mat)) {
-					sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-				}
-			}
+			sctp_log_mbc(m, SCTP_MBUF_ICOPY);
 		}
 #endif
 	} else {
@@ -6556,38 +6625,40 @@
 	}
 	if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
 		/* Abort this assoc with m as the user defined reason */
-		if (m) {
+		if (m != NULL) {
+			SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_NOWAIT);
+		} else {
+			m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+			    0, M_NOWAIT, 1, MT_DATA);
+			SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
+		}
+		if (m != NULL) {
 			struct sctp_paramhdr *ph;
 
-			SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_DONTWAIT);
-			if (m) {
-				ph = mtod(m, struct sctp_paramhdr *);
-				ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
-				ph->param_length = htons(ca->sndlen);
-			}
-			/*
-			 * We add one here to keep the assoc from
-			 * dis-appearing on us.
-			 */
-			atomic_add_int(&stcb->asoc.refcnt, 1);
-			sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
-			/*
-			 * sctp_abort_an_association calls sctp_free_asoc()
-			 * free association will NOT free it since we
-			 * incremented the refcnt .. we do this to prevent
-			 * it being freed and things getting tricky since we
-			 * could end up (from free_asoc) calling inpcb_free
-			 * which would get a recursive lock call to the
-			 * iterator lock.. But as a consequence of that the
-			 * stcb will return to us un-locked.. since
-			 * free_asoc returns with either no TCB or the TCB
-			 * unlocked, we must relock.. to unlock in the
-			 * iterator timer :-0
-			 */
-			SCTP_TCB_LOCK(stcb);
-			atomic_add_int(&stcb->asoc.refcnt, -1);
-			goto no_chunk_output;
+			ph = mtod(m, struct sctp_paramhdr *);
+			ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+			ph->param_length = htons(sizeof(struct sctp_paramhdr) + ca->sndlen);
 		}
+		/*
+		 * We add one here to keep the assoc from dis-appearing on
+		 * us.
+		 */
+		atomic_add_int(&stcb->asoc.refcnt, 1);
+		sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
+		/*
+		 * sctp_abort_an_association calls sctp_free_asoc() free
+		 * association will NOT free it since we incremented the
+		 * refcnt .. we do this to prevent it being freed and things
+		 * getting tricky since we could end up (from free_asoc)
+		 * calling inpcb_free which would get a recursive lock call
+		 * to the iterator lock.. But as a consequence of that the
+		 * stcb will return to us un-locked.. since free_asoc
+		 * returns with either no TCB or the TCB unlocked, we must
+		 * relock.. to unlock in the iterator timer :-0
+		 */
+		SCTP_TCB_LOCK(stcb);
+		atomic_add_int(&stcb->asoc.refcnt, -1);
+		goto no_chunk_output;
 	} else {
 		if (m) {
 			ret = sctp_msg_append(stcb, net, m,
@@ -6617,12 +6688,13 @@
 					 * only send SHUTDOWN the first time
 					 * through
 					 */
-					sctp_send_shutdown(stcb, net);
 					if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+					sctp_stop_timers_for_shutdown(stcb);
+					sctp_send_shutdown(stcb, net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 					    net);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
@@ -6661,10 +6733,17 @@
 					if (TAILQ_EMPTY(&asoc->send_queue) &&
 					    TAILQ_EMPTY(&asoc->sent_queue) &&
 					    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+						struct mbuf *op_err;
+						char msg[SCTP_DIAG_INFO_LEN];
+
 				abort_anyway:
+						snprintf(msg, sizeof(msg),
+						    "%s:%d at %s", __FILE__, __LINE__, __func__);
+						op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+						    msg);
 						atomic_add_int(&stcb->asoc.refcnt, 1);
 						sctp_abort_an_association(stcb->sctp_ep, stcb,
-						    NULL, SCTP_SO_NOT_LOCKED);
+						    op_err, SCTP_SO_NOT_LOCKED);
 						atomic_add_int(&stcb->asoc.refcnt, -1);
 						goto no_chunk_output;
 					}
@@ -6680,14 +6759,13 @@
 
 	if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
 	    (stcb->asoc.total_flight > 0) &&
-	    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
-	    ) {
+	    (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
 		do_chunk_output = 0;
 	}
 	if (do_chunk_output)
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
 	else if (added_control) {
-		int num_out = 0, reason = 0, now_filled = 0;
+		int num_out, reason, now_filled = 0;
 		struct timeval now;
 		int frag_point;
 
@@ -6723,13 +6801,6 @@
 	SCTP_FREE(ca, SCTP_M_COPYAL);
 }
 
-
-#define	MC_ALIGN(m, len) do {						\
-	SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1));	\
-} while (0)
-
-
-
 static struct mbuf *
 sctp_copy_out_all(struct uio *uio, int len)
 {
@@ -6736,7 +6807,7 @@
 	struct mbuf *ret, *at;
 	int left, willcpy, cancpy, error;
 
-	ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+	ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
 	if (ret == NULL) {
 		/* TSNH */
 		return (NULL);
@@ -6759,7 +6830,7 @@
 		SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
 		left -= willcpy;
 		if (left > 0) {
-			SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+			SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA);
 			if (SCTP_BUF_NEXT(at) == NULL) {
 				goto err_out_now;
 			}
@@ -6810,13 +6881,10 @@
 		/* Gather the length of the send */
 		struct mbuf *mat;
 
-		mat = m;
 		ca->sndlen = 0;
-		while (m) {
-			ca->sndlen += SCTP_BUF_LEN(m);
-			m = SCTP_BUF_NEXT(m);
+		for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
+			ca->sndlen += SCTP_BUF_LEN(mat);
 		}
-		ca->m = mat;
 	}
 	ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
 	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
@@ -7152,6 +7220,11 @@
 			}
 			atomic_subtract_int(&asoc->stream_queue_cnt, 1);
 			TAILQ_REMOVE(&strq->outqueue, sp, next);
+			if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+			    (strq->chunks_on_queues == 0) &&
+			    TAILQ_EMPTY(&strq->outqueue)) {
+				stcb->asoc.trigger_reset = 1;
+			}
 			stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
 			if (sp->net) {
 				sctp_free_remote_addr(sp->net);
@@ -7196,7 +7269,7 @@
 			}
 			/* Whack down the size */
 			atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
-			if ((stcb->sctp_socket != NULL) && \
+			if ((stcb->sctp_socket != NULL) &&
 			    ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 				atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
@@ -7215,9 +7288,6 @@
 		}
 	}
 	some_taken = sp->some_taken;
-	if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
-		sp->msg_is_complete = 1;
-	}
 re_look:
 	length = sp->length;
 	if (sp->msg_is_complete) {
@@ -7323,7 +7393,7 @@
 		struct mbuf *m;
 
 dont_do_it:
-		chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+		chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT);
 		chk->last_mbuf = NULL;
 		if (chk->data == NULL) {
 			sp->some_taken = some_taken;
@@ -7334,13 +7404,7 @@
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-			struct mbuf *mat;
-
-			for (mat = chk->data; mat; mat = SCTP_BUF_NEXT(mat)) {
-				if (SCTP_BUF_IS_EXTENDED(mat)) {
-					sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-				}
-			}
+			sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY);
 		}
 #endif
 		/* Pull off the data */
@@ -7402,7 +7466,7 @@
 		/* Not enough room for a chunk header, get some */
 		struct mbuf *m;
 
-		m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+		m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 0, MT_DATA);
 		if (m == NULL) {
 			/*
 			 * we're in trouble here. _PREPEND below will free
@@ -7413,7 +7477,7 @@
 				SCTP_TCB_SEND_LOCK(stcb);
 				send_lock_up = 1;
 			}
-			if (chk->data == NULL) {
+			if (sp->data == NULL) {
 				/* unsteal the data */
 				sp->data = chk->data;
 				sp->tail_mbuf = chk->last_mbuf;
@@ -7439,7 +7503,7 @@
 			M_ALIGN(chk->data, 4);
 		}
 	}
-	SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+	SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_NOWAIT);
 	if (chk->data == NULL) {
 		/* HELP, TSNH since we assured it would not above? */
 #ifdef INVARIANTS
@@ -7461,7 +7525,11 @@
 	chk->asoc = &stcb->asoc;
 	chk->pad_inplace = 0;
 	chk->no_fr_allowed = 0;
-	chk->rec.data.stream_seq = sp->strseq;
+	chk->rec.data.stream_seq = strq->next_sequence_send;
+	if ((rcv_flags & SCTP_DATA_LAST_FRAG) &&
+	    !(rcv_flags & SCTP_DATA_UNORDERED)) {
+		strq->next_sequence_send++;
+	}
 	chk->rec.data.stream_number = sp->stream;
 	chk->rec.data.payloadtype = sp->ppid;
 	chk->rec.data.context = sp->context;
@@ -7525,22 +7593,15 @@
 		int pads;
 
 		pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
-		if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
+		lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf);
+		if (lm != NULL) {
+			chk->last_mbuf = lm;
 			chk->pad_inplace = 1;
 		}
-		if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
-			/* pad added an mbuf */
-			chk->last_mbuf = lm;
-		}
 		chk->send_size += pads;
 	}
-	/* We only re-set the policy if it is on */
-	if (sp->pr_sctp_on) {
-		sctp_set_prsctp_policy(sp);
+	if (PR_SCTP_ENABLED(chk->flags)) {
 		asoc->pr_sctp_cnt++;
-		chk->pr_sctp_on = 1;
-	} else {
-		chk->pr_sctp_on = 0;
 	}
 	if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
 		/* All done pull and kill the message */
@@ -7559,6 +7620,11 @@
 			send_lock_up = 1;
 		}
 		TAILQ_REMOVE(&strq->outqueue, sp, next);
+		if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+		    (strq->chunks_on_queues == 0) &&
+		    TAILQ_EMPTY(&strq->outqueue)) {
+			stcb->asoc.trigger_reset = 1;
+		}
 		stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
 		if (sp->net) {
 			sctp_free_remote_addr(sp->net);
@@ -7578,6 +7644,7 @@
 		*locked = 1;
 	}
 	asoc->chunks_on_out_queue++;
+	strq->chunks_on_queues++;
 	TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
 	asoc->send_queue_cnt++;
 out_of:
@@ -7729,7 +7796,7 @@
 #endif
 )
 {
-	/*
+	/**
 	 * Ok this is the generic chunk service queue. we must do the
 	 * following: - Service the stream queue that is next, moving any
 	 * message (note I must get a complete message i.e. FIRST/MIDDLE and
@@ -7765,8 +7832,8 @@
 	int quit_now = 0;
 
 	*num_out = 0;
+	*reason_code = 0;
 	auth_keyid = stcb->asoc.authinfo.active_keyid;
-
 	if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
 	    (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
 	    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
@@ -7785,7 +7852,7 @@
 #endif
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	hbflag = 0;
-	if ((control_only) || (asoc->stream_reset_outstanding))
+	if (control_only)
 		no_data_chunks = 1;
 	else
 		no_data_chunks = 0;
@@ -7972,12 +8039,12 @@
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
-			mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+			mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
-			mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+			mtu = net->mtu - SCTP_MIN_OVERHEAD;
 			break;
 #endif
 		default:
@@ -7999,6 +8066,7 @@
 		} else {
 			r_mtu = mtu;
 		}
+		error = 0;
 		/************************/
 		/* ASCONF transmission */
 		/************************/
@@ -8122,6 +8190,12 @@
 					 * it is used to do appropriate
 					 * source address selection.
 					 */
+					if (*now_filled == 0) {
+						(void)SCTP_GETTIME_TIMEVAL(now);
+						*now_filled = 1;
+					}
+					net->last_sent_time = *now;
+					hbflag = 0;
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain, auth_offset, auth,
@@ -8132,21 +8206,18 @@
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
+						/*
+						 * error, we could not
+						 * output
+						 */
+						SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+						if (from_where == 0) {
+							SCTP_STAT_INCR(sctps_lowlevelerrusr);
+						}
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
-						if (from_where == 0) {
-							SCTP_STAT_INCR(sctps_lowlevelerrusr);
-						}
-						if (*now_filled == 0) {
-							(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-							*now_filled = 1;
-							*now = net->last_sent_time;
-						} else {
-							net->last_sent_time = *now;
-						}
-						hbflag = 0;
 						/* error, could not output */
 						if (error == EHOSTUNREACH) {
 							/*
@@ -8157,17 +8228,10 @@
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
-						continue;
-					} else
+						break;
+					} else {
 						asoc->ifp_had_enobuf = 0;
-					if (*now_filled == 0) {
-						(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-						*now_filled = 1;
-						*now = net->last_sent_time;
-					} else {
-						net->last_sent_time = *now;
 					}
-					hbflag = 0;
 					/*
 					 * increase the number we sent, if a
 					 * cookie is sent we don't tell them
@@ -8200,6 +8264,10 @@
 				}
 			}
 		}
+		if (error != 0) {
+			/* try next net */
+			continue;
+		}
 		/************************/
 		/* Control transmission */
 		/************************/
@@ -8338,7 +8406,8 @@
 						/* turn off the timer */
 						if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
-							    inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+							    inp, stcb, net,
+							    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
 						}
 					}
 					ctl_cnt++;
@@ -8395,6 +8464,15 @@
 						sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
 						cookie = 0;
 					}
+					/* Only HB or ASCONF advances time */
+					if (hbflag) {
+						if (*now_filled == 0) {
+							(void)SCTP_GETTIME_TIMEVAL(now);
+							*now_filled = 1;
+						}
+						net->last_sent_time = *now;
+						hbflag = 0;
+					}
 					if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
 					    (struct sockaddr *)&net->ro._l_addr,
 					    outchain,
@@ -8406,24 +8484,18 @@
 					    net->port, NULL,
 					    0, 0,
 					    so_locked))) {
+						/*
+						 * error, we could not
+						 * output
+						 */
+						SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+						if (from_where == 0) {
+							SCTP_STAT_INCR(sctps_lowlevelerrusr);
+						}
 						if (error == ENOBUFS) {
 							asoc->ifp_had_enobuf = 1;
 							SCTP_STAT_INCR(sctps_lowlevelerr);
 						}
-						if (from_where == 0) {
-							SCTP_STAT_INCR(sctps_lowlevelerrusr);
-						}
-						/* error, could not output */
-						if (hbflag) {
-							if (*now_filled == 0) {
-								(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-								*now_filled = 1;
-								*now = net->last_sent_time;
-							} else {
-								net->last_sent_time = *now;
-							}
-							hbflag = 0;
-						}
 						if (error == EHOSTUNREACH) {
 							/*
 							 * Destination went
@@ -8433,19 +8505,9 @@
 							sctp_move_chunks_from_net(stcb, net);
 						}
 						*reason_code = 7;
-						continue;
-					} else
+						break;
+					} else {
 						asoc->ifp_had_enobuf = 0;
-					/* Only HB or ASCONF advances time */
-					if (hbflag) {
-						if (*now_filled == 0) {
-							(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-							*now_filled = 1;
-							*now = net->last_sent_time;
-						} else {
-							net->last_sent_time = *now;
-						}
-						hbflag = 0;
 					}
 					/*
 					 * increase the number we sent, if a
@@ -8479,6 +8541,10 @@
 				}
 			}
 		}
+		if (error != 0) {
+			/* try next net */
+			continue;
+		}
 		/* JRI: if dest is in PF state, do not send data to it */
 		if ((asoc->sctp_cmt_on_off > 0) &&
 		    (net != stcb->asoc.alternate) &&
@@ -8523,8 +8589,8 @@
 		switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
 #ifdef INET
 		case AF_INET:
-			if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
-				omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+			if (net->mtu > SCTP_MIN_V4_OVERHEAD)
+				omtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
 			else
 				omtu = 0;
 			break;
@@ -8531,8 +8597,8 @@
 #endif
 #ifdef INET6
 		case AF_INET6:
-			if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
-				omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+			if (net->mtu > SCTP_MIN_OVERHEAD)
+				omtu = net->mtu - SCTP_MIN_OVERHEAD;
 			else
 				omtu = 0;
 			break;
@@ -8542,7 +8608,8 @@
 			omtu = 0;
 			break;
 		}
-		if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) &&
+		if ((((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+		    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
 		    (skip_data_for_this_net == 0)) ||
 		    (cookie)) {
 			TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
@@ -8732,6 +8799,14 @@
 				 */
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 			}
+			if (bundle_at || hbflag) {
+				/* For data/asconf and hb set time */
+				if (*now_filled == 0) {
+					(void)SCTP_GETTIME_TIMEVAL(now);
+					*now_filled = 1;
+				}
+				net->last_sent_time = *now;
+			}
 			/* Now send it, if there is anything to send :> */
 			if ((error = sctp_lowlevel_chunk_output(inp,
 			    stcb,
@@ -8750,24 +8825,14 @@
 			    0, 0,
 			    so_locked))) {
 				/* error, we could not output */
+				SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+				if (from_where == 0) {
+					SCTP_STAT_INCR(sctps_lowlevelerrusr);
+				}
 				if (error == ENOBUFS) {
 					SCTP_STAT_INCR(sctps_lowlevelerr);
 					asoc->ifp_had_enobuf = 1;
 				}
-				if (from_where == 0) {
-					SCTP_STAT_INCR(sctps_lowlevelerrusr);
-				}
-				SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
-				if (hbflag) {
-					if (*now_filled == 0) {
-						(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-						*now_filled = 1;
-						*now = net->last_sent_time;
-					} else {
-						net->last_sent_time = *now;
-					}
-					hbflag = 0;
-				}
 				if (error == EHOSTUNREACH) {
 					/*
 					 * Destination went unreachable
@@ -8791,16 +8856,6 @@
 			endoutchain = NULL;
 			auth = NULL;
 			auth_offset = 0;
-			if (bundle_at || hbflag) {
-				/* For data/asconf and hb set time */
-				if (*now_filled == 0) {
-					(void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
-					*now_filled = 1;
-					*now = net->last_sent_time;
-				} else {
-					net->last_sent_time = *now;
-				}
-			}
 			if (!no_out_cnt) {
 				*num_out += (ctl_cnt + bundle_at);
 			}
@@ -8861,9 +8916,37 @@
 	 */
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
-	struct mbuf *mat;
+	struct mbuf *mat, *last_mbuf;
+	uint32_t chunk_length;
+	uint16_t padding_length;
 
 	SCTP_TCB_LOCK_ASSERT(stcb);
+	SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT);
+	if (op_err == NULL) {
+		return;
+	}
+	last_mbuf = NULL;
+	chunk_length = 0;
+	for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) {
+		chunk_length += SCTP_BUF_LEN(mat);
+		if (SCTP_BUF_NEXT(mat) == NULL) {
+			last_mbuf = mat;
+		}
+	}
+	if (chunk_length > SCTP_MAX_CHUNK_LENGTH) {
+		sctp_m_freem(op_err);
+		return;
+	}
+	padding_length = chunk_length % 4;
+	if (padding_length != 0) {
+		padding_length = 4 - padding_length;
+	}
+	if (padding_length != 0) {
+		if (sctp_add_pad_tombuf(last_mbuf, padding_length) == NULL) {
+			sctp_m_freem(op_err);
+			return;
+		}
+	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		/* no memory */
@@ -8871,22 +8954,9 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
-	if (op_err == NULL) {
-		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
-		return;
-	}
-	chk->send_size = 0;
-	mat = op_err;
-	while (mat != NULL) {
-		chk->send_size += SCTP_BUF_LEN(mat);
-		mat = SCTP_BUF_NEXT(mat);
-	}
-	chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
-	chk->rec.chunk_id.can_take_data = 1;
+	chk->send_size = (uint16_t) chunk_length;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
-	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = op_err;
 	chk->whoTo = NULL;
@@ -8894,9 +8964,7 @@
 	hdr->chunk_type = SCTP_OPERATION_ERROR;
 	hdr->chunk_flags = 0;
 	hdr->chunk_length = htons(chk->send_size);
-	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
-	    chk,
-	    sctp_next);
+	TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
 	chk->asoc->ctrl_queue_cnt++;
 }
 
@@ -8917,12 +8985,11 @@
 	struct sctp_tmit_chunk *chk;
 	uint16_t ptype, plen;
 
+	SCTP_TCB_LOCK_ASSERT(stcb);
 	/* First find the cookie in the param area */
 	cookie = NULL;
 	at = offset + sizeof(struct sctp_init_chunk);
-
-	SCTP_TCB_LOCK_ASSERT(stcb);
-	do {
+	for (;;) {
 		phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
 		if (phdr == NULL) {
 			return (-3);
@@ -8936,7 +9003,7 @@
 			if ((pad = (plen % 4))) {
 				plen += 4 - pad;
 			}
-			cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+			cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
 			if (cookie == NULL) {
 				/* No memory */
 				return (-2);
@@ -8943,25 +9010,14 @@
 			}
 #ifdef SCTP_MBUF_LOGGING
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-				struct mbuf *mat;
-
-				for (mat = cookie; mat; mat = SCTP_BUF_NEXT(mat)) {
-					if (SCTP_BUF_IS_EXTENDED(mat)) {
-						sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-					}
-				}
+				sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
 			}
 #endif
 			break;
 		}
 		at += SCTP_SIZE32(plen);
-	} while (phdr);
-	if (cookie == NULL) {
-		/* Did not find the cookie */
-		return (-3);
 	}
 	/* ok, we got the cookie lets change it into a cookie echo chunk */
-
 	/* first the change from param to cookie */
 	hdr = mtod(cookie, struct sctp_chunkhdr *);
 	hdr->chunk_type = SCTP_COOKIE_ECHO;
@@ -8974,12 +9030,12 @@
 		return (-5);
 	}
 	chk->copy_by_ref = 0;
-	chk->send_size = plen;
 	chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
 	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+	chk->send_size = plen;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
-	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->asoc = &stcb->asoc;
 	chk->data = cookie;
 	chk->whoTo = net;
@@ -9008,7 +9064,7 @@
 		/* must have a net pointer */
 		return;
 
-	outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+	outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT);
 	if (outchain == NULL) {
 		/* gak out of memory */
 		return;
@@ -9015,13 +9071,7 @@
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		struct mbuf *mat;
-
-		for (mat = outchain; mat; mat = SCTP_BUF_NEXT(mat)) {
-			if (SCTP_BUF_IS_EXTENDED(mat)) {
-				sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-			}
-		}
+		sctp_log_mbc(outchain, SCTP_MBUF_ICOPY);
 	}
 #endif
 	chdr = mtod(outchain, struct sctp_chunkhdr *);
@@ -9042,12 +9092,12 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	chk->send_size = chk_length;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
+	chk->send_size = chk_length;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
-	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = outchain;
 	chk->whoTo = net;
@@ -9064,10 +9114,9 @@
 	struct sctp_chunkhdr *hdr;
 	struct sctp_tmit_chunk *chk;
 
-	cookie_ack = NULL;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 
-	cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+	cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (cookie_ack == NULL) {
 		/* no mbuf's */
 		return;
@@ -9080,12 +9129,12 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
+	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
-	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->data = cookie_ack;
 	if (chk->asoc->last_control_chunk_from != NULL) {
@@ -9113,7 +9162,7 @@
 	struct sctp_shutdown_ack_chunk *ack_cp;
 	struct sctp_tmit_chunk *chk;
 
-	m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+	m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown_ack == NULL) {
 		/* no mbuf's */
 		return;
@@ -9126,9 +9175,10 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
+	chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->flags = 0;
@@ -9156,7 +9206,7 @@
 	struct sctp_shutdown_chunk *shutdown_cp;
 	struct sctp_tmit_chunk *chk;
 
-	m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+	m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown == NULL) {
 		/* no mbuf's */
 		return;
@@ -9169,9 +9219,10 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	chk->send_size = sizeof(struct sctp_shutdown_chunk);
 	chk->rec.chunk_id.id = SCTP_SHUTDOWN;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
+	chk->send_size = sizeof(struct sctp_shutdown_chunk);
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->flags = 0;
@@ -9222,13 +9273,13 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
+	chk->rec.chunk_id.id = SCTP_ASCONF;
+	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->data = m_asconf;
 	chk->send_size = len;
-	chk->rec.chunk_id.id = SCTP_ASCONF;
-	chk->rec.chunk_id.can_take_data = 0;
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
-	chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 	chk->asoc = &stcb->asoc;
 	chk->whoTo = net;
 	if (chk->whoTo) {
@@ -9292,7 +9343,7 @@
 			continue;
 		}
 		/* copy the asconf_ack */
-		m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+		m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
 		if (m_ack == NULL) {
 			/* couldn't copy it */
 			return;
@@ -9299,13 +9350,7 @@
 		}
 #ifdef SCTP_MBUF_LOGGING
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-			struct mbuf *mat;
-
-			for (mat = m_ack; mat; mat = SCTP_BUF_NEXT(mat)) {
-				if (SCTP_BUF_IS_EXTENDED(mat)) {
-					sctp_log_mb(mat, SCTP_MBUF_ICOPY);
-				}
-			}
+			sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY);
 		}
 #endif
 
@@ -9317,20 +9362,17 @@
 			return;
 		}
 		chk->copy_by_ref = 0;
-
+		chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+		chk->rec.chunk_id.can_take_data = 1;
+		chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
 		chk->whoTo = net;
 		if (chk->whoTo) {
 			atomic_add_int(&chk->whoTo->ref_count, 1);
 		}
 		chk->data = m_ack;
-		chk->send_size = 0;
-		/* Get size */
 		chk->send_size = ack->len;
-		chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
-		chk->rec.chunk_id.can_take_data = 1;
 		chk->sent = SCTP_DATAGRAM_UNSENT;
 		chk->snd_count = 0;
-		chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;	/* XXX */
 		chk->asoc = &stcb->asoc;
 
 		TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
@@ -9503,12 +9545,16 @@
 		}
 		if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
 		    (chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
-			/* Gak, we have exceeded max unlucky retran, abort! */
-			SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
-			    chk->snd_count,
-			    SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
+			struct mbuf *op_err;
+			char msg[SCTP_DIAG_INFO_LEN];
+
+			snprintf(msg, sizeof(msg), "TSN %8.8x retransmitted %d times, giving up",
+			    chk->rec.data.TSN_seq, chk->snd_count);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    msg);
 			atomic_add_int(&stcb->asoc.refcnt, 1);
-			sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked);
+			sctp_abort_an_association(stcb->sctp_ep, stcb, op_err,
+			    so_locked);
 			SCTP_TCB_LOCK(stcb);
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			return (SCTP_RETRAN_EXIT);
@@ -9822,7 +9868,7 @@
 						 * t3-expiring.
 						 */
 						sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
-						    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+						    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
 						sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 					}
 				}
@@ -9902,7 +9948,7 @@
 	 */
 	struct sctp_association *asoc;
 	struct sctp_nets *net;
-	int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0;
+	int error = 0, num_out, tot_out = 0, ret = 0, reason_code;
 	unsigned int burst_cnt = 0;
 	struct timeval now;
 	int now_filled = 0;
@@ -9913,6 +9959,7 @@
 	unsigned int tot_frs = 0;
 
 	asoc = &stcb->asoc;
+do_it_again:
 	/* The Nagle algorithm is only applied when handling a send call. */
 	if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
@@ -9930,7 +9977,8 @@
 	if ((un_sent <= 0) &&
 	    (TAILQ_EMPTY(&asoc->control_send_queue)) &&
 	    (TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
-	    (asoc->sent_queue_retran_cnt == 0)) {
+	    (asoc->sent_queue_retran_cnt == 0) &&
+	    (asoc->trigger_reset == 0)) {
 		/* Nothing to do unless there is something to be sent left */
 		return;
 	}
@@ -10149,6 +10197,12 @@
 	 */
 	if (stcb->asoc.ecn_echo_cnt_onq)
 		sctp_fix_ecn_echo(asoc);
+
+	if (stcb->asoc.trigger_reset) {
+		if (sctp_send_stream_reset_out_if_possible(stcb, so_locked) == 0) {
+			goto do_it_again;
+		}
+	}
 	return;
 }
 
@@ -10210,9 +10264,10 @@
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
 	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
 	chk->asoc = asoc;
 	chk->whoTo = NULL;
-	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
@@ -10236,7 +10291,8 @@
 		unsigned int cnt_of_skipped = 0;
 
 		TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
-			if (at->sent != SCTP_FORWARD_TSN_SKIP) {
+			if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
+			    (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
 				/* no more to look at */
 				break;
 			}
@@ -10390,8 +10446,7 @@
 	uint8_t type;
 	uint8_t tsn_map;
 
-	if ((stcb->asoc.sctp_nr_sack_on_off == 1) &&
-	    (stcb->asoc.peer_supports_nr_sack == 1)) {
+	if (stcb->asoc.nrsack_supported == 1) {
 		type = SCTP_NR_SELECTIVE_ACK;
 	} else {
 		type = SCTP_SELECTIVE_ACK;
@@ -10428,7 +10483,8 @@
 			/* No memory so we drop the idea, and set a timer */
 			if (stcb->asoc.delayed_ack) {
 				sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
-				    stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+				    stcb->sctp_ep, stcb, NULL,
+				    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
 				sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 				    stcb->sctp_ep, stcb, NULL);
 			} else {
@@ -10443,6 +10499,7 @@
 	/* Clear our pkt counts */
 	asoc->data_pkts_seen = 0;
 
+	a_chk->flags = 0;
 	a_chk->asoc = asoc;
 	a_chk->snd_count = 0;
 	a_chk->send_size = 0;	/* fill in later */
@@ -10449,32 +10506,17 @@
 	a_chk->sent = SCTP_DATAGRAM_UNSENT;
 	a_chk->whoTo = NULL;
 
-	if ((asoc->numduptsns) ||
-	    (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE))) {
+	if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) {
 		/*-
-		 * Ok, we have some duplicates or the destination for the
-		 * sack is unreachable, lets see if we can select an
-		 * alternate than asoc->last_data_chunk_from
+		 * Ok, the destination for the SACK is unreachable, lets see if
+		 * we can select an alternate to asoc->last_data_chunk_from
 		 */
-		if ((asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE) &&
-		    (asoc->used_alt_onsack > asoc->numnets)) {
-			/* We used an alt last time, don't this time */
-			a_chk->whoTo = NULL;
-		} else {
-			asoc->used_alt_onsack++;
-			a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
-		}
+		a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
 		if (a_chk->whoTo == NULL) {
 			/* Nope, no alternate */
 			a_chk->whoTo = asoc->last_data_chunk_from;
-			asoc->used_alt_onsack = 0;
 		}
 	} else {
-		/*
-		 * No duplicates so we use the last place we received data
-		 * from.
-		 */
-		asoc->used_alt_onsack = 0;
 		a_chk->whoTo = asoc->last_data_chunk_from;
 	}
 	if (a_chk->whoTo) {
@@ -10497,7 +10539,7 @@
 		space_req = MCLBYTES;
 	}
 	/* Ok now lets formulate a MBUF with our sack */
-	a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+	a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA);
 	if ((a_chk->data == NULL) ||
 	    (a_chk->whoTo == NULL)) {
 		/* rats, no mbuf memory */
@@ -10510,7 +10552,8 @@
 		/* sa_ignore NO_NULL_CHK */
 		if (stcb->asoc.delayed_ack) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
-			    stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+			    stcb->sctp_ep, stcb, NULL,
+			    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
 			sctp_timer_start(SCTP_TIMER_TYPE_RECV,
 			    stcb->sctp_ep, stcb, NULL);
 		} else {
@@ -10589,7 +10632,7 @@
 				 * Clear all bits corresponding to TSNs
 				 * smaller or equal to the cumulative TSN.
 				 */
-				tsn_map &= (~0 << (1 - offset));
+				tsn_map &= (~0U << (1 - offset));
 			}
 			selector = &sack_array[tsn_map];
 			if (mergeable && selector->right_edge) {
@@ -10664,7 +10707,7 @@
 					 * TSNs smaller or equal to the
 					 * cumulative TSN.
 					 */
-					tsn_map &= (~0 << (1 - offset));
+					tsn_map &= (~0U << (1 - offset));
 				}
 				selector = &sack_array[tsn_map];
 				if (mergeable && selector->right_edge) {
@@ -10775,14 +10818,16 @@
 #endif
 )
 {
-	struct mbuf *m_abort;
-	struct mbuf *m_out = NULL, *m_end = NULL;
-	struct sctp_abort_chunk *abort = NULL;
-	int sz;
-	uint32_t auth_offset = 0;
+	struct mbuf *m_abort, *m, *m_last;
+	struct mbuf *m_out, *m_end = NULL;
+	struct sctp_abort_chunk *abort;
 	struct sctp_auth_chunk *auth = NULL;
 	struct sctp_nets *net;
+	uint32_t vtag;
+	uint32_t auth_offset = 0;
+	uint16_t cause_len, chunk_len, padding_len;
 
+	SCTP_TCB_LOCK_ASSERT(stcb);
 	/*-
 	 * Add an AUTH chunk, if chunk requires it and save the offset into
 	 * the chain for AUTH
@@ -10789,31 +10834,35 @@
 	 */
 	if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
 	    stcb->asoc.peer_auth_chunks)) {
-		m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset,
+		m_out = sctp_add_auth_chunk(NULL, &m_end, &auth, &auth_offset,
 		    stcb, SCTP_ABORT_ASSOCIATION);
 		SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+	} else {
+		m_out = NULL;
 	}
-	SCTP_TCB_LOCK_ASSERT(stcb);
-	m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+	m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_abort == NULL) {
-		/* no mbuf's */
-		if (m_out)
+		if (m_out) {
 			sctp_m_freem(m_out);
+		}
+		if (operr) {
+			sctp_m_freem(operr);
+		}
 		return;
 	}
 	/* link in any error */
 	SCTP_BUF_NEXT(m_abort) = operr;
-	sz = 0;
-	if (operr) {
-		struct mbuf *n;
-
-		n = operr;
-		while (n) {
-			sz += SCTP_BUF_LEN(n);
-			n = SCTP_BUF_NEXT(n);
+	cause_len = 0;
+	m_last = NULL;
+	for (m = operr; m; m = SCTP_BUF_NEXT(m)) {
+		cause_len += (uint16_t) SCTP_BUF_LEN(m);
+		if (SCTP_BUF_NEXT(m) == NULL) {
+			m_last = m;
 		}
 	}
-	SCTP_BUF_LEN(m_abort) = sizeof(*abort);
+	SCTP_BUF_LEN(m_abort) = sizeof(struct sctp_abort_chunk);
+	chunk_len = (uint16_t) sizeof(struct sctp_abort_chunk) + cause_len;
+	padding_len = SCTP_SIZE32(chunk_len) - chunk_len;
 	if (m_out == NULL) {
 		/* NO Auth chunk prepended, so reserve space in front */
 		SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD);
@@ -10827,16 +10876,30 @@
 	} else {
 		net = stcb->asoc.primary_destination;
 	}
-	/* fill in the ABORT chunk */
+	/* Fill in the ABORT chunk header. */
 	abort = mtod(m_abort, struct sctp_abort_chunk *);
 	abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
-	abort->ch.chunk_flags = 0;
-	abort->ch.chunk_length = htons(sizeof(*abort) + sz);
-
+	if (stcb->asoc.peer_vtag == 0) {
+		/* This happens iff the assoc is in COOKIE-WAIT state. */
+		vtag = stcb->asoc.my_vtag;
+		abort->ch.chunk_flags = SCTP_HAD_NO_TCB;
+	} else {
+		vtag = stcb->asoc.peer_vtag;
+		abort->ch.chunk_flags = 0;
+	}
+	abort->ch.chunk_length = htons(chunk_len);
+	/* Add padding, if necessary. */
+	if (padding_len > 0) {
+		if ((m_last == NULL) ||
+		    (sctp_add_pad_tombuf(m_last, padding_len) == NULL)) {
+			sctp_m_freem(m_out);
+			return;
+		}
+	}
 	(void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
 	    (struct sockaddr *)&net->ro._l_addr,
 	    m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
-	    stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+	    stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag),
 	    stcb->asoc.primary_destination->port, NULL,
 	    0, 0,
 	    so_locked);
@@ -10854,7 +10917,7 @@
 	uint32_t vtag;
 	uint8_t flags;
 
-	m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+	m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_shutdown_comp == NULL) {
 		/* no mbuf's */
 		return;
@@ -10884,9 +10947,10 @@
 }
 
 static void
-sctp_send_resp_msg(struct mbuf *m, struct sctphdr *sh, uint32_t vtag,
+sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, uint32_t vtag,
     uint8_t type, struct mbuf *cause,
-    uint8_t use_mflowid, uint32_t mflowid,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	struct mbuf *o_pak;
@@ -10893,16 +10957,22 @@
 	struct mbuf *mout;
 	struct sctphdr *shout;
 	struct sctp_chunkhdr *ch;
-	struct ip *iph;
+
+#if defined(INET) || defined(INET6)
 	struct udphdr *udp;
-	int len, cause_len, padding_len, ret;
+	int ret;
 
+#endif
+	int len, cause_len, padding_len;
+
 #ifdef INET
-	struct ip *iph_out;
+	struct sockaddr_in *src_sin, *dst_sin;
+	struct ip *ip;
 
 #endif
 #ifdef INET6
-	struct ip6_hdr *ip6, *ip6_out;
+	struct sockaddr_in6 *src_sin6, *dst_sin6;
+	struct ip6_hdr *ip6;
 
 #endif
 
@@ -10921,7 +10991,7 @@
 			padding_len = 4 - padding_len;
 		}
 		if (padding_len != 0) {
-			if (sctp_add_pad_tombuf(m_last, padding_len)) {
+			if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
 				sctp_m_freem(cause);
 				return;
 			}
@@ -10931,15 +11001,14 @@
 	}
 	/* Get an mbuf for the header. */
 	len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
+	switch (dst->sa_family) {
 #ifdef INET
-	case IPVERSION:
+	case AF_INET:
 		len += sizeof(struct ip);
 		break;
 #endif
 #ifdef INET6
-	case IPV6_VERSION >> 4:
+	case AF_INET6:
 		len += sizeof(struct ip6_hdr);
 		break;
 #endif
@@ -10946,10 +11015,12 @@
 	default:
 		break;
 	}
+#if defined(INET) || defined(INET6)
 	if (port) {
 		len += sizeof(struct udphdr);
 	}
-	mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+#endif
+	mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA);
 	if (mout == NULL) {
 		if (cause) {
 			sctp_m_freem(cause);
@@ -10959,56 +11030,58 @@
 	SCTP_BUF_RESV_UF(mout, max_linkhdr);
 	SCTP_BUF_LEN(mout) = len;
 	SCTP_BUF_NEXT(mout) = cause;
-	if (use_mflowid != 0) {
-		mout->m_pkthdr.flowid = mflowid;
-		mout->m_flags |= M_FLOWID;
-	}
+	M_SETFIB(mout, fibnum);
+	mout->m_pkthdr.flowid = mflowid;
+	M_HASHTYPE_SET(mout, mflowtype);
 #ifdef INET
-	iph_out = NULL;
+	ip = NULL;
 #endif
 #ifdef INET6
-	ip6_out = NULL;
+	ip6 = NULL;
 #endif
-	switch (iph->ip_v) {
+	switch (dst->sa_family) {
 #ifdef INET
-	case IPVERSION:
-		iph_out = mtod(mout, struct ip *);
-		iph_out->ip_v = IPVERSION;
-		iph_out->ip_hl = (sizeof(struct ip) >> 2);
-		iph_out->ip_tos = 0;
-		iph_out->ip_id = ip_newid();
-		iph_out->ip_off = 0;
-		iph_out->ip_ttl = MODULE_GLOBAL(ip_defttl);
+	case AF_INET:
+		src_sin = (struct sockaddr_in *)src;
+		dst_sin = (struct sockaddr_in *)dst;
+		ip = mtod(mout, struct ip *);
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = (sizeof(struct ip) >> 2);
+		ip->ip_tos = 0;
+		ip->ip_id = ip_newid();
+		ip->ip_off = 0;
+		ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
 		if (port) {
-			iph_out->ip_p = IPPROTO_UDP;
+			ip->ip_p = IPPROTO_UDP;
 		} else {
-			iph_out->ip_p = IPPROTO_SCTP;
+			ip->ip_p = IPPROTO_SCTP;
 		}
-		iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
-		iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
-		iph_out->ip_sum = 0;
+		ip->ip_src.s_addr = dst_sin->sin_addr.s_addr;
+		ip->ip_dst.s_addr = src_sin->sin_addr.s_addr;
+		ip->ip_sum = 0;
 		len = sizeof(struct ip);
-		shout = (struct sctphdr *)((caddr_t)iph_out + len);
+		shout = (struct sctphdr *)((caddr_t)ip + len);
 		break;
 #endif
 #ifdef INET6
-	case IPV6_VERSION >> 4:
-		ip6 = (struct ip6_hdr *)iph;
-		ip6_out = mtod(mout, struct ip6_hdr *);
-		ip6_out->ip6_flow = htonl(0x60000000);
+	case AF_INET6:
+		src_sin6 = (struct sockaddr_in6 *)src;
+		dst_sin6 = (struct sockaddr_in6 *)dst;
+		ip6 = mtod(mout, struct ip6_hdr *);
+		ip6->ip6_flow = htonl(0x60000000);
 		if (V_ip6_auto_flowlabel) {
-			ip6_out->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+			ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 		}
-		ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+		ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
 		if (port) {
-			ip6_out->ip6_nxt = IPPROTO_UDP;
+			ip6->ip6_nxt = IPPROTO_UDP;
 		} else {
-			ip6_out->ip6_nxt = IPPROTO_SCTP;
+			ip6->ip6_nxt = IPPROTO_SCTP;
 		}
-		ip6_out->ip6_src = ip6->ip6_dst;
-		ip6_out->ip6_dst = ip6->ip6_src;
+		ip6->ip6_src = dst_sin6->sin6_addr;
+		ip6->ip6_dst = src_sin6->sin6_addr;
 		len = sizeof(struct ip6_hdr);
-		shout = (struct sctphdr *)((caddr_t)ip6_out + len);
+		shout = (struct sctphdr *)((caddr_t)ip6 + len);
 		break;
 #endif
 	default:
@@ -11016,6 +11089,7 @@
 		shout = mtod(mout, struct sctphdr *);
 		break;
 	}
+#if defined(INET) || defined(INET6)
 	if (port) {
 		if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
 			sctp_m_freem(mout);
@@ -11034,6 +11108,7 @@
 	} else {
 		udp = NULL;
 	}
+#endif
 	shout->src_port = sh->dest_port;
 	shout->dest_port = sh->src_port;
 	shout->checksum = 0;
@@ -11059,21 +11134,17 @@
 		return;
 	}
 	SCTP_ATTACH_CHAIN(o_pak, mout, len);
+	switch (dst->sa_family) {
 #ifdef INET
-	if (iph_out != NULL) {
+	case AF_INET:
 		if (port) {
 			if (V_udp_cksum) {
-				udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+				udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
 			} else {
 				udp->uh_sum = 0;
 			}
 		}
-		iph_out->ip_len = len;
-#ifdef SCTP_PACKET_LOGGING
-		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
-			sctp_packet_log(mout, len);
-		}
-#endif
+		ip->ip_len = htons(len);
 		if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11089,21 +11160,21 @@
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			mout->m_pkthdr.csum_flags = CSUM_SCTP;
-			mout->m_pkthdr.csum_data = 0;
+			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 		}
+#ifdef SCTP_PACKET_LOGGING
+		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+			sctp_packet_log(o_pak);
+		}
+#endif
 		SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
-	}
+		break;
 #endif
 #ifdef INET6
-	if (ip6_out != NULL) {
-		ip6_out->ip6_plen = len - sizeof(struct ip6_hdr);
-#ifdef  SCTP_PACKET_LOGGING
-		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
-			sctp_packet_log(mout, len);
-		}
-#endif
+	case AF_INET6:
+		ip6->ip6_plen = len - sizeof(struct ip6_hdr);
 		if (port) {
 #if defined(SCTP_WITH_NO_CSUM)
 			SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11119,13 +11190,25 @@
 			SCTP_STAT_INCR(sctps_sendnocrc);
 #else
 			mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
-			mout->m_pkthdr.csum_data = 0;
+			mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
 			SCTP_STAT_INCR(sctps_sendhwcrc);
 #endif
 		}
+#ifdef SCTP_PACKET_LOGGING
+		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+			sctp_packet_log(o_pak);
+		}
+#endif
 		SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
+		break;
+#endif
+	default:
+		SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+		    dst->sa_family);
+		sctp_m_freem(mout);
+		SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+		return;
 	}
-#endif
 	SCTP_STAT_INCR(sctps_sendpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
@@ -11133,12 +11216,13 @@
 }
 
 void
-sctp_send_shutdown_complete2(struct mbuf *m, struct sctphdr *sh,
-    uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
-	sctp_send_resp_msg(m, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
-	    use_mflowid, mflowid,
+	sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
+	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
 }
 
@@ -11178,10 +11262,11 @@
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->send_size = sizeof(struct sctp_heartbeat_chunk);
 
-	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
@@ -11205,7 +11290,7 @@
 	hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
 	hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
 	/* Did our user request this one, put it in */
-	hb->heartbeat.hb_info.addr_family = net->ro._l_addr.sa.sa_family;
+	hb->heartbeat.hb_info.addr_family = (uint8_t) net->ro._l_addr.sa.sa_family;
 	hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len;
 	if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 		/*
@@ -11234,6 +11319,11 @@
 		break;
 #endif
 	default:
+		if (chk->data) {
+			sctp_m_freem(chk->data);
+			chk->data = NULL;
+		}
+		sctp_free_a_chunk(stcb, chk, so_locked);
 		return;
 		break;
 	}
@@ -11279,13 +11369,14 @@
 	if (chk == NULL) {
 		return;
 	}
+	SCTP_STAT_INCR(sctps_queue_upd_ecne);
 	chk->copy_by_ref = 0;
-	SCTP_STAT_INCR(sctps_queue_upd_ecne);
 	chk->rec.chunk_id.id = SCTP_ECN_ECHO;
 	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->send_size = sizeof(struct sctp_ecne_chunk);
-	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
@@ -11310,21 +11401,14 @@
 
 void
 sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
-    struct mbuf *m, int iphlen, int bad_crc)
+    struct mbuf *m, int len, int iphlen, int bad_crc)
 {
 	struct sctp_association *asoc;
 	struct sctp_pktdrop_chunk *drp;
 	struct sctp_tmit_chunk *chk;
 	uint8_t *datap;
-	int len;
 	int was_trunc = 0;
-	struct ip *iph;
-
-#ifdef INET6
-	struct ip6_hdr *ip6h;
-
-#endif
-	int fullsz = 0, extra = 0;
+	int fullsz = 0;
 	long spc;
 	int offset;
 	struct sctp_chunkhdr *ch, chunk_buf;
@@ -11335,7 +11419,7 @@
 	}
 	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
-	if (asoc->peer_supports_pktdrop == 0) {
+	if (asoc->pktdrop_supported == 0) {
 		/*-
 		 * peer must declare support before I send one.
 		 */
@@ -11349,28 +11433,11 @@
 		return;
 	}
 	chk->copy_by_ref = 0;
-	iph = mtod(m, struct ip *);
-	if (iph == NULL) {
-		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
-		return;
-	}
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		/* IPv4 */
-		len = chk->send_size = iph->ip_len;
-		break;
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		/* IPv6 */
-		ip6h = mtod(m, struct ip6_hdr *);
-		len = chk->send_size = htons(ip6h->ip6_plen);
-		break;
-#endif
-	default:
-		return;
-	}
+	chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
+	len -= iphlen;
+	chk->send_size = len;
 	/* Validate that we do not have an ABORT in here. */
 	offset = iphlen + sizeof(struct sctphdr);
 	ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
@@ -11406,12 +11473,12 @@
 		/*
 		 * only send 1 mtu worth, trim off the excess on the end.
 		 */
-		fullsz = len - extra;
+		fullsz = len;
 		len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
 		was_trunc = 1;
 	}
 	chk->asoc = &stcb->asoc;
-	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 jump_out:
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
@@ -11455,8 +11522,6 @@
 	} else {
 		chk->whoTo = NULL;
 	}
-	chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
-	chk->rec.chunk_id.can_take_data = 1;
 	drp->ch.chunk_type = SCTP_PACKET_DROPPED;
 	drp->ch.chunk_length = htons(chk->send_size);
 	spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
@@ -11490,11 +11555,11 @@
 	struct sctp_cwr_chunk *cwr;
 	struct sctp_tmit_chunk *chk;
 
-	asoc = &stcb->asoc;
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	if (net == NULL) {
 		return;
 	}
+	asoc = &stcb->asoc;
 	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
 		if ((chk->rec.chunk_id.id == SCTP_ECN_CWR) && (net == chk->whoTo)) {
 			/*
@@ -11522,9 +11587,10 @@
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_ECN_CWR;
 	chk->rec.chunk_id.can_take_data = 1;
+	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->send_size = sizeof(struct sctp_cwr_chunk);
-	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+	chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return;
@@ -11544,23 +11610,37 @@
 	asoc->ctrl_queue_cnt++;
 }
 
-void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
-    int number_entries, uint16_t * list,
+static int
+sctp_add_stream_reset_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
     uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
 {
-	int len, old_len, i;
+	uint16_t len, old_len, i;
 	struct sctp_stream_reset_out_request *req_out;
 	struct sctp_chunkhdr *ch;
+	int at;
+	int number_entries = 0;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
-
 	/* get to new offset for the param. */
 	req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
 	/* now how long will this param be? */
+	for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+		if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+		    (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+		    TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+			number_entries++;
+		}
+	}
+	if (number_entries == 0) {
+		return (0);
+	}
+	if (number_entries == stcb->asoc.streamoutcnt) {
+		number_entries = 0;
+	}
+	if (number_entries > SCTP_MAX_STREAMS_AT_ONCE_RESET) {
+		number_entries = SCTP_MAX_STREAMS_AT_ONCE_RESET;
+	}
 	len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
 	req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
 	req_out->ph.param_length = htons(len);
@@ -11567,10 +11647,24 @@
 	req_out->request_seq = htonl(seq);
 	req_out->response_seq = htonl(resp_seq);
 	req_out->send_reset_at_tsn = htonl(last_sent);
+	at = 0;
 	if (number_entries) {
-		for (i = 0; i < number_entries; i++) {
-			req_out->list_of_streams[i] = htons(list[i]);
+		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+			if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+			    (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+			    TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+				req_out->list_of_streams[at] = htons(i);
+				at++;
+				stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
+				if (at >= number_entries) {
+					break;
+				}
+			}
 		}
+	} else {
+		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+			stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
+		}
 	}
 	if (SCTP_SIZE32(len) > len) {
 		/*-
@@ -11586,22 +11680,19 @@
 	chk->book_size_scale = 0;
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
-	return;
+	return (1);
 }
 
-
-void
+static void
 sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
     int number_entries, uint16_t * list,
     uint32_t seq)
 {
-	int len, old_len, i;
+	uint16_t len, old_len, i;
 	struct sctp_stream_reset_in_request *req_in;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* get to new offset for the param. */
@@ -11633,18 +11724,15 @@
 	return;
 }
 
-
-void
+static void
 sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
     uint32_t seq)
 {
-	int len, old_len;
+	uint16_t len, old_len;
 	struct sctp_stream_reset_tsn_request *req_tsn;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* get to new offset for the param. */
@@ -11668,13 +11756,11 @@
 sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result)
 {
-	int len, old_len;
+	uint16_t len, old_len;
 	struct sctp_stream_reset_response *resp;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* get to new offset for the param. */
@@ -11693,22 +11779,80 @@
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 	return;
+}
 
+void
+sctp_send_deferred_reset_response(struct sctp_tcb *stcb,
+    struct sctp_stream_reset_list *ent,
+    int response)
+{
+	struct sctp_association *asoc;
+	struct sctp_tmit_chunk *chk;
+	struct sctp_chunkhdr *ch;
+
+	asoc = &stcb->asoc;
+
+	/*
+	 * Reset our last reset action to the new one IP -> response
+	 * (PERFORMED probably). This assures that if we fail to send, a
+	 * retran from the peer will get the new response.
+	 */
+	asoc->last_reset_action[0] = response;
+	if (asoc->stream_reset_outstanding) {
+		return;
+	}
+	sctp_alloc_a_chunk(stcb, chk);
+	if (chk == NULL) {
+		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+		return;
+	}
+	chk->copy_by_ref = 0;
+	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
+	chk->asoc = &stcb->asoc;
+	chk->book_size = sizeof(struct sctp_chunkhdr);
+	chk->send_size = SCTP_SIZE32(chk->book_size);
+	chk->book_size_scale = 0;
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+	if (chk->data == NULL) {
+		sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
+		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+		return;
+	}
+	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+	sctp_add_stream_reset_result(chk, ent->seq, response);
+	/* setup chunk parameters */
+	chk->sent = SCTP_DATAGRAM_UNSENT;
+	chk->snd_count = 0;
+	if (stcb->asoc.alternate) {
+		chk->whoTo = stcb->asoc.alternate;
+	} else {
+		chk->whoTo = stcb->asoc.primary_destination;
+	}
+	ch = mtod(chk->data, struct sctp_chunkhdr *);
+	ch->chunk_type = SCTP_STREAM_RESET;
+	ch->chunk_flags = 0;
+	ch->chunk_length = htons(chk->book_size);
+	atomic_add_int(&chk->whoTo->ref_count, 1);
+	SCTP_BUF_LEN(chk->data) = chk->send_size;
+	/* insert the chunk for sending */
+	TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+	    chk,
+	    sctp_next);
+	asoc->ctrl_queue_cnt++;
 }
 
-
 void
 sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
     uint32_t resp_seq, uint32_t result,
     uint32_t send_una, uint32_t recv_next)
 {
-	int len, old_len;
+	uint16_t len, old_len;
 	struct sctp_stream_reset_response_tsn *resp;
 	struct sctp_chunkhdr *ch;
 
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
 	old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
 
 	/* get to new offset for the param. */
@@ -11736,7 +11880,7 @@
     uint32_t seq,
     uint16_t adding)
 {
-	int len, old_len;
+	uint16_t len, old_len;
 	struct sctp_chunkhdr *ch;
 	struct sctp_stream_reset_add_strm *addstr;
 
@@ -11769,7 +11913,7 @@
     uint32_t seq,
     uint16_t adding)
 {
-	int len, old_len;
+	uint16_t len, old_len;
 	struct sctp_chunkhdr *ch;
 	struct sctp_stream_reset_add_strm *addstr;
 
@@ -11796,23 +11940,91 @@
 	return;
 }
 
+int
+sctp_send_stream_reset_out_if_possible(struct sctp_tcb *stcb, int so_locked)
+{
+	struct sctp_association *asoc;
+	struct sctp_tmit_chunk *chk;
+	struct sctp_chunkhdr *ch;
+	uint32_t seq;
 
+	asoc = &stcb->asoc;
+	asoc->trigger_reset = 0;
+	if (asoc->stream_reset_outstanding) {
+		return (EALREADY);
+	}
+	sctp_alloc_a_chunk(stcb, chk);
+	if (chk == NULL) {
+		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+		return (ENOMEM);
+	}
+	chk->copy_by_ref = 0;
+	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
+	chk->asoc = &stcb->asoc;
+	chk->book_size = sizeof(struct sctp_chunkhdr);
+	chk->send_size = SCTP_SIZE32(chk->book_size);
+	chk->book_size_scale = 0;
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+	if (chk->data == NULL) {
+		sctp_free_a_chunk(stcb, chk, so_locked);
+		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+		return (ENOMEM);
+	}
+	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
+	/* setup chunk parameters */
+	chk->sent = SCTP_DATAGRAM_UNSENT;
+	chk->snd_count = 0;
+	if (stcb->asoc.alternate) {
+		chk->whoTo = stcb->asoc.alternate;
+	} else {
+		chk->whoTo = stcb->asoc.primary_destination;
+	}
+	ch = mtod(chk->data, struct sctp_chunkhdr *);
+	ch->chunk_type = SCTP_STREAM_RESET;
+	ch->chunk_flags = 0;
+	ch->chunk_length = htons(chk->book_size);
+	atomic_add_int(&chk->whoTo->ref_count, 1);
+	SCTP_BUF_LEN(chk->data) = chk->send_size;
+	seq = stcb->asoc.str_reset_seq_out;
+	if (sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1))) {
+		seq++;
+		asoc->stream_reset_outstanding++;
+	} else {
+		m_freem(chk->data);
+		chk->data = NULL;
+		sctp_free_a_chunk(stcb, chk, so_locked);
+		return (ENOENT);
+	}
+	asoc->str_reset = chk;
+	/* insert the chunk for sending */
+	TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+	    chk,
+	    sctp_next);
+	asoc->ctrl_queue_cnt++;
+
+	if (stcb->asoc.send_sack) {
+		sctp_send_sack(stcb, so_locked);
+	}
+	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+	return (0);
+}
+
 int
 sctp_send_str_reset_req(struct sctp_tcb *stcb,
-    int number_entries, uint16_t * list,
-    uint8_t send_out_req,
+    uint16_t number_entries, uint16_t * list,
     uint8_t send_in_req,
     uint8_t send_tsn_req,
     uint8_t add_stream,
     uint16_t adding_o,
-    uint16_t adding_i, uint8_t peer_asked
-)
+    uint16_t adding_i, uint8_t peer_asked)
 {
-
 	struct sctp_association *asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
+	int can_send_out_req = 0;
 	uint32_t seq;
 
 	asoc = &stcb->asoc;
@@ -11823,17 +12035,27 @@
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
 		return (EBUSY);
 	}
-	if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) &&
+	if ((send_in_req == 0) && (send_tsn_req == 0) &&
 	    (add_stream == 0)) {
 		/* nothing to do */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
 	}
-	if (send_tsn_req && (send_out_req || send_in_req)) {
+	if (send_tsn_req && send_in_req) {
 		/* error, can't do that */
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
 		return (EINVAL);
+	} else if (send_in_req) {
+		can_send_out_req = 1;
 	}
+	if (number_entries > (MCLBYTES -
+	    SCTP_MIN_OVERHEAD -
+	    sizeof(struct sctp_chunkhdr) -
+	    sizeof(struct sctp_stream_reset_out_request)) /
+	    sizeof(uint16_t)) {
+		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+		return (ENOMEM);
+	}
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -11842,12 +12064,13 @@
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
+	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->book_size = sizeof(struct sctp_chunkhdr);
 	chk->send_size = SCTP_SIZE32(chk->book_size);
 	chk->book_size_scale = 0;
 
-	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
 		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -11871,12 +12094,14 @@
 	SCTP_BUF_LEN(chk->data) = chk->send_size;
 
 	seq = stcb->asoc.str_reset_seq_out;
-	if (send_out_req) {
-		sctp_add_stream_reset_out(chk, number_entries, list,
-		    seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
-		asoc->stream_reset_out_is_outstanding = 1;
-		seq++;
-		asoc->stream_reset_outstanding++;
+	if (can_send_out_req) {
+		int ret;
+
+		ret = sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
+		if (ret) {
+			seq++;
+			asoc->stream_reset_outstanding++;
+		}
 	}
 	if ((add_stream & 1) &&
 	    ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) {
@@ -11885,10 +12110,15 @@
 		struct sctp_stream_queue_pending *sp, *nsp;
 		int i;
 
+#if defined(SCTP_DETAILED_STR_STATS)
+		int j;
+
+#endif
+
 		oldstream = stcb->asoc.strmout;
 		/* get some more */
 		SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
-		    ((stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out)),
+		    (stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out),
 		    SCTP_M_STRMO);
 		if (stcb->asoc.strmout == NULL) {
 			uint8_t x;
@@ -11907,9 +12137,11 @@
 		stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 0, 1);
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
-			stcb->asoc.strmout[i].next_sequence_sent = oldstream[i].next_sequence_sent;
+			stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues;
+			stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send;
 			stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
 			stcb->asoc.strmout[i].stream_no = i;
+			stcb->asoc.strmout[i].state = oldstream[i].state;
 			stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], &oldstream[i]);
 			/* now anything on those queues? */
 			TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) {
@@ -11927,11 +12159,22 @@
 		/* now the new streams */
 		stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
 		for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) {
-			stcb->asoc.strmout[i].next_sequence_sent = 0x0;
 			TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+			stcb->asoc.strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+				stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+				stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+			}
+#else
+			stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+			stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
+			stcb->asoc.strmout[i].next_sequence_send = 0x0;
 			stcb->asoc.strmout[i].stream_no = i;
 			stcb->asoc.strmout[i].last_msg_incomplete = 0;
 			stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
+			stcb->asoc.strmout[i].state = SCTP_STREAM_CLOSED;
 		}
 		stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o;
 		SCTP_FREE(oldstream, SCTP_M_STRMO);
@@ -11965,14 +12208,17 @@
 	    chk,
 	    sctp_next);
 	asoc->ctrl_queue_cnt++;
+	if (stcb->asoc.send_sack) {
+		sctp_send_sack(stcb, SCTP_SO_LOCKED);
+	}
 	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
 	return (0);
 }
 
 void
-sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
-    struct mbuf *cause,
-    uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	/* Don't respond to an ABORT with an ABORT. */
@@ -11981,20 +12227,20 @@
 			sctp_m_freem(cause);
 		return;
 	}
-	sctp_send_resp_msg(m, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
-	    use_mflowid, mflowid,
+	sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
+	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
 	return;
 }
 
 void
-sctp_send_operr_to(struct mbuf *m, struct sctphdr *sh, uint32_t vtag,
-    struct mbuf *cause,
-    uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
-	sctp_send_resp_msg(m, sh, vtag, SCTP_OPERATION_ERROR, cause,
-	    use_mflowid, mflowid,
+	sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause,
+	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
 	return;
 }
@@ -12012,8 +12258,8 @@
 	m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0,
 	    (M_PKTHDR | (user_marks_eor ? M_EOR : 0)));
 	if (m == NULL) {
-		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
-		*error = ENOMEM;
+		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+		*error = ENOBUFS;
 	} else {
 		*sndout = m_length(m, NULL);
 		*new_tail = m_last(m);
@@ -12026,14 +12272,11 @@
     struct uio *uio,
     int resv_upfront)
 {
-	int left;
-
-	left = sp->length;
 	sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
 	    resv_upfront, 0);
 	if (sp->data == NULL) {
-		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
-		return (ENOMEM);
+		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+		return (ENOBUFS);
 	}
 	sp->tail_mbuf = m_last(sp->data);
 	return (0);
@@ -12084,7 +12327,6 @@
 	sp->timetolive = srcv->sinfo_timetolive;
 	sp->ppid = srcv->sinfo_ppid;
 	sp->context = srcv->sinfo_context;
-	sp->strseq = 0;
 	(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
 
 	sp->stream = srcv->sinfo_stream;
@@ -12253,7 +12495,7 @@
 		sndlen = SCTP_HEADER_LEN(i_pak);
 	}
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
-	    addr,
+	    (void *)addr,
 	    sndlen);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_socket->so_qlimit)) {
@@ -12423,6 +12665,7 @@
 			}
 #endif
 			stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+			    inp->sctp_ep.pre_open_stream_count,
 			    p
 			    );
 			if (stcb == NULL) {
@@ -12458,7 +12701,8 @@
 
 			if (control) {
 				if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
-					sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7);
+					sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+					    SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
 					hold_tcblock = 0;
 					stcb = NULL;
 					goto out_unlocked;
@@ -12544,12 +12788,24 @@
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 		create_lock_applied = 0;
 	}
-	if (asoc->stream_reset_outstanding) {
+	/* Is the stream no. valid? */
+	if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+		/* Invalid stream number */
+		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+		error = EINVAL;
+		goto out_unlocked;
+	}
+	if ((asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPEN) &&
+	    (asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPENING)) {
 		/*
 		 * Can't queue any data while stream reset is underway.
 		 */
-		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
-		error = EAGAIN;
+		if (asoc->strmout[srcv->sinfo_stream].state > SCTP_STREAM_OPEN) {
+			error = EAGAIN;
+		} else {
+			error = EINVAL;
+		}
+		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
 		goto out_unlocked;
 	}
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
@@ -12598,7 +12854,7 @@
 		if (top) {
 			struct mbuf *cntm = NULL;
 
-			mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAIT, 1, MT_DATA);
+			mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAITOK, 1, MT_DATA);
 			if (sndlen != 0) {
 				for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) {
 					tot_out += SCTP_BUF_LEN(cntm);
@@ -12614,7 +12870,7 @@
 				error = EMSGSIZE;
 				goto out;
 			}
-			mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+			mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA);
 		}
 		if (mm == NULL) {
 			SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -12632,7 +12888,7 @@
 			/* now move forward the data pointer */
 			ph = mtod(mm, struct sctp_paramhdr *);
 			ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
-			ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out));
+			ph->param_length = htons(sizeof(struct sctp_paramhdr) + tot_out);
 			ph++;
 			SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
 			if (top == NULL) {
@@ -12688,13 +12944,6 @@
 		SCTP_TCB_UNLOCK(stcb);
 		hold_tcblock = 0;
 	}
-	/* Is the stream no. valid? */
-	if (srcv->sinfo_stream >= asoc->streamoutcnt) {
-		/* Invalid stream number */
-		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
-		error = EINVAL;
-		goto out_unlocked;
-	}
 	if (asoc->strmout == NULL) {
 		/* huh? software error */
 		SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
@@ -12837,15 +13086,7 @@
 			}
 			sctp_snd_sb_alloc(stcb, sp->length);
 			atomic_add_int(&asoc->stream_queue_cnt, 1);
-			if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
-				sp->strseq = strm->next_sequence_sent;
-				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_SCTP) {
-					sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN,
-					    (uintptr_t) stcb, sp->length,
-					    (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0);
-				}
-				strm->next_sequence_sent++;
-			} else {
+			if (srcv->sinfo_flags & SCTP_UNORDERED) {
 				SCTP_STAT_INCR(sctps_sends_with_unord);
 			}
 			TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
@@ -12936,7 +13177,7 @@
 				continue;
 			}
 			/* PR-SCTP? */
-			if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+			if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) {
 				/*
 				 * This is ugly but we must assure locking
 				 * order
@@ -12998,7 +13239,7 @@
 				/*-
 				 * Ok, Nagle is set on and we have data outstanding.
 				 * Don't send anything and let SACKs drive out the
-				 * data unless wen have a "full" segment to send.
+				 * data unless we have a "full" segment to send.
 				 */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
 					sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
@@ -13157,18 +13398,19 @@
 			    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 				struct sctp_nets *netp;
 
+				/* only send SHUTDOWN the first time through */
+				if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+				}
+				SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+				SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+				sctp_stop_timers_for_shutdown(stcb);
 				if (stcb->asoc.alternate) {
 					netp = stcb->asoc.alternate;
 				} else {
 					netp = stcb->asoc.primary_destination;
 				}
-				/* only send SHUTDOWN the first time through */
 				sctp_send_shutdown(stcb, netp);
-				if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
-					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
-				}
-				SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
-				SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 				    netp);
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
@@ -13206,13 +13448,20 @@
 				if (TAILQ_EMPTY(&asoc->send_queue) &&
 				    TAILQ_EMPTY(&asoc->sent_queue) &&
 				    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+					struct mbuf *op_err;
+					char msg[SCTP_DIAG_INFO_LEN];
+
 			abort_anyway:
 					if (free_cnt_applied) {
 						atomic_add_int(&stcb->asoc.refcnt, -1);
 						free_cnt_applied = 0;
 					}
+					snprintf(msg, sizeof(msg),
+					    "%s:%d at %s", __FILE__, __LINE__, __func__);
+					op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+					    msg);
 					sctp_abort_an_association(stcb->sctp_ep, stcb,
-					    NULL, SCTP_SO_LOCKED);
+					    op_err, SCTP_SO_LOCKED);
 					/*
 					 * now relock the stcb so everything
 					 * is sane
@@ -13352,13 +13601,6 @@
 		}
 	}
 #endif
-#ifdef INVARIANTS
-	if (inp) {
-		sctp_validate_no_locks(inp);
-	} else {
-		SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n");
-	}
-#endif
 	if (top) {
 		sctp_m_freem(top);
 	}
@@ -13386,19 +13628,14 @@
 	    (stcb == NULL))
 		return (m);
 
-	/* sysctl disabled auth? */
-	if (SCTP_BASE_SYSCTL(sctp_auth_disable))
+	if (stcb->asoc.auth_supported == 0) {
 		return (m);
-
-	/* peer doesn't do auth... */
-	if (!stcb->asoc.peer_supports_auth) {
-		return (m);
 	}
 	/* does the requested chunk require auth? */
 	if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
 		return (m);
 	}
-	m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+	m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_NOWAIT, 1, MT_HEADER);
 	if (m_auth == NULL) {
 		/* no mbuf's */
 		return (m);
@@ -13497,7 +13734,7 @@
 	}
 	ifa = (struct ifaddr *)sifa->ifa;
 	mask = (struct sockaddr_in *)(ifa->ifa_netmask);
-	sin = (struct sockaddr_in *)&sifa->address.sin;
+	sin = &sifa->address.sin;
 	srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
 	SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
 	SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);

Modified: trunk/sys/netinet/sctp_output.h
===================================================================
--- trunk/sys/netinet/sctp_output.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_output.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_output.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_output.h 295208 2016-02-03 14:04:07Z tuexen $");
 
 #ifndef _NETINET_SCTP_OUTPUT_H_
 #define _NETINET_SCTP_OUTPUT_H_
@@ -47,7 +47,8 @@
     struct sctp_tcb *stcb,
     struct sctp_scoping *scope,
     struct mbuf *m_at,
-    int cnt_inits_to);
+    int cnt_inits_to,
+    uint16_t * padding_len, uint16_t * chunk_len);
 
 
 int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
@@ -55,13 +56,9 @@
 
 int
 sctp_is_address_in_scope(struct sctp_ifa *ifa,
-    int ipv4_addr_legal,
-    int ipv6_addr_legal,
-    int loopback_scope,
-    int ipv4_local_scope,
-    int local_scope,
-    int site_scope,
+    struct sctp_scoping *scope,
     int do_update);
+
 int
     sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa);
 
@@ -84,8 +81,11 @@
 );
 
 void
-sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
-    int, int, struct sctphdr *, struct sctp_init_chunk *,
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+    struct sctp_nets *, struct mbuf *,
+    int, int,
+    struct sockaddr *, struct sockaddr *,
+    struct sctphdr *, struct sctp_init_chunk *,
     uint8_t, uint32_t,
     uint32_t, uint16_t, int);
 
@@ -117,8 +117,9 @@
 void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
 
 void 
-sctp_send_shutdown_complete2(struct mbuf *, struct sctphdr *,
-    uint8_t, uint32_t,
+sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
+    struct sctphdr *,
+    uint8_t, uint32_t, uint16_t,
     uint32_t, uint16_t);
 
 void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
@@ -163,7 +164,7 @@
 
 void
 sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
-    int, int);
+    int, int, int);
 
 
 
@@ -171,48 +172,33 @@
 
 
 void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
-    int number_entries, uint16_t * list,
-    uint32_t seq, uint32_t resp_seq, uint32_t last_sent);
+     sctp_add_stream_reset_result(struct sctp_tmit_chunk *, uint32_t, uint32_t);
 
 void
-sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
-    int number_entries, uint16_t * list,
-    uint32_t seq);
+sctp_send_deferred_reset_response(struct sctp_tcb *,
+    struct sctp_stream_reset_list *,
+    int);
 
 void
-sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
-    uint32_t seq);
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *,
+    uint32_t, uint32_t, uint32_t, uint32_t);
+int
+    sctp_send_stream_reset_out_if_possible(struct sctp_tcb *, int);
 
-void
-sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
-    uint32_t resp_seq, uint32_t result);
-
-void
-sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
-    uint32_t resp_seq, uint32_t result,
-    uint32_t send_una, uint32_t recv_next);
-
 int
-sctp_send_str_reset_req(struct sctp_tcb *stcb,
-    int number_entries, uint16_t * list,
-    uint8_t send_out_req,
-    uint8_t send_in_req,
-    uint8_t send_tsn_req,
-    uint8_t add_stream,
-    uint16_t adding_o,
-    uint16_t adding_i, uint8_t from_peer);
+sctp_send_str_reset_req(struct sctp_tcb *, uint16_t, uint16_t *,
+    uint8_t, uint8_t, uint8_t, uint16_t, uint16_t, uint8_t);
 
 void
-sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
-    struct mbuf *,
-    uint8_t, uint32_t,
+sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
+    struct sctphdr *, uint32_t, struct mbuf *,
+    uint8_t, uint32_t, uint16_t,
     uint32_t, uint16_t);
 
 void 
-sctp_send_operr_to(struct mbuf *, struct sctphdr *, uint32_t,
-    struct mbuf *,
-    uint8_t, uint32_t,
+sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
+    struct sctphdr *, uint32_t, struct mbuf *,
+    uint8_t, uint32_t, uint16_t,
     uint32_t, uint16_t);
 
 #endif				/* _KERNEL || __Userspace__ */

Modified: trunk/sys/netinet/sctp_pcb.c
===================================================================
--- trunk/sys/netinet/sctp_pcb.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_pcb.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_pcb.c 239448 2012-08-20 16:45:46Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_pcb.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
@@ -47,7 +47,9 @@
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_dtrace_define.h>
+#if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
+#endif
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #endif
@@ -95,11 +97,10 @@
 	spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq);
 	spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq);
 	spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks);
-
 	SCTP_INP_INFO_RUNLOCK();
 }
 
-/*
+/*-
  * Addresses are added to VRF's (Virtual Router's). For BSD we
  * have only the default VRF 0. We maintain a hash list of
  * VRF's. Each VRF has its own list of sctp_ifn's. Each of
@@ -215,7 +216,6 @@
 }
 
 
-
 struct sctp_vrf *
 sctp_find_vrf(uint32_t vrf_id)
 {
@@ -231,6 +231,7 @@
 	return (NULL);
 }
 
+
 void
 sctp_free_vrf(struct sctp_vrf *vrf)
 {
@@ -246,6 +247,7 @@
 	}
 }
 
+
 void
 sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
 {
@@ -259,6 +261,7 @@
 	}
 }
 
+
 void
 sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
 {
@@ -284,6 +287,7 @@
 	}
 }
 
+
 static void
 sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
 {
@@ -306,12 +310,13 @@
 	sctp_free_ifn(sctp_ifnp);
 }
 
+
 void
 sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
     const char *if_name, uint32_t ifn_index)
 {
 	struct sctp_vrf *vrf;
-	struct sctp_ifa *sctp_ifap = NULL;
+	struct sctp_ifa *sctp_ifap;
 
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(vrf_id);
@@ -349,12 +354,13 @@
 	SCTP_IPI_ADDR_RUNLOCK();
 }
 
+
 void
 sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
     const char *if_name, uint32_t ifn_index)
 {
 	struct sctp_vrf *vrf;
-	struct sctp_ifa *sctp_ifap = NULL;
+	struct sctp_ifa *sctp_ifap;
 
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(vrf_id);
@@ -392,6 +398,7 @@
 	SCTP_IPI_ADDR_RUNLOCK();
 }
 
+
 /*-
  * Add an ifa to an ifn.
  * Register the interface as necessary.
@@ -429,6 +436,7 @@
 	}
 }
 
+
 /*-
  * Remove an ifa from its ifn.
  * If no more addresses exist, remove the ifn too. Otherwise, re-register
@@ -480,6 +488,7 @@
 	}
 }
 
+
 struct sctp_ifa *
 sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
     uint32_t ifn_type, const char *if_name, void *ifa,
@@ -554,7 +563,7 @@
 		    (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
 			SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
 			    sctp_ifap->ifn_p->ifn_name, ifn_index,
-			    sctp_ifap);
+			    (void *)sctp_ifap);
 			if (new_ifn_af) {
 				/* Remove the created one that we don't want */
 				sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
@@ -576,7 +585,7 @@
 				 * old one
 				 */
 				SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
-				    sctp_ifap, sctp_ifap->ifn_p->ifn_name,
+				    (void *)sctp_ifap, sctp_ifap->ifn_p->ifn_name,
 				    sctp_ifap->ifn_p->ifn_index, if_name,
 				    ifn_index);
 				/* remove the address from the old ifn */
@@ -588,7 +597,7 @@
 				/* repair ifnp which was NULL ? */
 				sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
 				SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
-				    sctp_ifnp, sctp_ifap);
+				    (void *)sctp_ifnp, (void *)sctp_ifap);
 				sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
 			}
 			goto exit_stage_left;
@@ -617,7 +626,7 @@
 		{
 			struct sockaddr_in *sin;
 
-			sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+			sin = &sctp_ifap->address.sin;
 			if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
 			    (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
 				sctp_ifap->src_is_loop = 1;
@@ -637,7 +646,7 @@
 			/* ok to use deprecated addresses? */
 			struct sockaddr_in6 *sin6;
 
-			sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+			sin6 = &sctp_ifap->address.sin6;
 			if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
 			    (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
 				sctp_ifap->src_is_loop = 1;
@@ -764,9 +773,16 @@
 				return;
 			}
 		}
-		SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
+		SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", (void *)sctp_ifap);
 		sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
-		sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+		/*
+		 * We don't set the flag. This means that the structure will
+		 * hang around in EP's that have bound specific to it until
+		 * they close. This gives us TCP like behavior if someone
+		 * removes an address (or for that matter adds it right
+		 * back).
+		 */
+		/* sctp_ifap->localifa_flags |= SCTP_BEING_DELETED; */
 		vrf->total_ifa_count--;
 		LIST_REMOVE(sctp_ifap, next_bucket);
 		sctp_remove_ifa_from_ifn(sctp_ifap);
@@ -818,6 +834,183 @@
 }
 
 
+static int
+sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
+{
+	int loopback_scope;
+
+#if defined(INET)
+	int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined(INET6)
+	int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
+	struct sctp_vrf *vrf;
+	struct sctp_ifn *sctp_ifn;
+	struct sctp_ifa *sctp_ifa;
+
+	loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+	ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+	ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+	local_scope = stcb->asoc.scope.local_scope;
+	site_scope = stcb->asoc.scope.site_scope;
+	ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
+
+	SCTP_IPI_ADDR_RLOCK();
+	vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+	if (vrf == NULL) {
+		/* no vrf, no addresses */
+		SCTP_IPI_ADDR_RUNLOCK();
+		return (0);
+	}
+	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+			if ((loopback_scope == 0) &&
+			    SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+				continue;
+			}
+			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+				if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
+				    (!sctp_is_addr_pending(stcb, sctp_ifa))) {
+					/*
+					 * We allow pending addresses, where
+					 * we have sent an asconf-add to be
+					 * considered valid.
+					 */
+					continue;
+				}
+				if (sctp_ifa->address.sa.sa_family != to->sa_family) {
+					continue;
+				}
+				switch (sctp_ifa->address.sa.sa_family) {
+#ifdef INET
+				case AF_INET:
+					if (ipv4_addr_legal) {
+						struct sockaddr_in *sin,
+						           *rsin;
+
+						sin = &sctp_ifa->address.sin;
+						rsin = (struct sockaddr_in *)to;
+						if ((ipv4_local_scope == 0) &&
+						    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+							continue;
+						}
+						if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+						    &sin->sin_addr) != 0) {
+							continue;
+						}
+						if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+							SCTP_IPI_ADDR_RUNLOCK();
+							return (1);
+						}
+					}
+					break;
+#endif
+#ifdef INET6
+				case AF_INET6:
+					if (ipv6_addr_legal) {
+						struct sockaddr_in6 *sin6,
+						            *rsin6;
+
+						sin6 = &sctp_ifa->address.sin6;
+						rsin6 = (struct sockaddr_in6 *)to;
+						if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+						    &sin6->sin6_addr) != 0) {
+							continue;
+						}
+						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+							if (local_scope == 0)
+								continue;
+							if (sin6->sin6_scope_id == 0) {
+								if (sa6_recoverscope(sin6) != 0)
+									continue;
+							}
+						}
+						if ((site_scope == 0) &&
+						    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+							continue;
+						}
+						if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+							SCTP_IPI_ADDR_RUNLOCK();
+							return (1);
+						}
+					}
+					break;
+#endif
+				default:
+					/* TSNH */
+					break;
+				}
+			}
+		}
+	} else {
+		struct sctp_laddr *laddr;
+
+		LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+				SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
+				continue;
+			}
+			if (sctp_is_addr_restricted(stcb, laddr->ifa) &&
+			    (!sctp_is_addr_pending(stcb, laddr->ifa))) {
+				/*
+				 * We allow pending addresses, where we have
+				 * sent an asconf-add to be considered
+				 * valid.
+				 */
+				continue;
+			}
+			if (laddr->ifa->address.sa.sa_family != to->sa_family) {
+				continue;
+			}
+			switch (to->sa_family) {
+#ifdef INET
+			case AF_INET:
+				{
+					struct sockaddr_in *sin, *rsin;
+
+					sin = &laddr->ifa->address.sin;
+					rsin = (struct sockaddr_in *)to;
+					if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+						SCTP_IPI_ADDR_RUNLOCK();
+						return (1);
+					}
+					break;
+				}
+#endif
+#ifdef INET6
+			case AF_INET6:
+				{
+					struct sockaddr_in6 *sin6, *rsin6;
+
+					sin6 = &laddr->ifa->address.sin6;
+					rsin6 = (struct sockaddr_in6 *)to;
+					if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+						SCTP_IPI_ADDR_RUNLOCK();
+						return (1);
+					}
+					break;
+				}
+
+#endif
+			default:
+				/* TSNH */
+				break;
+			}
+
+		}
+	}
+	SCTP_IPI_ADDR_RUNLOCK();
+	return (0);
+}
+
+
 static struct sctp_tcb *
 sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
     struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
@@ -878,6 +1071,39 @@
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
+		switch (to->sa_family) {
+#ifdef INET
+		case AF_INET:
+			{
+				struct sockaddr_in *sin;
+
+				sin = (struct sockaddr_in *)to;
+				if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+				    &sin->sin_addr) != 0) {
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				break;
+			}
+#endif
+#ifdef INET6
+		case AF_INET6:
+			{
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)to;
+				if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+				    &sin6->sin6_addr) != 0) {
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				break;
+			}
+#endif
+		default:
+			SCTP_INP_RUNLOCK(inp);
+			continue;
+		}
 		if (inp->def_vrf_id != vrf_id) {
 			SCTP_INP_RUNLOCK(inp);
 			continue;
@@ -890,7 +1116,7 @@
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 
 				if (laddr->ifa == NULL) {
-					SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+					SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__);
 					continue;
 				}
 				if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -900,7 +1126,6 @@
 				if (laddr->ifa->address.sa.sa_family ==
 				    to->sa_family) {
 					/* see if it matches */
-
 #ifdef INET
 					if (from->sa_family == AF_INET) {
 						struct sockaddr_in *intf_addr,
@@ -943,7 +1168,7 @@
 		 * Ok if we hit here the ep has the address, does it hold
 		 * the tcb?
 		 */
-
+		/* XXX: Why don't we TAILQ_FOREACH through sctp_asoc_list? */
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb == NULL) {
 			SCTP_INP_RUNLOCK(inp);
@@ -950,6 +1175,11 @@
 			continue;
 		}
 		SCTP_TCB_LOCK(stcb);
+		if (!sctp_does_stcb_own_this_addr(stcb, to)) {
+			SCTP_TCB_UNLOCK(stcb);
+			SCTP_INP_RUNLOCK(inp);
+			continue;
+		}
 		if (stcb->rport != rport) {
 			/* remote port does not match. */
 			SCTP_TCB_UNLOCK(stcb);
@@ -961,6 +1191,11 @@
 			SCTP_INP_RUNLOCK(inp);
 			continue;
 		}
+		if (!sctp_does_stcb_own_this_addr(stcb, to)) {
+			SCTP_TCB_UNLOCK(stcb);
+			SCTP_INP_RUNLOCK(inp);
+			continue;
+		}
 		/* Does this TCB have a matching address? */
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 
@@ -1028,162 +1263,7 @@
 	return (NULL);
 }
 
-static int
-sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
-{
-	int loopback_scope, ipv4_local_scope, local_scope, site_scope;
-	int ipv4_addr_legal, ipv6_addr_legal;
-	struct sctp_vrf *vrf;
-	struct sctp_ifn *sctp_ifn;
-	struct sctp_ifa *sctp_ifa;
 
-	loopback_scope = stcb->asoc.loopback_scope;
-	ipv4_local_scope = stcb->asoc.ipv4_local_scope;
-	local_scope = stcb->asoc.local_scope;
-	site_scope = stcb->asoc.site_scope;
-	ipv4_addr_legal = ipv6_addr_legal = 0;
-	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		ipv6_addr_legal = 1;
-		if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
-			ipv4_addr_legal = 1;
-		}
-	} else {
-		ipv4_addr_legal = 1;
-	}
-
-	SCTP_IPI_ADDR_RLOCK();
-	vrf = sctp_find_vrf(stcb->asoc.vrf_id);
-	if (vrf == NULL) {
-		/* no vrf, no addresses */
-		SCTP_IPI_ADDR_RUNLOCK();
-		return (0);
-	}
-	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
-		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
-			if ((loopback_scope == 0) &&
-			    SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
-				continue;
-			}
-			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
-				if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
-				    (!sctp_is_addr_pending(stcb, sctp_ifa))) {
-					/*
-					 * We allow pending addresses, where
-					 * we have sent an asconf-add to be
-					 * considered valid.
-					 */
-					continue;
-				}
-				switch (sctp_ifa->address.sa.sa_family) {
-#ifdef INET
-				case AF_INET:
-					if (ipv4_addr_legal) {
-						struct sockaddr_in *sin,
-						           *rsin;
-
-						sin = &sctp_ifa->address.sin;
-						rsin = (struct sockaddr_in *)to;
-						if ((ipv4_local_scope == 0) &&
-						    IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
-							continue;
-						}
-						if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
-							SCTP_IPI_ADDR_RUNLOCK();
-							return (1);
-						}
-					}
-					break;
-#endif
-#ifdef INET6
-				case AF_INET6:
-					if (ipv6_addr_legal) {
-						struct sockaddr_in6 *sin6,
-						            *rsin6;
-
-						sin6 = &sctp_ifa->address.sin6;
-						rsin6 = (struct sockaddr_in6 *)to;
-						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
-							if (local_scope == 0)
-								continue;
-							if (sin6->sin6_scope_id == 0) {
-								if (sa6_recoverscope(sin6) != 0)
-									continue;
-							}
-						}
-						if ((site_scope == 0) &&
-						    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
-							continue;
-						}
-						if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
-							SCTP_IPI_ADDR_RUNLOCK();
-							return (1);
-						}
-					}
-					break;
-#endif
-				default:
-					/* TSNH */
-					break;
-				}
-			}
-		}
-	} else {
-		struct sctp_laddr *laddr;
-
-		LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
-			if (sctp_is_addr_restricted(stcb, laddr->ifa) &&
-			    (!sctp_is_addr_pending(stcb, laddr->ifa))) {
-				/*
-				 * We allow pending addresses, where we have
-				 * sent an asconf-add to be considered
-				 * valid.
-				 */
-				continue;
-			}
-			if (laddr->ifa->address.sa.sa_family != to->sa_family) {
-				continue;
-			}
-			switch (to->sa_family) {
-#ifdef INET
-			case AF_INET:
-				{
-					struct sockaddr_in *sin, *rsin;
-
-					sin = (struct sockaddr_in *)&laddr->ifa->address.sin;
-					rsin = (struct sockaddr_in *)to;
-					if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
-						SCTP_IPI_ADDR_RUNLOCK();
-						return (1);
-					}
-					break;
-				}
-#endif
-#ifdef INET6
-			case AF_INET6:
-				{
-					struct sockaddr_in6 *sin6, *rsin6;
-
-					sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
-					rsin6 = (struct sockaddr_in6 *)to;
-					if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
-						SCTP_IPI_ADDR_RUNLOCK();
-						return (1);
-					}
-					break;
-				}
-
-#endif
-			default:
-				/* TSNH */
-				break;
-			}
-
-		}
-	}
-	SCTP_IPI_ADDR_RUNLOCK();
-	return (0);
-}
-
 /*
  * rules for use
  *
@@ -1204,11 +1284,18 @@
 	uint16_t rport;
 
 	inp = *inp_p;
-	if (remote->sa_family == AF_INET) {
+	switch (remote->sa_family) {
+#ifdef INET
+	case AF_INET:
 		rport = (((struct sockaddr_in *)remote)->sin_port);
-	} else if (remote->sa_family == AF_INET6) {
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
 		rport = (((struct sockaddr_in6 *)remote)->sin6_port);
-	} else {
+		break;
+#endif
+	default:
 		return (NULL);
 	}
 	if (locked_tcb) {
@@ -1220,7 +1307,8 @@
 		SCTP_TCB_UNLOCK(locked_tcb);
 	}
 	SCTP_INP_INFO_RLOCK();
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		/*-
 		 * Now either this guy is our listener or it's the
 		 * connector. If it is the one that issued the connect, then
@@ -1354,9 +1442,6 @@
 		}
 		head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
 		    inp->sctp_hashmark)];
-		if (head == NULL) {
-			goto null_return;
-		}
 		LIST_FOREACH(stcb, head, sctp_tcbhash) {
 			if (stcb->rport != rport) {
 				/* remote port does not match */
@@ -1464,11 +1549,11 @@
 	return (NULL);
 }
 
+
 /*
  * Find an association for a specific endpoint using the association id given
  * out in the COMM_UP notification
  */
-
 struct sctp_tcb *
 sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
 {
@@ -1529,6 +1614,9 @@
 }
 
 
+/*
+ * Endpoint probe expects that the INP_INFO is locked.
+ */
 static struct sctp_inpcb *
 sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
     uint16_t lport, uint32_t vrf_id)
@@ -1545,12 +1633,8 @@
 	struct sockaddr_in6 *intf_addr6;
 
 #endif
-
 	int fnd;
 
-	/*
-	 * Endpoint probe expects that the INP_INFO is locked.
-	 */
 #ifdef INET
 	sin = NULL;
 #endif
@@ -1585,23 +1669,45 @@
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
 		    (inp->sctp_lport == lport)) {
 			/* got it */
+			switch (nam->sa_family) {
 #ifdef INET
-			if ((nam->sa_family == AF_INET) &&
-			    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
-			    SCTP_IPV6_V6ONLY(inp)) {
-				/* IPv4 on a IPv6 socket with ONLY IPv6 set */
-				SCTP_INP_RUNLOCK(inp);
-				continue;
-			}
+			case AF_INET:
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+				    SCTP_IPV6_V6ONLY(inp)) {
+					/*
+					 * IPv4 on a IPv6 socket with ONLY
+					 * IPv6 set
+					 */
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+				    &sin->sin_addr) != 0) {
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				break;
 #endif
 #ifdef INET6
-			/* A V6 address and the endpoint is NOT bound V6 */
-			if (nam->sa_family == AF_INET6 &&
-			    (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
-				SCTP_INP_RUNLOCK(inp);
-				continue;
+			case AF_INET6:
+				/*
+				 * A V6 address and the endpoint is NOT
+				 * bound V6
+				 */
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+				    &sin6->sin6_addr) != 0) {
+					SCTP_INP_RUNLOCK(inp);
+					continue;
+				}
+				break;
+#endif
+			default:
+				break;
 			}
-#endif
 			/* does a VRF id match? */
 			fnd = 0;
 			if (inp->def_vrf_id == vrf_id)
@@ -1668,11 +1774,11 @@
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (laddr->ifa == NULL) {
 				SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
-				    __FUNCTION__);
+				    __func__);
 				continue;
 			}
 			SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
-			    laddr->ifa);
+			    (void *)laddr->ifa);
 			if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
 				SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
 				continue;
@@ -1762,7 +1868,7 @@
 {
 	/* For 1-2-1 with port reuse */
 	struct sctppcbhead *head;
-	struct sctp_inpcb *tinp;
+	struct sctp_inpcb *tinp, *ninp;
 
 	if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
 		/* only works with port reuse on */
@@ -1772,10 +1878,11 @@
 		return (0);
 	}
 	SCTP_INP_RUNLOCK(inp);
+	SCTP_INP_INFO_WLOCK();
 	head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport,
 	    SCTP_BASE_INFO(hashmark))];
 	/* Kick out all non-listeners to the TCP hash */
-	LIST_FOREACH(tinp, head, sctp_hash) {
+	LIST_FOREACH_SAFE(tinp, head, sctp_hash, ninp) {
 		if (tinp->sctp_lport != inp->sctp_lport) {
 			continue;
 		}
@@ -1803,6 +1910,7 @@
 	LIST_INSERT_HEAD(head, inp, sctp_hash);
 	SCTP_INP_WUNLOCK(inp);
 	SCTP_INP_RLOCK(inp);
+	SCTP_INP_INFO_WUNLOCK();
 	return (0);
 }
 
@@ -1886,6 +1994,7 @@
 	return (inp);
 }
 
+
 /*
  * Find an association for an endpoint with the pointer to whom you want to
  * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may
@@ -1892,25 +2001,25 @@
  * need to change the *to to some other struct like a mbuf...
  */
 struct sctp_tcb *
-sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+sctp_findassociation_addr_sa(struct sockaddr *from, struct sockaddr *to,
     struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
     uint32_t vrf_id)
 {
 	struct sctp_inpcb *inp = NULL;
-	struct sctp_tcb *retval;
+	struct sctp_tcb *stcb;
 
 	SCTP_INP_INFO_RLOCK();
 	if (find_tcp_pool) {
 		if (inp_p != NULL) {
-			retval = sctp_tcb_special_locate(inp_p, from, to, netp,
+			stcb = sctp_tcb_special_locate(inp_p, from, to, netp,
 			    vrf_id);
 		} else {
-			retval = sctp_tcb_special_locate(&inp, from, to, netp,
+			stcb = sctp_tcb_special_locate(&inp, from, to, netp,
 			    vrf_id);
 		}
-		if (retval != NULL) {
+		if (stcb != NULL) {
 			SCTP_INP_INFO_RUNLOCK();
-			return (retval);
+			return (stcb);
 		}
 	}
 	inp = sctp_pcb_findep(to, 0, 1, vrf_id);
@@ -1918,7 +2027,6 @@
 		*inp_p = inp;
 	}
 	SCTP_INP_INFO_RUNLOCK();
-
 	if (inp == NULL) {
 		return (NULL);
 	}
@@ -1929,13 +2037,13 @@
 	 * inbound packet side.
 	 */
 	if (inp_p != NULL) {
-		retval = sctp_findassociation_ep_addr(inp_p, from, netp, to,
+		stcb = sctp_findassociation_ep_addr(inp_p, from, netp, to,
 		    NULL);
 	} else {
-		retval = sctp_findassociation_ep_addr(&inp, from, netp, to,
+		stcb = sctp_findassociation_ep_addr(&inp, from, netp, to,
 		    NULL);
 	}
-	return retval;
+	return (stcb);
 }
 
 
@@ -1947,12 +2055,17 @@
 static struct sctp_tcb *
 sctp_findassociation_special_addr(struct mbuf *m, int offset,
     struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
-    struct sockaddr *dest)
+    struct sockaddr *dst)
 {
 	struct sctp_paramhdr *phdr, parm_buf;
-	struct sctp_tcb *retval;
-	uint32_t ptype, plen;
 
+#if defined(INET) || defined(INET6)
+	struct sctp_tcb *stcb;
+	uint16_t ptype;
+
+#endif
+	uint16_t plen;
+
 #ifdef INET
 	struct sockaddr_in sin4;
 
@@ -1975,13 +2088,14 @@
 	sin6.sin6_port = sh->src_port;
 #endif
 
-	retval = NULL;
 	offset += sizeof(struct sctp_init_chunk);
 
 	phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
 	while (phdr != NULL) {
 		/* now we must see if we want the parameter */
+#if defined(INET) || defined(INET6)
 		ptype = ntohs(phdr->param_type);
+#endif
 		plen = ntohs(phdr->param_length);
 		if (plen == 0) {
 			break;
@@ -2000,10 +2114,10 @@
 			p4 = (struct sctp_ipv4addr_param *)phdr;
 			memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
 			/* look it up */
-			retval = sctp_findassociation_ep_addr(inp_p,
-			    (struct sockaddr *)&sin4, netp, dest, NULL);
-			if (retval != NULL) {
-				return (retval);
+			stcb = sctp_findassociation_ep_addr(inp_p,
+			    (struct sockaddr *)&sin4, netp, dst, NULL);
+			if (stcb != NULL) {
+				return (stcb);
 			}
 		}
 #endif
@@ -2021,10 +2135,10 @@
 			p6 = (struct sctp_ipv6addr_param *)phdr;
 			memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
 			/* look it up */
-			retval = sctp_findassociation_ep_addr(inp_p,
-			    (struct sockaddr *)&sin6, netp, dest, NULL);
-			if (retval != NULL) {
-				return (retval);
+			stcb = sctp_findassociation_ep_addr(inp_p,
+			    (struct sockaddr *)&sin6, netp, dst, NULL);
+			if (stcb != NULL) {
+				return (stcb);
 			}
 		}
 #endif
@@ -2052,11 +2166,6 @@
 	SCTP_INP_INFO_RLOCK();
 	head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag,
 	    SCTP_BASE_INFO(hashasocmark))];
-	if (head == NULL) {
-		/* invalid vtag */
-		SCTP_INP_INFO_RUNLOCK();
-		return (NULL);
-	}
 	LIST_FOREACH(stcb, head, sctp_asocs) {
 		SCTP_INP_RLOCK(stcb->sctp_ep);
 		if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
@@ -2137,6 +2246,7 @@
 	return (NULL);
 }
 
+
 /*
  * Find an association with the pointer to the inbound IP packet. This can be
  * a IPv4 or IPv6 packet.
@@ -2143,124 +2253,31 @@
  */
 struct sctp_tcb *
 sctp_findassociation_addr(struct mbuf *m, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_chunkhdr *ch,
     struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
 {
-	int find_tcp_pool;
-	struct ip *iph;
-	struct sctp_tcb *retval;
-	struct sockaddr_storage to_store, from_store;
-	struct sockaddr *to = (struct sockaddr *)&to_store;
-	struct sockaddr *from = (struct sockaddr *)&from_store;
+	struct sctp_tcb *stcb;
 	struct sctp_inpcb *inp;
 
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			/* its IPv4 */
-			struct sockaddr_in *from4;
-
-			from4 = (struct sockaddr_in *)&from_store;
-			bzero(from4, sizeof(*from4));
-			from4->sin_family = AF_INET;
-			from4->sin_len = sizeof(struct sockaddr_in);
-			from4->sin_addr.s_addr = iph->ip_src.s_addr;
-			from4->sin_port = sh->src_port;
-			break;
-		}
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		{
-			/* its IPv6 */
-			struct ip6_hdr *ip6;
-			struct sockaddr_in6 *from6;
-
-			ip6 = mtod(m, struct ip6_hdr *);
-			from6 = (struct sockaddr_in6 *)&from_store;
-			bzero(from6, sizeof(*from6));
-			from6->sin6_family = AF_INET6;
-			from6->sin6_len = sizeof(struct sockaddr_in6);
-			from6->sin6_addr = ip6->ip6_src;
-			from6->sin6_port = sh->src_port;
-			/* Get the scopes in properly to the sin6 addr's */
-			sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone));
-			break;
-		}
-#endif
-	default:
-		/* Currently not supported. */
-		return (NULL);
-	}
-
-
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			/* its IPv4 */
-			struct sockaddr_in *to4;
-
-			to4 = (struct sockaddr_in *)&to_store;
-			bzero(to4, sizeof(*to4));
-			to4->sin_family = AF_INET;
-			to4->sin_len = sizeof(struct sockaddr_in);
-			to4->sin_addr.s_addr = iph->ip_dst.s_addr;
-			to4->sin_port = sh->dest_port;
-			break;
-		}
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		{
-			/* its IPv6 */
-			struct ip6_hdr *ip6;
-			struct sockaddr_in6 *to6;
-
-			ip6 = mtod(m, struct ip6_hdr *);
-			to6 = (struct sockaddr_in6 *)&to_store;
-			bzero(to6, sizeof(*to6));
-			to6->sin6_family = AF_INET6;
-			to6->sin6_len = sizeof(struct sockaddr_in6);
-			to6->sin6_addr = ip6->ip6_dst;
-			to6->sin6_port = sh->dest_port;
-			/* Get the scopes in properly to the sin6 addr's */
-			sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone));
-			break;
-		}
-#endif
-	default:
-		/* TSNH */
-		break;
-	}
 	if (sh->v_tag) {
 		/* we only go down this path if vtag is non-zero */
-		retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag),
+		stcb = sctp_findassoc_by_vtag(src, dst, ntohl(sh->v_tag),
 		    inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0);
-		if (retval) {
-			return (retval);
+		if (stcb) {
+			return (stcb);
 		}
 	}
-	find_tcp_pool = 0;
-	if ((ch->chunk_type != SCTP_INITIATION) &&
-	    (ch->chunk_type != SCTP_INITIATION_ACK) &&
-	    (ch->chunk_type != SCTP_COOKIE_ACK) &&
-	    (ch->chunk_type != SCTP_COOKIE_ECHO)) {
-		/* Other chunk types go to the tcp pool. */
-		find_tcp_pool = 1;
-	}
 	if (inp_p) {
-		retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
-		    find_tcp_pool, vrf_id);
+		stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
+		    1, vrf_id);
 		inp = *inp_p;
 	} else {
-		retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
-		    find_tcp_pool, vrf_id);
+		stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp,
+		    1, vrf_id);
 	}
-	SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
-	if (retval == NULL && inp) {
+	SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp);
+	if (stcb == NULL && inp) {
 		/* Found a EP but not this address */
 		if ((ch->chunk_type == SCTP_INITIATION) ||
 		    (ch->chunk_type == SCTP_INITIATION_ACK)) {
@@ -2278,15 +2295,15 @@
 				}
 				return (NULL);
 			}
-			retval = sctp_findassociation_special_addr(m,
-			    offset, sh, &inp, netp, to);
+			stcb = sctp_findassociation_special_addr(m,
+			    offset, sh, &inp, netp, dst);
 			if (inp_p != NULL) {
 				*inp_p = inp;
 			}
 		}
 	}
-	SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
-	return (retval);
+	SCTPDBG(SCTP_DEBUG_PCB1, "stcb is %p\n", (void *)stcb);
+	return (stcb);
 }
 
 /*
@@ -2295,12 +2312,11 @@
  */
 struct sctp_tcb *
 sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
-    struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+    struct sockaddr *dst, struct sctphdr *sh,
+    struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
 {
 	struct sctp_tcb *stcb;
-	struct sockaddr_storage local_store, remote_store;
-	struct sockaddr *to;
-	struct ip *iph;
+	union sctp_sockstore remote_store;
 	struct sctp_paramhdr parm_buf, *phdr;
 	int ptype;
 	int zero_address = 0;
@@ -2310,47 +2326,16 @@
 
 #endif
 #ifdef INET6
-	struct ip6_hdr *ip6;
 	struct sockaddr_in6 *sin6;
 
 #endif
 
-	memset(&local_store, 0, sizeof(local_store));
 	memset(&remote_store, 0, sizeof(remote_store));
-	to = (struct sockaddr *)&local_store;
-	/* First get the destination address setup too. */
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		/* its IPv4 */
-		sin = (struct sockaddr_in *)&local_store;
-		sin->sin_family = AF_INET;
-		sin->sin_len = sizeof(*sin);
-		sin->sin_port = sh->dest_port;
-		sin->sin_addr.s_addr = iph->ip_dst.s_addr;
-		break;
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		/* its IPv6 */
-		ip6 = mtod(m, struct ip6_hdr *);
-		sin6 = (struct sockaddr_in6 *)&local_store;
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_len = sizeof(*sin6);
-		sin6->sin6_port = sh->dest_port;
-		sin6->sin6_addr = ip6->ip6_dst;
-		break;
-#endif
-	default:
-		return NULL;
-	}
-
 	phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
 	    &parm_buf, sizeof(struct sctp_paramhdr));
 	if (phdr == NULL) {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
-		    __FUNCTION__);
+		    __func__);
 		return NULL;
 	}
 	ptype = (int)((uint32_t) ntohs(phdr->param_type));
@@ -2370,10 +2355,10 @@
 			    &p6_buf.ph, sizeof(*p6));
 			if (p6 == NULL) {
 				SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
-				    __FUNCTION__);
+				    __func__);
 				return (NULL);
 			}
-			sin6 = (struct sockaddr_in6 *)&remote_store;
+			sin6 = &remote_store.sin6;
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
 			sin6->sin6_port = sh->src_port;
@@ -2397,10 +2382,10 @@
 			    &p4_buf.ph, sizeof(*p4));
 			if (p4 == NULL) {
 				SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
-				    __FUNCTION__);
+				    __func__);
 				return (NULL);
 			}
-			sin = (struct sockaddr_in *)&remote_store;
+			sin = &remote_store.sin;
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
 			sin->sin_port = sh->src_port;
@@ -2416,7 +2401,7 @@
 	}
 
 	if (zero_address) {
-		stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p,
+		stcb = sctp_findassoc_by_vtag(NULL, dst, ntohl(sh->v_tag), inp_p,
 		    netp, sh->src_port, sh->dest_port, 1, vrf_id, 0);
 		if (stcb != NULL) {
 			SCTP_INP_DECR_REF(*inp_p);
@@ -2423,8 +2408,8 @@
 		}
 	} else {
 		stcb = sctp_findassociation_ep_addr(inp_p,
-		    (struct sockaddr *)&remote_store, netp,
-		    to, NULL);
+		    &remote_store.sa, netp,
+		    dst, NULL);
 	}
 	return (stcb);
 }
@@ -2469,33 +2454,46 @@
 	/* setup socket pointers */
 	inp->sctp_socket = so;
 	inp->ip_inp.inp.inp_socket = so;
+	inp->ip_inp.inp.inp_cred = crhold(so->so_cred);
 #ifdef INET6
-	if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
-		inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+	if (INP_SOCKAF(so) == AF_INET6) {
+		if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
+			inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+		}
+		if (MODULE_GLOBAL(ip6_v6only)) {
+			inp->ip_inp.inp.inp_flags |= IN6P_IPV6_V6ONLY;
+		}
 	}
 #endif
 	inp->sctp_associd_counter = 1;
 	inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
 	inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+	inp->max_cwnd = 0;
 	inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off);
-	inp->sctp_ecn_enable = SCTP_BASE_SYSCTL(sctp_ecn_enable);
+	inp->ecn_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_ecn_enable);
+	inp->prsctp_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pr_enable);
+	if (SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+		inp->auth_supported = 0;
+	} else {
+		inp->auth_supported = 1;
+	}
+	inp->asconf_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_asconf_enable);
+	inp->reconfig_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_reconfig_enable);
+	inp->nrsack_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_nrsack_enable);
+	inp->pktdrop_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pktdrop_enable);
+	inp->fibnum = so->so_fibnum;
 	/* init the small hash table we use to track asocid <-> tcb */
 	inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark);
 	if (inp->sctp_asocidhash == NULL) {
+		crfree(inp->ip_inp.inp.inp_cred);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return (ENOBUFS);
 	}
 #ifdef IPSEC
-	{
-		struct inpcbpolicy *pcb_sp = NULL;
-
-		error = ipsec_init_policy(so, &pcb_sp);
-		/* Arrange to share the policy */
-		inp->ip_inp.inp.inp_sp = pcb_sp;
-		((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
-	}
+	error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp);
 	if (error != 0) {
+		crfree(inp->ip_inp.inp.inp_cred);
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		SCTP_INP_INFO_WUNLOCK();
 		return error;
@@ -2526,6 +2524,10 @@
 		 */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
 		so->so_pcb = NULL;
+		crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+		ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		return (EOPNOTSUPP);
 	}
@@ -2545,6 +2547,10 @@
 		SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
 		so->so_pcb = NULL;
+		crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+		ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
 		return (ENOBUFS);
 	}
@@ -2592,9 +2598,6 @@
 	m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default);
 	m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default);
 	m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default);
-
-	m->max_open_streams_intome = MAX_SCTP_STREAMS;
-
 	m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default);
 	m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default);
 	m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default);
@@ -2606,11 +2609,13 @@
 
 	m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module);
 	m->sctp_default_ss_module = SCTP_BASE_SYSCTL(sctp_default_ss_module);
+	m->max_open_streams_intome = SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default);
 	/* number of streams to pre-open on a association */
 	m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default);
 
 	/* Add adaptation cookie */
-	m->adaptation_layer_indicator = 0x504C5253;
+	m->adaptation_layer_indicator = 0;
+	m->adaptation_layer_indicator_provided = 0;
 
 	/* seed random number generator */
 	m->random_counter = 1;
@@ -2639,12 +2644,15 @@
 	 */
 	m->local_hmacs = sctp_default_supported_hmaclist();
 	m->local_auth_chunks = sctp_alloc_chunklist();
+	if (inp->asconf_supported) {
+		sctp_auth_add_chunk(SCTP_ASCONF, m->local_auth_chunks);
+		sctp_auth_add_chunk(SCTP_ASCONF_ACK, m->local_auth_chunks);
+	}
 	m->default_dscp = 0;
 #ifdef INET6
 	m->default_flowlabel = 0;
 #endif
 	m->port = 0;		/* encapsulation disabled by default */
-	sctp_auth_set_default_chunks(m->local_auth_chunks);
 	LIST_INIT(&m->shared_keys);
 	/* add default NULL key as key id 0 */
 	null_key = sctp_alloc_sharedkey();
@@ -2778,9 +2786,48 @@
 	SCTP_INP_WUNLOCK(old_inp);
 }
 
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+static int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+	struct sctp_laddr *laddr;
 
+	laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+	if (laddr == NULL) {
+		/* out of memory? */
+		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+		return (EINVAL);
+	}
+	SCTP_INCR_LADDR_COUNT();
+	bzero(laddr, sizeof(*laddr));
+	(void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+	laddr->ifa = ifa;
+	laddr->action = act;
+	atomic_add_int(&ifa->refcount, 1);
+	/* insert it */
+	LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
 
+	return (0);
+}
 
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+static void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+	/* remove from the list */
+	LIST_REMOVE(laddr, sctp_nxt_addr);
+	sctp_free_ifa(laddr->ifa);
+	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
+	SCTP_DECR_LADDR_COUNT();
+}
+
+
+
 /* sctp_ifap is used to bypass normal local address validation checks */
 int
 sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
@@ -2797,15 +2844,14 @@
 	uint32_t vrf_id;
 
 	lport = 0;
-	error = 0;
 	bindall = 1;
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	ip_inp = (struct inpcb *)so->so_pcb;
 #ifdef SCTP_DEBUG
 	if (addr) {
-		SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
+		SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port: %d\n",
 		    ntohs(((struct sockaddr_in *)addr)->sin_port));
-		SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
+		SCTPDBG(SCTP_DEBUG_PCB1, "Addr: ");
 		SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
 	}
 #endif
@@ -2867,7 +2913,6 @@
 					return (EINVAL);
 				}
 				lport = sin6->sin6_port;
-
 				/*
 				 * For LOOPBACK the prison_local_ip6() call
 				 * will transmute the ipv6 address to the
@@ -2905,7 +2950,7 @@
 	SCTP_INP_INCR_REF(inp);
 	if (lport) {
 		/*
-		 * Did the caller specify a port? if so we must see if a ep
+		 * Did the caller specify a port? if so we must see if an ep
 		 * already has this one bound.
 		 */
 		/* got to be root to get at low ports */
@@ -2919,13 +2964,6 @@
 				return (error);
 			}
 		}
-		if (p == NULL) {
-			SCTP_INP_DECR_REF(inp);
-			SCTP_INP_WUNLOCK(inp);
-			SCTP_INP_INFO_WUNLOCK();
-			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
-			return (error);
-		}
 		SCTP_INP_WUNLOCK(inp);
 		if (bindall) {
 			vrf_id = inp->def_vrf_id;
@@ -2986,8 +3024,7 @@
 		if (bindall) {
 			/* verify that no lport is not used by a singleton */
 			if ((port_reuse_active == 0) &&
-			    (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))
-			    ) {
+			    (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))) {
 				/* Sorry someone already has this one bound */
 				if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 				    (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
@@ -3119,31 +3156,21 @@
 		 * too (before adding).
 		 */
 		struct sctp_ifa *ifa;
-		struct sockaddr_storage store_sa;
+		union sctp_sockstore store;
 
-		memset(&store_sa, 0, sizeof(store_sa));
+		memset(&store, 0, sizeof(store));
 		switch (addr->sa_family) {
 #ifdef INET
 		case AF_INET:
-			{
-				struct sockaddr_in *sin;
-
-				sin = (struct sockaddr_in *)&store_sa;
-				memcpy(sin, addr, sizeof(struct sockaddr_in));
-				sin->sin_port = 0;
-				break;
-			}
+			memcpy(&store.sin, addr, sizeof(struct sockaddr_in));
+			store.sin.sin_port = 0;
+			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
-			{
-				struct sockaddr_in6 *sin6;
-
-				sin6 = (struct sockaddr_in6 *)&store_sa;
-				memcpy(sin6, addr, sizeof(struct sockaddr_in6));
-				sin6->sin6_port = 0;
-				break;
-			}
+			memcpy(&store.sin6, addr, sizeof(struct sockaddr_in6));
+			store.sin6.sin6_port = 0;
+			break;
 #endif
 		default:
 			break;
@@ -3153,15 +3180,15 @@
 		 * zero out the port to find the address! yuck! can't do
 		 * this earlier since need port for sctp_pcb_findep()
 		 */
-		if (sctp_ifap != NULL)
+		if (sctp_ifap != NULL) {
 			ifa = sctp_ifap;
-		else {
+		} else {
 			/*
 			 * Note for BSD we hit here always other O/S's will
 			 * pass things in via the sctp_ifap argument
 			 * (Panda).
 			 */
-			ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+			ifa = sctp_find_ifa_by_addr(&store.sa,
 			    vrf_id, SCTP_ADDR_NOT_LOCKED);
 		}
 		if (ifa == NULL) {
@@ -3210,7 +3237,7 @@
 	/* put it in the bucket */
 	LIST_INSERT_HEAD(head, inp, sctp_hash);
 	SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n",
-	    head, ntohs(lport), port_reuse_active);
+	    (void *)head, ntohs(lport), port_reuse_active);
 	/* set in the port */
 	inp->sctp_lport = lport;
 
@@ -3404,23 +3431,7 @@
 				/* Left with Data unread */
 				struct mbuf *op_err;
 
-				op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (op_err) {
-					/* Fill in the user initiated abort */
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(op_err) =
-					    sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
-					ph = mtod(op_err,
-					    struct sctp_paramhdr *);
-					ph->param_type = htons(
-					    SCTP_CAUSE_USER_INITIATED_ABT);
-					ph->param_length = htons(SCTP_BUF_LEN(op_err));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
-				}
+				op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 				asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
 				sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 				SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3435,8 +3446,7 @@
 				continue;
 			} else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
 				    TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
-				    (asoc->asoc.stream_queue_cnt == 0)
-			    ) {
+			    (asoc->asoc.stream_queue_cnt == 0)) {
 				if (asoc->asoc.locked_on_sending) {
 					goto abort_anyway;
 				}
@@ -3444,16 +3454,10 @@
 				    (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					struct sctp_nets *netp;
 
-					if (asoc->asoc.alternate) {
-						netp = asoc->asoc.alternate;
-					} else {
-						netp = asoc->asoc.primary_destination;
-					}
 					/*
 					 * there is nothing queued to send,
 					 * so I send shutdown
 					 */
-					sctp_send_shutdown(asoc, netp);
 					if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
 					    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
@@ -3460,6 +3464,13 @@
 					}
 					SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
 					SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+					sctp_stop_timers_for_shutdown(asoc);
+					if (asoc->asoc.alternate) {
+						netp = asoc->asoc.alternate;
+					} else {
+						netp = asoc->asoc.primary_destination;
+					}
+					sctp_send_shutdown(asoc, netp);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
 					    netp);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
@@ -3478,7 +3489,7 @@
 					    sctp_streamhead);
 					if (sp == NULL) {
 						SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
-						    asoc->asoc.locked_on_sending,
+						    (void *)asoc->asoc.locked_on_sending,
 						    asoc->asoc.locked_on_sending->stream_no);
 					} else {
 						if ((sp->length == 0) && (sp->msg_is_complete == 0))
@@ -3491,27 +3502,7 @@
 					struct mbuf *op_err;
 
 			abort_anyway:
-					op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (op_err) {
-						/*
-						 * Fill in the user
-						 * initiated abort
-						 */
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(op_err) =
-						    (sizeof(struct sctp_paramhdr) +
-						    sizeof(uint32_t));
-						ph = mtod(op_err,
-						    struct sctp_paramhdr *);
-						ph->param_type = htons(
-						    SCTP_CAUSE_USER_INITIATED_ABT);
-						ph->param_length = htons(SCTP_BUF_LEN(op_err));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
-					}
+					op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 					asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
 					sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 					SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3574,24 +3565,8 @@
 		if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
 		    ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
 			struct mbuf *op_err;
-			uint32_t *ippp;
 
-			op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-			    0, M_DONTWAIT, 1, MT_DATA);
-			if (op_err) {
-				/* Fill in the user initiated abort */
-				struct sctp_paramhdr *ph;
-
-				SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
-				    sizeof(uint32_t));
-				ph = mtod(op_err, struct sctp_paramhdr *);
-				ph->param_type = htons(
-				    SCTP_CAUSE_USER_INITIATED_ABT);
-				ph->param_length = htons(SCTP_BUF_LEN(op_err));
-				ippp = (uint32_t *) (ph + 1);
-				*ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
-
-			}
+			op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 			asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
 			sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
 			SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3604,7 +3579,8 @@
 		    (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 		}
-		if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+		if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE,
+		    SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
 			cnt++;
 		}
 	}
@@ -3698,13 +3674,9 @@
 	 * macro here since le_next will get freed as part of the
 	 * sctp_free_assoc() call.
 	 */
-	if (so) {
 #ifdef IPSEC
-		ipsec_delete_pcbpolicy(ip_pcb);
-#endif				/* IPSEC */
-
-		/* Unlocks not needed since the socket is gone now */
-	}
+	ipsec_delete_pcbpolicy(ip_pcb);
+#endif
 	if (ip_pcb->inp_options) {
 		(void)sctp_m_free(ip_pcb->inp_options);
 		ip_pcb->inp_options = 0;
@@ -3754,6 +3726,7 @@
 		inp->sctp_tcbhash = NULL;
 	}
 	/* Now we must put the ep memory back into the zone pool */
+	crfree(inp->ip_inp.inp.inp_cred);
 	INP_LOCK_DESTROY(&inp->ip_inp.inp);
 	SCTP_INP_LOCK_DESTROY(inp);
 	SCTP_INP_READ_DESTROY(inp);
@@ -3850,17 +3823,13 @@
 			/* assure len is set */
 			sin->sin_len = sizeof(struct sockaddr_in);
 			if (set_scope) {
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
-				stcb->ipv4_local_scope = 1;
-#else
 				if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
-					stcb->asoc.ipv4_local_scope = 1;
+					stcb->asoc.scope.ipv4_local_scope = 1;
 				}
-#endif				/* SCTP_DONT_DO_PRIVADDR_SCOPE */
 			} else {
 				/* Validate the address is in scope */
 				if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
-				    (stcb->asoc.ipv4_local_scope == 0)) {
+				    (stcb->asoc.scope.ipv4_local_scope == 0)) {
 					addr_inscope = 0;
 				}
 			}
@@ -3881,10 +3850,10 @@
 			sin6->sin6_len = sizeof(struct sockaddr_in6);
 			if (set_scope) {
 				if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
-					stcb->asoc.loopback_scope = 1;
-					stcb->asoc.local_scope = 0;
-					stcb->asoc.ipv4_local_scope = 1;
-					stcb->asoc.site_scope = 1;
+					stcb->asoc.scope.loopback_scope = 1;
+					stcb->asoc.scope.local_scope = 0;
+					stcb->asoc.scope.ipv4_local_scope = 1;
+					stcb->asoc.scope.site_scope = 1;
 				} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 					/*
 					 * If the new destination is a
@@ -3896,8 +3865,8 @@
 					 * also be on our private network
 					 * for v4 too.
 					 */
-					stcb->asoc.ipv4_local_scope = 1;
-					stcb->asoc.site_scope = 1;
+					stcb->asoc.scope.ipv4_local_scope = 1;
+					stcb->asoc.scope.site_scope = 1;
 				} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
 					/*
 					 * If the new destination is
@@ -3904,18 +3873,18 @@
 					 * SITE_LOCAL then we must have site
 					 * scope in common.
 					 */
-					stcb->asoc.site_scope = 1;
+					stcb->asoc.scope.site_scope = 1;
 				}
 			} else {
 				/* Validate the address is in scope */
 				if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
-				    (stcb->asoc.loopback_scope == 0)) {
+				    (stcb->asoc.scope.loopback_scope == 0)) {
 					addr_inscope = 0;
 				} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
-				    (stcb->asoc.local_scope == 0)) {
+				    (stcb->asoc.scope.local_scope == 0)) {
 					addr_inscope = 0;
 				} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
-				    (stcb->asoc.site_scope == 0)) {
+				    (stcb->asoc.scope.site_scope == 0)) {
 					addr_inscope = 0;
 				}
 			}
@@ -3950,10 +3919,10 @@
 	}
 	net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
 	if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
-		stcb->asoc.loopback_scope = 1;
-		stcb->asoc.ipv4_local_scope = 1;
-		stcb->asoc.local_scope = 0;
-		stcb->asoc.site_scope = 1;
+		stcb->asoc.scope.loopback_scope = 1;
+		stcb->asoc.scope.ipv4_local_scope = 1;
+		stcb->asoc.scope.local_scope = 0;
+		stcb->asoc.scope.site_scope = 1;
 		addr_inscope = 1;
 	}
 	net->failure_threshold = stcb->asoc.def_net_failure;
@@ -4011,7 +3980,9 @@
 		sin6->sin6_scope_id = 0;
 	}
 #endif
-	SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+	SCTP_RTALLOC((sctp_route_t *) & net->ro,
+	    stcb->asoc.vrf_id,
+	    stcb->sctp_ep->fibnum);
 
 	if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
 		/* Get source address */
@@ -4021,9 +3992,14 @@
 		    net,
 		    0,
 		    stcb->asoc.vrf_id);
-		/* Now get the interface MTU */
-		if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
-			net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+		if (net->ro._s_addr != NULL) {
+			net->src_addr_selected = 1;
+			/* Now get the interface MTU */
+			if (net->ro._s_addr->ifn_p != NULL) {
+				net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+			}
+		} else {
+			net->src_addr_selected = 0;
 		}
 		if (net->mtu > 0) {
 			uint32_t rmtu;
@@ -4045,6 +4021,8 @@
 				net->mtu = rmtu;
 			}
 		}
+	} else {
+		net->src_addr_selected = 0;
 	}
 	if (net->mtu == 0) {
 		switch (newaddr->sa_family) {
@@ -4062,9 +4040,11 @@
 			break;
 		}
 	}
+#if defined(INET) || defined(INET6)
 	if (net->port) {
 		net->mtu -= (uint32_t) sizeof(struct udphdr);
 	}
+#endif
 	if (from == SCTP_ALLOC_ASOC) {
 		stcb->asoc.smallest_mtu = net->mtu;
 	}
@@ -4090,14 +4070,11 @@
 	 */
 	net->find_pseudo_cumack = 1;
 	net->find_rtx_pseudo_cumack = 1;
-	net->src_addr_selected = 0;
 	/* Choose an initial flowid. */
 	net->flowid = stcb->asoc.my_vtag ^
 	    ntohs(stcb->rport) ^
 	    ntohs(stcb->sctp_ep->sctp_lport);
-#ifdef INVARIANTS
-	net->flowidset = 1;
-#endif
+	net->flowtype = M_HASHTYPE_OPAQUE;
 	if (netp) {
 		*netp = net;
 	}
@@ -4218,6 +4195,7 @@
 struct sctp_tcb *
 sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
     int *error, uint32_t override_tag, uint32_t vrf_id,
+    uint16_t o_streams,
     struct thread *p
 )
 {
@@ -4376,7 +4354,7 @@
 	/* setup back pointer's */
 	stcb->sctp_ep = inp;
 	stcb->sctp_socket = inp->sctp_socket;
-	if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) {
+	if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id, o_streams))) {
 		/* failed */
 		SCTP_TCB_LOCK_DESTROY(stcb);
 		SCTP_TCB_SEND_LOCK_DESTROY(stcb);
@@ -4425,6 +4403,7 @@
 			asoc->nr_mapping_array = NULL;
 		}
 		SCTP_DECR_ASOC_COUNT();
+		SCTP_TCB_UNLOCK(stcb);
 		SCTP_TCB_LOCK_DESTROY(stcb);
 		SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 		LIST_REMOVE(stcb, sctp_tcbasocidhash);
@@ -4451,7 +4430,7 @@
 		LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
 	}
 	SCTP_INP_WUNLOCK(inp);
-	SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
+	SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", (void *)stcb);
 	return (stcb);
 }
 
@@ -4560,23 +4539,21 @@
 	int i;
 
 	chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
-	if (!LIST_EMPTY(chain)) {
-		LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
-			for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
-				if ((twait_block->vtag_block[i].v_tag == tag) &&
-				    (twait_block->vtag_block[i].lport == lport) &&
-				    (twait_block->vtag_block[i].rport == rport)) {
-					twait_block->vtag_block[i].tv_sec_at_expire = 0;
-					twait_block->vtag_block[i].v_tag = 0;
-					twait_block->vtag_block[i].lport = 0;
-					twait_block->vtag_block[i].rport = 0;
-					found = 1;
-					break;
-				}
+	LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+		for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+			if ((twait_block->vtag_block[i].v_tag == tag) &&
+			    (twait_block->vtag_block[i].lport == lport) &&
+			    (twait_block->vtag_block[i].rport == rport)) {
+				twait_block->vtag_block[i].tv_sec_at_expire = 0;
+				twait_block->vtag_block[i].v_tag = 0;
+				twait_block->vtag_block[i].lport = 0;
+				twait_block->vtag_block[i].rport = 0;
+				found = 1;
+				break;
 			}
-			if (found)
-				break;
 		}
+		if (found)
+			break;
 	}
 }
 
@@ -4590,19 +4567,17 @@
 
 	SCTP_INP_INFO_WLOCK();
 	chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
-	if (!LIST_EMPTY(chain)) {
-		LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
-			for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
-				if ((twait_block->vtag_block[i].v_tag == tag) &&
-				    (twait_block->vtag_block[i].lport == lport) &&
-				    (twait_block->vtag_block[i].rport == rport)) {
-					found = 1;
-					break;
-				}
+	LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+		for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+			if ((twait_block->vtag_block[i].v_tag == tag) &&
+			    (twait_block->vtag_block[i].lport == lport) &&
+			    (twait_block->vtag_block[i].rport == rport)) {
+				found = 1;
+				break;
 			}
-			if (found)
-				break;
 		}
+		if (found)
+			break;
 	}
 	SCTP_INP_INFO_WUNLOCK();
 	return (found);
@@ -4624,43 +4599,41 @@
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 	set = 0;
-	if (!LIST_EMPTY(chain)) {
+	LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
 		/* Block(s) present, lets find space, and expire on the fly */
-		LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
-			for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
-				if ((twait_block->vtag_block[i].v_tag == 0) &&
-				    !set) {
-					twait_block->vtag_block[i].tv_sec_at_expire =
-					    now.tv_sec + time;
+		for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+			if ((twait_block->vtag_block[i].v_tag == 0) &&
+			    !set) {
+				twait_block->vtag_block[i].tv_sec_at_expire =
+				    now.tv_sec + time;
+				twait_block->vtag_block[i].v_tag = tag;
+				twait_block->vtag_block[i].lport = lport;
+				twait_block->vtag_block[i].rport = rport;
+				set = 1;
+			} else if ((twait_block->vtag_block[i].v_tag) &&
+			    ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+				/* Audit expires this guy */
+				twait_block->vtag_block[i].tv_sec_at_expire = 0;
+				twait_block->vtag_block[i].v_tag = 0;
+				twait_block->vtag_block[i].lport = 0;
+				twait_block->vtag_block[i].rport = 0;
+				if (set == 0) {
+					/* Reuse it for my new tag */
+					twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
 					twait_block->vtag_block[i].v_tag = tag;
 					twait_block->vtag_block[i].lport = lport;
 					twait_block->vtag_block[i].rport = rport;
 					set = 1;
-				} else if ((twait_block->vtag_block[i].v_tag) &&
-				    ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
-					/* Audit expires this guy */
-					twait_block->vtag_block[i].tv_sec_at_expire = 0;
-					twait_block->vtag_block[i].v_tag = 0;
-					twait_block->vtag_block[i].lport = 0;
-					twait_block->vtag_block[i].rport = 0;
-					if (set == 0) {
-						/* Reuse it for my new tag */
-						twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
-						twait_block->vtag_block[i].v_tag = tag;
-						twait_block->vtag_block[i].lport = lport;
-						twait_block->vtag_block[i].rport = rport;
-						set = 1;
-					}
 				}
 			}
-			if (set) {
-				/*
-				 * We only do up to the block where we can
-				 * place our tag for audits
-				 */
-				break;
-			}
 		}
+		if (set) {
+			/*
+			 * We only do up to the block where we can place our
+			 * tag for audits
+			 */
+			break;
+		}
 	}
 	/* Need to add a new block to chain */
 	if (!set) {
@@ -5038,6 +5011,13 @@
 	}
 	/* pending send queue SHOULD be empty */
 	TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
+		if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+			asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+		} else {
+			panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+		}
 		TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 		if (chk->data) {
 			if (so) {
@@ -5062,6 +5042,15 @@
 	}
 	/* sent queue SHOULD be empty */
 	TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
+		if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+			if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+				asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+			} else {
+				panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+			}
+		}
 		TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
 		if (chk->data) {
 			if (so) {
@@ -5081,6 +5070,13 @@
 		SCTP_DECR_CHK_COUNT();
 		/* sa_ignore FREED_MEMORY */
 	}
+#ifdef INVARIANTS
+	for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+		if (stcb->asoc.strmout[i].chunks_on_queues > 0) {
+			panic("%u chunks left for stream %u.", stcb->asoc.strmout[i].chunks_on_queues, i);
+		}
+	}
+#endif
 	/* control queue MAY not be empty */
 	TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) {
 		TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
@@ -5210,6 +5206,7 @@
 	/* Insert new items here :> */
 
 	/* Get rid of LOCK */
+	SCTP_TCB_UNLOCK(stcb);
 	SCTP_TCB_LOCK_DESTROY(stcb);
 	SCTP_TCB_SEND_LOCK_DESTROY(stcb);
 	if (from_inpcbfree == SCTP_NORMAL_PROC) {
@@ -5334,7 +5331,7 @@
 	LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 		if (laddr->ifa == NULL) {
 			SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
-			    __FUNCTION__);
+			    __func__);
 			continue;
 		}
 		if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -5440,7 +5437,7 @@
 
 
 /*
- * Delete the address from the endpoint local address list There is nothing
+ * Delete the address from the endpoint local address list. There is nothing
  * to be done if we are bound to all addresses
  */
 void
@@ -5491,8 +5488,7 @@
 			 * to laddr
 			 */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
-				if (net->ro._s_addr &&
-				    (net->ro._s_addr->ifa == laddr->ifa)) {
+				if (net->ro._s_addr == laddr->ifa) {
 					/* Yep, purge src address selected */
 					sctp_rtentry_t *rt;
 
@@ -5556,46 +5552,6 @@
 }
 
 /*
- * insert an laddr entry with the given ifa for the desired list
- */
-int
-sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
-{
-	struct sctp_laddr *laddr;
-
-	laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
-	if (laddr == NULL) {
-		/* out of memory? */
-		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
-		return (EINVAL);
-	}
-	SCTP_INCR_LADDR_COUNT();
-	bzero(laddr, sizeof(*laddr));
-	(void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
-	laddr->ifa = ifa;
-	laddr->action = act;
-	atomic_add_int(&ifa->refcount, 1);
-	/* insert it */
-	LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
-
-	return (0);
-}
-
-/*
- * Remove an laddr entry from the local address list (on an assoc)
- */
-void
-sctp_remove_laddr(struct sctp_laddr *laddr)
-{
-
-	/* remove from the list */
-	LIST_REMOVE(laddr, sctp_nxt_addr);
-	sctp_free_ifa(laddr->ifa);
-	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
-	SCTP_DECR_LADDR_COUNT();
-}
-
-/*
  * Remove a local address from the TCB local address restricted list
  */
 void
@@ -5935,7 +5891,6 @@
 	for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
 		LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]);
 	}
-
 	sctp_startup_iterator();
 
 #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
@@ -5964,40 +5919,59 @@
 	struct sctp_tagblock *twait_block, *prev_twait_block;
 	struct sctp_laddr *wi, *nwi;
 	int i;
+	struct sctp_iterator *it, *nit;
 
+	if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+		SCTP_PRINTF("%s: race condition on teardown.\n", __func__);
+		return;
+	}
+	SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
 	/*
-	 * Free BSD the it thread never exits but we do clean up. The only
-	 * way freebsd reaches here if we have VRF's but we still add the
-	 * ifdef to make it compile on old versions.
+	 * In FreeBSD the iterator thread never exits but we do clean up.
+	 * The only way FreeBSD reaches here is if we have VRF's but we
+	 * still add the ifdef to make it compile on old versions.
 	 */
-	{
-		struct sctp_iterator *it, *nit;
-
-		SCTP_IPI_ITERATOR_WQ_LOCK();
-		TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
-			if (it->vn != curvnet) {
-				continue;
-			}
-			TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
-			if (it->function_atend != NULL) {
-				(*it->function_atend) (it->pointer, it->val);
-			}
-			SCTP_FREE(it, SCTP_M_ITER);
-		}
+retry:
+	SCTP_IPI_ITERATOR_WQ_LOCK();
+	/*
+	 * sctp_iterator_worker() might be working on an it entry without
+	 * holding the lock.  We won't find it on the list either and
+	 * continue and free/destroy it.  While holding the lock, spin, to
+	 * avoid the race condition as sctp_iterator_worker() will have to
+	 * wait to re-aquire the lock.
+	 */
+	if (sctp_it_ctl.iterator_running != 0 || sctp_it_ctl.cur_it != NULL) {
 		SCTP_IPI_ITERATOR_WQ_UNLOCK();
-		SCTP_ITERATOR_LOCK();
-		if ((sctp_it_ctl.cur_it) &&
-		    (sctp_it_ctl.cur_it->vn == curvnet)) {
-			sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+		SCTP_PRINTF("%s: Iterator running while we held the lock. Retry. "
+		    "cur_it=%p\n", __func__, sctp_it_ctl.cur_it);
+		DELAY(10);
+		goto retry;
+	}
+	TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
+		if (it->vn != curvnet) {
+			continue;
 		}
-		SCTP_ITERATOR_UNLOCK();
+		TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+		if (it->function_atend != NULL) {
+			(*it->function_atend) (it->pointer, it->val);
+		}
+		SCTP_FREE(it, SCTP_M_ITER);
 	}
-
-	SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
+	SCTP_IPI_ITERATOR_WQ_UNLOCK();
+	SCTP_ITERATOR_LOCK();
+	if ((sctp_it_ctl.cur_it) &&
+	    (sctp_it_ctl.cur_it->vn == curvnet)) {
+		sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+	}
+	SCTP_ITERATOR_UNLOCK();
+	SCTP_OS_TIMER_STOP_DRAIN(&SCTP_BASE_INFO(addr_wq_timer.timer));
 	SCTP_WQ_ADDR_LOCK();
 	LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) {
 		LIST_REMOVE(wi, sctp_nxt_addr);
 		SCTP_DECR_LADDR_COUNT();
+		if (wi->action == SCTP_DEL_IP_ADDRESS) {
+			SCTP_FREE(wi->ifa, SCTP_M_IFA);
+		}
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
 	}
 	SCTP_WQ_ADDR_UNLOCK();
@@ -6057,6 +6031,14 @@
 
 	SCTP_WQ_ADDR_DESTROY();
 
+	/* Get rid of other stuff too. */
+	if (SCTP_BASE_INFO(sctp_asochash) != NULL)
+		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
+	if (SCTP_BASE_INFO(sctp_ephash) != NULL)
+		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
+	if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
+		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
+
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
@@ -6066,13 +6048,6 @@
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf));
 	SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack));
-	/* Get rid of other stuff to */
-	if (SCTP_BASE_INFO(sctp_asochash) != NULL)
-		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
-	if (SCTP_BASE_INFO(sctp_ephash) != NULL)
-		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
-	if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
-		SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
 #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 	SCTP_FREE(SCTP_BASE_STATS, SCTP_M_MCORE);
 #endif
@@ -6081,7 +6056,8 @@
 
 int
 sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
-    int offset, int limit, struct sctphdr *sh,
+    int offset, int limit,
+    struct sockaddr *src, struct sockaddr *dst,
     struct sockaddr *altsa)
 {
 	/*
@@ -6093,13 +6069,10 @@
 	 */
 	struct sctp_inpcb *inp;
 	struct sctp_nets *net, *nnet, *net_tmp;
-	struct ip *iph;
 	struct sctp_paramhdr *phdr, parm_buf;
 	struct sctp_tcb *stcb_tmp;
 	uint16_t ptype, plen;
 	struct sockaddr *sa;
-	struct sockaddr_storage dest_store;
-	struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
 	uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
 	struct sctp_auth_random *p_random = NULL;
 	uint16_t random_len = 0;
@@ -6114,7 +6087,14 @@
 	sctp_key_t *new_key;
 	uint32_t keylen;
 	int got_random = 0, got_hmacs = 0, got_chklist = 0;
-	uint8_t ecn_allowed;
+	uint8_t peer_supports_ecn;
+	uint8_t peer_supports_prsctp;
+	uint8_t peer_supports_auth;
+	uint8_t peer_supports_asconf;
+	uint8_t peer_supports_asconf_ack;
+	uint8_t peer_supports_reconfig;
+	uint8_t peer_supports_nrsack;
+	uint8_t peer_supports_pktdrop;
 
 #ifdef INET
 	struct sockaddr_in sin;
@@ -6138,68 +6118,18 @@
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_port = stcb->rport;
 #endif
-	iph = mtod(m, struct ip *);
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			/* its IPv4 */
-			struct sockaddr_in *sin_2;
-
-			sin_2 = (struct sockaddr_in *)(local_sa);
-			memset(sin_2, 0, sizeof(sin));
-			sin_2->sin_family = AF_INET;
-			sin_2->sin_len = sizeof(sin);
-			sin_2->sin_port = sh->dest_port;
-			sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
-			if (altsa) {
-				/*
-				 * For cookies we use the src address NOT
-				 * from the packet but from the original
-				 * INIT.
-				 */
-				sa = altsa;
-			} else {
-				sin.sin_addr = iph->ip_src;
-				sa = (struct sockaddr *)&sin;
-			}
-			break;
-		}
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		{
-			/* its IPv6 */
-			struct ip6_hdr *ip6;
-			struct sockaddr_in6 *sin6_2;
-
-			ip6 = mtod(m, struct ip6_hdr *);
-			sin6_2 = (struct sockaddr_in6 *)(local_sa);
-			memset(sin6_2, 0, sizeof(sin6));
-			sin6_2->sin6_family = AF_INET6;
-			sin6_2->sin6_len = sizeof(struct sockaddr_in6);
-			sin6_2->sin6_port = sh->dest_port;
-			sin6_2->sin6_addr = ip6->ip6_dst;
-			if (altsa) {
-				/*
-				 * For cookies we use the src address NOT
-				 * from the packet but from the original
-				 * INIT.
-				 */
-				sa = altsa;
-			} else {
-				sin6.sin6_addr = ip6->ip6_src;
-				sa = (struct sockaddr *)&sin6;
-			}
-			break;
-		}
-#endif
-	default:
-		return (-1);
-		break;
+	if (altsa) {
+		sa = altsa;
+	} else {
+		sa = src;
 	}
-	/* Turn off ECN until we get through all params */
-	ecn_allowed = 0;
+	peer_supports_ecn = 0;
+	peer_supports_prsctp = 0;
+	peer_supports_auth = 0;
+	peer_supports_asconf = 0;
+	peer_supports_reconfig = 0;
+	peer_supports_nrsack = 0;
+	peer_supports_pktdrop = 0;
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		/* mark all addresses that we have currently on the list */
 		net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
@@ -6207,7 +6137,7 @@
 	/* does the source address already exist? if so skip it */
 	inp = stcb->sctp_ep;
 	atomic_add_int(&stcb->asoc.refcnt, 1);
-	stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+	stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, dst, stcb);
 	atomic_add_int(&stcb->asoc.refcnt, -1);
 
 	if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
@@ -6216,7 +6146,7 @@
 		switch (sa->sa_family) {
 #ifdef INET
 		case AF_INET:
-			if (stcb->asoc.ipv4_addr_legal) {
+			if (stcb->asoc.scope.ipv4_addr_legal) {
 				if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
 					return (-1);
 				}
@@ -6225,7 +6155,7 @@
 #endif
 #ifdef INET6
 		case AF_INET6:
-			if (stcb->asoc.ipv6_addr_legal) {
+			if (stcb->asoc.scope.ipv6_addr_legal) {
 				if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
 					return (-2);
 				}
@@ -6249,12 +6179,6 @@
 		/* the assoc was freed? */
 		return (-4);
 	}
-	/*
-	 * peer must explicitly turn this on. This may have been initialized
-	 * to be "on" in order to allow local addr changes while INIT's are
-	 * in flight.
-	 */
-	stcb->asoc.peer_supports_asconf = 0;
 	/* now we must go through each of the params. */
 	phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
 	while (phdr) {
@@ -6272,7 +6196,7 @@
 		}
 #ifdef INET
 		if (ptype == SCTP_IPV4_ADDRESS) {
-			if (stcb->asoc.ipv4_addr_legal) {
+			if (stcb->asoc.scope.ipv4_addr_legal) {
 				struct sctp_ipv4addr_param *p4, p4_buf;
 
 				/* ok get the v4 address and check/add */
@@ -6297,7 +6221,7 @@
 				inp = stcb->sctp_ep;
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
-				    local_sa, stcb);
+				    dst, stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 
 				if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
@@ -6337,12 +6261,20 @@
 					 */
 					if (stcb_tmp) {
 						if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+							struct mbuf *op_err;
+							char msg[SCTP_DIAG_INFO_LEN];
+
 							/*
 							 * in setup state we
 							 * abort this guy
 							 */
+							snprintf(msg, sizeof(msg),
+							    "%s:%d at %s", __FILE__, __LINE__, __func__);
+							op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+							    msg);
 							sctp_abort_an_association(stcb_tmp->sctp_ep,
-							    stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+							    stcb_tmp, op_err,
+							    SCTP_SO_NOT_LOCKED);
 							goto add_it_now;
 						}
 						SCTP_TCB_UNLOCK(stcb_tmp);
@@ -6358,7 +6290,7 @@
 #endif
 #ifdef INET6
 		if (ptype == SCTP_IPV6_ADDRESS) {
-			if (stcb->asoc.ipv6_addr_legal) {
+			if (stcb->asoc.scope.ipv6_addr_legal) {
 				/* ok get the v6 address and check/add */
 				struct sctp_ipv6addr_param *p6, p6_buf;
 
@@ -6387,7 +6319,7 @@
 				inp = stcb->sctp_ep;
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
-				    local_sa, stcb);
+				    dst, stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 				if (stcb_tmp == NULL &&
 				    (inp == stcb->sctp_ep || inp == NULL)) {
@@ -6426,18 +6358,26 @@
 					 * strange, address is in another
 					 * assoc? straighten out locks.
 					 */
-					if (stcb_tmp)
+					if (stcb_tmp) {
 						if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+							struct mbuf *op_err;
+							char msg[SCTP_DIAG_INFO_LEN];
+
 							/*
 							 * in setup state we
 							 * abort this guy
 							 */
+							snprintf(msg, sizeof(msg),
+							    "%s:%d at %s", __FILE__, __LINE__, __func__);
+							op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+							    msg);
 							sctp_abort_an_association(stcb_tmp->sctp_ep,
-							    stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+							    stcb_tmp, op_err,
+							    SCTP_SO_NOT_LOCKED);
 							goto add_it_now6;
 						}
-					SCTP_TCB_UNLOCK(stcb_tmp);
-
+						SCTP_TCB_UNLOCK(stcb_tmp);
+					}
 					if (stcb->asoc.state == 0) {
 						/* the assoc was freed? */
 						return (-21);
@@ -6448,7 +6388,7 @@
 		} else
 #endif
 		if (ptype == SCTP_ECN_CAPABLE) {
-			ecn_allowed = 1;
+			peer_supports_ecn = 1;
 		} else if (ptype == SCTP_ULP_ADAPTATION) {
 			if (stcb->asoc.state != SCTP_STATE_OPEN) {
 				struct sctp_adaptation_layer_indication ai,
@@ -6472,7 +6412,9 @@
 
 #endif
 
-			stcb->asoc.peer_supports_asconf = 1;
+			if (stcb->asoc.asconf_supported == 0) {
+				return (-100);
+			}
 			if (plen > sizeof(lstore)) {
 				return (-23);
 			}
@@ -6524,7 +6466,7 @@
 			stcb->asoc.peer_supports_nat = 1;
 		} else if (ptype == SCTP_PRSCTP_SUPPORTED) {
 			/* Peer supports pr-sctp */
-			stcb->asoc.peer_supports_prsctp = 1;
+			peer_supports_prsctp = 1;
 		} else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
 			/* A supported extension chunk */
 			struct sctp_supported_chunk_types_param *pr_supported;
@@ -6536,34 +6478,30 @@
 			if (phdr == NULL) {
 				return (-25);
 			}
-			stcb->asoc.peer_supports_asconf = 0;
-			stcb->asoc.peer_supports_prsctp = 0;
-			stcb->asoc.peer_supports_pktdrop = 0;
-			stcb->asoc.peer_supports_strreset = 0;
-			stcb->asoc.peer_supports_nr_sack = 0;
-			stcb->asoc.peer_supports_auth = 0;
 			pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
 			num_ent = plen - sizeof(struct sctp_paramhdr);
 			for (i = 0; i < num_ent; i++) {
 				switch (pr_supported->chunk_types[i]) {
 				case SCTP_ASCONF:
+					peer_supports_asconf = 1;
+					break;
 				case SCTP_ASCONF_ACK:
-					stcb->asoc.peer_supports_asconf = 1;
+					peer_supports_asconf_ack = 1;
 					break;
 				case SCTP_FORWARD_CUM_TSN:
-					stcb->asoc.peer_supports_prsctp = 1;
+					peer_supports_prsctp = 1;
 					break;
 				case SCTP_PACKET_DROPPED:
-					stcb->asoc.peer_supports_pktdrop = 1;
+					peer_supports_pktdrop = 1;
 					break;
 				case SCTP_NR_SELECTIVE_ACK:
-					stcb->asoc.peer_supports_nr_sack = 1;
+					peer_supports_nrsack = 1;
 					break;
 				case SCTP_STREAM_RESET:
-					stcb->asoc.peer_supports_strreset = 1;
+					peer_supports_reconfig = 1;
 					break;
 				case SCTP_AUTHENTICATION:
-					stcb->asoc.peer_supports_auth = 1;
+					peer_supports_auth = 1;
 					break;
 				default:
 					/* one I have not learned yet */
@@ -6592,8 +6530,8 @@
 			}
 			got_random = 1;
 		} else if (ptype == SCTP_HMAC_LIST) {
-			int num_hmacs;
-			int i;
+			uint16_t num_hmacs;
+			uint16_t i;
 
 			if (plen > sizeof(hmacs_store))
 				break;
@@ -6700,24 +6638,47 @@
 			}
 		}
 	}
-	if (ecn_allowed == 0) {
-		stcb->asoc.ecn_allowed = 0;
+	if ((stcb->asoc.ecn_supported == 1) &&
+	    (peer_supports_ecn == 0)) {
+		stcb->asoc.ecn_supported = 0;
 	}
+	if ((stcb->asoc.prsctp_supported == 1) &&
+	    (peer_supports_prsctp == 0)) {
+		stcb->asoc.prsctp_supported = 0;
+	}
+	if ((stcb->asoc.auth_supported == 1) &&
+	    ((peer_supports_auth == 0) ||
+	    (got_random == 0) || (got_hmacs == 0))) {
+		stcb->asoc.auth_supported = 0;
+	}
+	if ((stcb->asoc.asconf_supported == 1) &&
+	    ((peer_supports_asconf == 0) || (peer_supports_asconf_ack == 0) ||
+	    (stcb->asoc.auth_supported == 0) ||
+	    (saw_asconf == 0) || (saw_asconf_ack == 0))) {
+		stcb->asoc.asconf_supported = 0;
+	}
+	if ((stcb->asoc.reconfig_supported == 1) &&
+	    (peer_supports_reconfig == 0)) {
+		stcb->asoc.reconfig_supported = 0;
+	}
+	if ((stcb->asoc.nrsack_supported == 1) &&
+	    (peer_supports_nrsack == 0)) {
+		stcb->asoc.nrsack_supported = 0;
+	}
+	if ((stcb->asoc.pktdrop_supported == 1) &&
+	    (peer_supports_pktdrop == 0)) {
+		stcb->asoc.pktdrop_supported = 0;
+	}
 	/* validate authentication required parameters */
-	if (got_random && got_hmacs) {
-		stcb->asoc.peer_supports_auth = 1;
-	} else {
-		stcb->asoc.peer_supports_auth = 0;
-	}
-	if (!stcb->asoc.peer_supports_auth && got_chklist) {
+	if ((peer_supports_auth == 0) && (got_chklist == 1)) {
 		/* peer does not support auth but sent a chunks list? */
 		return (-31);
 	}
-	if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf &&
-	    !stcb->asoc.peer_supports_auth) {
+	if ((peer_supports_asconf == 1) && (peer_supports_auth == 0)) {
 		/* peer supports asconf but not auth? */
 		return (-32);
-	} else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+	} else if ((peer_supports_asconf == 1) &&
+		    (peer_supports_auth == 1) &&
 	    ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
 		return (-33);
 	}
@@ -6812,10 +6773,6 @@
 	SCTP_INP_INFO_RLOCK();
 	head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
 	    SCTP_BASE_INFO(hashasocmark))];
-	if (head == NULL) {
-		/* invalid vtag */
-		goto skip_vtag_check;
-	}
 	LIST_FOREACH(stcb, head, sctp_asocs) {
 		/*
 		 * We choose not to lock anything here. TCB's can't be
@@ -6839,34 +6796,30 @@
 			return (0);
 		}
 	}
-skip_vtag_check:
-
 	chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
 	/* Now what about timed wait ? */
-	if (!LIST_EMPTY(chain)) {
+	LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
 		/*
 		 * Block(s) are present, lets see if we have this tag in the
 		 * list
 		 */
-		LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
-			for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
-				if (twait_block->vtag_block[i].v_tag == 0) {
-					/* not used */
-					continue;
-				} else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
-				    now->tv_sec) {
-					/* Audit expires this guy */
-					twait_block->vtag_block[i].tv_sec_at_expire = 0;
-					twait_block->vtag_block[i].v_tag = 0;
-					twait_block->vtag_block[i].lport = 0;
-					twait_block->vtag_block[i].rport = 0;
-				} else if ((twait_block->vtag_block[i].v_tag == tag) &&
-					    (twait_block->vtag_block[i].lport == lport) &&
-				    (twait_block->vtag_block[i].rport == rport)) {
-					/* Bad tag, sorry :< */
-					SCTP_INP_INFO_RUNLOCK();
-					return (0);
-				}
+		for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+			if (twait_block->vtag_block[i].v_tag == 0) {
+				/* not used */
+				continue;
+			} else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+			    now->tv_sec) {
+				/* Audit expires this guy */
+				twait_block->vtag_block[i].tv_sec_at_expire = 0;
+				twait_block->vtag_block[i].v_tag = 0;
+				twait_block->vtag_block[i].lport = 0;
+				twait_block->vtag_block[i].rport = 0;
+			} else if ((twait_block->vtag_block[i].v_tag == tag) &&
+				    (twait_block->vtag_block[i].lport == lport) &&
+			    (twait_block->vtag_block[i].rport == rport)) {
+				/* Bad tag, sorry :< */
+				SCTP_INP_INFO_RUNLOCK();
+				return (0);
 			}
 		}
 	}
@@ -7058,6 +7011,11 @@
 	if (af == NULL) {
 		return (-1);
 	}
+	if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+		SCTP_PRINTF("%s: abort on initialize being %d\n", __func__,
+		    SCTP_BASE_VAR(sctp_pcb_initialized));
+		return (-1);
+	}
 	SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
 	    SCTP_M_ITER);
 	if (it == NULL) {
@@ -7096,7 +7054,13 @@
 
 	}
 	SCTP_IPI_ITERATOR_WQ_LOCK();
-
+	if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+		SCTP_IPI_ITERATOR_WQ_UNLOCK();
+		SCTP_PRINTF("%s: rollback on initialize being %d it=%p\n", __func__,
+		    SCTP_BASE_VAR(sctp_pcb_initialized), it);
+		SCTP_FREE(it, SCTP_M_ITER);
+		return (-1);
+	}
 	TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
 	if (sctp_it_ctl.iterator_running == 0) {
 		sctp_wakeup_iterator();

Modified: trunk/sys/netinet/sctp_pcb.h
===================================================================
--- trunk/sys/netinet/sctp_pcb.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_pcb.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_pcb.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_pcb.h 296052 2016-02-25 18:46:06Z tuexen $");
 
 #ifndef _NETINET_SCTP_PCB_H_
 #define _NETINET_SCTP_PCB_H_
@@ -145,7 +145,12 @@
 
 
 struct sctp_epinfo {
-	struct socket *udp_tun_socket;
+#ifdef INET
+	struct socket *udp4_tun_socket;
+#endif
+#ifdef INET6
+	struct socket *udp6_tun_socket;
+#endif
 	struct sctpasochead *sctp_asochash;
 	u_long hashasocmark;
 
@@ -319,6 +324,7 @@
 	int auto_close_time;
 	uint32_t initial_sequence_debug;
 	uint32_t adaptation_layer_indicator;
+	uint8_t adaptation_layer_indicator_provided;
 	uint32_t store_at;
 	uint32_t max_burst;
 	uint32_t fr_max_burst;
@@ -383,8 +389,8 @@
 
 	/* back pointer to our socket */
 	struct socket *sctp_socket;
+	uint64_t sctp_features;	/* Feature flags */
 	uint32_t sctp_flags;	/* INP state flag set */
-	uint32_t sctp_features;	/* Feature flags */
 	uint32_t sctp_mobility_features;	/* Mobility  Feature flags */
 	struct sctp_pcb sctp_ep;/* SCTP ep data */
 	/* head of the hash of all associations */
@@ -399,9 +405,16 @@
 	uint32_t sctp_frag_point;
 	uint32_t partial_delivery_point;
 	uint32_t sctp_context;
+	uint32_t max_cwnd;
 	uint8_t local_strreset_support;
 	uint32_t sctp_cmt_on_off;
-	uint32_t sctp_ecn_enable;
+	uint8_t ecn_supported;
+	uint8_t prsctp_supported;
+	uint8_t auth_supported;
+	uint8_t asconf_supported;
+	uint8_t reconfig_supported;
+	uint8_t nrsack_supported;
+	uint8_t pktdrop_supported;
 	struct sctp_nonpad_sndrcvinfo def_send;
 	/*-
 	 * These three are here for the sosend_dgram
@@ -418,6 +431,7 @@
 	struct mtx inp_rdata_mtx;
 	int32_t refcount;
 	uint32_t def_vrf_id;
+	uint16_t fibnum;
 	uint32_t total_sends;
 	uint32_t total_recvs;
 	uint32_t last_abort_code;
@@ -529,6 +543,7 @@
 
 struct sctp_tcb *
 sctp_findassociation_addr(struct mbuf *, int,
+    struct sockaddr *, struct sockaddr *,
     struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
     struct sctp_nets **, uint32_t vrf_id);
 
@@ -559,7 +574,7 @@
     sctp_assoc_t, int);
 
 struct sctp_tcb *
-sctp_findassociation_ep_asconf(struct mbuf *, int,
+sctp_findassociation_ep_asconf(struct mbuf *, int, struct sockaddr *,
     struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id);
 
 int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
@@ -570,7 +585,7 @@
 
 struct sctp_tcb *
 sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
-    int *, uint32_t, uint32_t, struct thread *);
+    int *, uint32_t, uint32_t, uint16_t, struct thread *);
 
 int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
 
@@ -584,10 +599,6 @@
 
 void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
 
-int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
-
-void sctp_remove_laddr(struct sctp_laddr *);
-
 void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
 
 int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets **, int, int);
@@ -604,8 +615,8 @@
 void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
 
 int
-sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int,
-    int, struct sctphdr *, struct sockaddr *);
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+    struct sockaddr *, struct sockaddr *, struct sockaddr *);
 
 int
 sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
@@ -640,11 +651,5 @@
 
 #endif
 
-#ifdef INVARIANTS
-void
-     sctp_validate_no_locks(struct sctp_inpcb *inp);
-
-#endif
-
 #endif				/* _KERNEL */
 #endif				/* !__sctp_pcb_h__ */

Modified: trunk/sys/netinet/sctp_peeloff.c
===================================================================
--- trunk/sys/netinet/sctp_peeloff.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_peeloff.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_peeloff.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_peeloff.c 283724 2015-05-29 12:54:30Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_pcb.h>
@@ -75,9 +75,7 @@
 	}
 	state = SCTP_GET_STATE((&stcb->asoc));
 	if ((state == SCTP_STATE_EMPTY) ||
-	    (state == SCTP_STATE_INUSE) ||
-	    (state == SCTP_STATE_COOKIE_WAIT) ||
-	    (state == SCTP_STATE_COOKIE_ECHOED)) {
+	    (state == SCTP_STATE_INUSE)) {
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
 		return (ENOTCONN);
@@ -106,9 +104,7 @@
 	}
 	state = SCTP_GET_STATE((&stcb->asoc));
 	if ((state == SCTP_STATE_EMPTY) ||
-	    (state == SCTP_STATE_INUSE) ||
-	    (state == SCTP_STATE_COOKIE_WAIT) ||
-	    (state == SCTP_STATE_COOKIE_ECHOED)) {
+	    (state == SCTP_STATE_INUSE)) {
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
 		return (ENOTCONN);
@@ -123,9 +119,16 @@
 	n_inp->sctp_mobility_features = inp->sctp_mobility_features;
 	n_inp->sctp_frag_point = inp->sctp_frag_point;
 	n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
-	n_inp->sctp_ecn_enable = inp->sctp_ecn_enable;
+	n_inp->ecn_supported = inp->ecn_supported;
+	n_inp->prsctp_supported = inp->prsctp_supported;
+	n_inp->auth_supported = inp->auth_supported;
+	n_inp->asconf_supported = inp->asconf_supported;
+	n_inp->reconfig_supported = inp->reconfig_supported;
+	n_inp->nrsack_supported = inp->nrsack_supported;
+	n_inp->pktdrop_supported = inp->pktdrop_supported;
 	n_inp->partial_delivery_point = inp->partial_delivery_point;
 	n_inp->sctp_context = inp->sctp_context;
+	n_inp->max_cwnd = inp->max_cwnd;
 	n_inp->local_strreset_support = inp->local_strreset_support;
 	n_inp->inp_starting_point_for_iterator = NULL;
 	/* copy in the authentication parameters from the original endpoint */
@@ -152,103 +155,3 @@
 
 	return (0);
 }
-
-
-struct socket *
-sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
-{
-	struct socket *newso;
-	struct sctp_inpcb *inp, *n_inp;
-	struct sctp_tcb *stcb;
-
-	SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
-	inp = (struct sctp_inpcb *)head->so_pcb;
-	if (inp == NULL) {
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
-		*error = EFAULT;
-		return (NULL);
-	}
-	stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
-	if (stcb == NULL) {
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
-		*error = ENOTCONN;
-		return (NULL);
-	}
-	atomic_add_int(&stcb->asoc.refcnt, 1);
-	SCTP_TCB_UNLOCK(stcb);
-	CURVNET_SET(head->so_vnet);
-	newso = sonewconn(head, SS_ISCONNECTED
-	    );
-	CURVNET_RESTORE();
-	if (newso == NULL) {
-		SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
-		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
-		*error = ENOMEM;
-		atomic_subtract_int(&stcb->asoc.refcnt, 1);
-		return (NULL);
-
-	}
-	SCTP_TCB_LOCK(stcb);
-	atomic_subtract_int(&stcb->asoc.refcnt, 1);
-	n_inp = (struct sctp_inpcb *)newso->so_pcb;
-	SOCK_LOCK(head);
-	n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
-	    SCTP_PCB_FLAGS_CONNECTED |
-	    SCTP_PCB_FLAGS_IN_TCPPOOL |	/* Turn on Blocking IO */
-	    (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
-	n_inp->sctp_features = inp->sctp_features;
-	n_inp->sctp_frag_point = inp->sctp_frag_point;
-	n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
-	n_inp->sctp_ecn_enable = inp->sctp_ecn_enable;
-	n_inp->partial_delivery_point = inp->partial_delivery_point;
-	n_inp->sctp_context = inp->sctp_context;
-	n_inp->local_strreset_support = inp->local_strreset_support;
-	n_inp->inp_starting_point_for_iterator = NULL;
-
-	/* copy in the authentication parameters from the original endpoint */
-	if (n_inp->sctp_ep.local_hmacs)
-		sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
-	n_inp->sctp_ep.local_hmacs =
-	    sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
-	if (n_inp->sctp_ep.local_auth_chunks)
-		sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
-	n_inp->sctp_ep.local_auth_chunks =
-	    sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
-	(void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
-	    &n_inp->sctp_ep.shared_keys);
-
-	n_inp->sctp_socket = newso;
-	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
-		sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
-		n_inp->sctp_ep.auto_close_time = 0;
-		sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
-		    SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
-	}
-	/* Turn off any non-blocking semantic. */
-	SCTP_CLEAR_SO_NBIO(newso);
-	newso->so_state |= SS_ISCONNECTED;
-	/* We remove it right away */
-
-#ifdef SCTP_LOCK_LOGGING
-	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
-		sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
-	}
-#endif
-	TAILQ_REMOVE(&head->so_comp, newso, so_list);
-	head->so_qlen--;
-	SOCK_UNLOCK(head);
-	/*
-	 * Now we must move it from one hash table to another and get the
-	 * stcb in the right place.
-	 */
-	sctp_move_pcb_and_assoc(inp, n_inp, stcb);
-	atomic_add_int(&stcb->asoc.refcnt, 1);
-	SCTP_TCB_UNLOCK(stcb);
-	/*
-	 * And now the final hack. We move data in the pending side i.e.
-	 * head to the new socket buffer. Let the GRUBBING begin :-0
-	 */
-	sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
-	atomic_subtract_int(&stcb->asoc.refcnt, 1);
-	return (newso);
-}

Modified: trunk/sys/netinet/sctp_peeloff.h
===================================================================
--- trunk/sys/netinet/sctp_peeloff.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_peeloff.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,22 +32,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_peeloff.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_peeloff.h 243516 2012-11-25 14:25:08Z tuexen $");
 
 #ifndef _NETINET_SCTP_PEELOFF_H_
 #define _NETINET_SCTP_PEELOFF_H_
-
-
-
-
 #if defined(_KERNEL)
-
 int sctp_can_peel_off(struct socket *, sctp_assoc_t);
 int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t);
-struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *);
 
-
-
 #endif				/* _KERNEL */
-
-#endif
+#endif				/* _NETINET_SCTP_PEELOFF_H_ */

Modified: trunk/sys/netinet/sctp_ss_functions.c
===================================================================
--- trunk/sys/netinet/sctp_ss_functions.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_ss_functions.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_ss_functions.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_ss_functions.c 235828 2012-05-23 11:26:28Z tuexen $");
 
 #include <netinet/sctp_pcb.h>
 

Modified: trunk/sys/netinet/sctp_structs.h
===================================================================
--- trunk/sys/netinet/sctp_structs.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_structs.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_structs.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_structs.h 294140 2016-01-16 12:15:07Z tuexen $");
 
 #ifndef _NETINET_SCTP_STRUCTS_H_
 #define _NETINET_SCTP_STRUCTS_H_
@@ -77,9 +77,10 @@
 
 struct sctp_stream_reset_list {
 	TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+	uint32_t seq;
 	uint32_t tsn;
-	int number_entries;
-	struct sctp_stream_reset_out_request req;
+	uint32_t number_entries;
+	uint16_t list_of_streams[];
 };
 
 TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
@@ -381,9 +382,7 @@
 	uint8_t lan_type;
 	uint8_t rto_needed;
 	uint32_t flowid;
-#ifdef INVARIANTS
-	uint8_t flowidset;
-#endif
+	uint8_t flowtype;
 };
 
 
@@ -419,8 +418,8 @@
 #define CHUNK_FLAGS_FRAGMENT_OK	        0x0100
 
 struct chk_id {
-	uint16_t id;
-	uint16_t can_take_data;
+	uint8_t id;
+	uint8_t can_take_data;
 };
 
 
@@ -447,7 +446,6 @@
 	uint8_t do_rtt;
 	uint8_t book_size_scale;
 	uint8_t no_fr_allowed;
-	uint8_t pr_sctp_on;
 	uint8_t copy_by_ref;
 	uint8_t window_probe;
 };
@@ -518,13 +516,11 @@
 	uint32_t context;
 	uint16_t sinfo_flags;
 	uint16_t stream;
-	uint16_t strseq;
 	uint16_t act_flags;
 	uint16_t auth_keyid;
 	uint8_t holds_key_ref;
 	uint8_t msg_is_complete;
 	uint8_t some_taken;
-	uint8_t pr_sctp_on;
 	uint8_t sender_all_done;
 	uint8_t put_last_out;
 	uint8_t discard_rest;
@@ -586,13 +582,32 @@
 	struct ss_fb fb;
 };
 
+/* States for outgoing streams */
+#define SCTP_STREAM_CLOSED           0x00
+#define SCTP_STREAM_OPENING          0x01
+#define SCTP_STREAM_OPEN             0x02
+#define SCTP_STREAM_RESET_PENDING    0x03
+#define SCTP_STREAM_RESET_IN_FLIGHT  0x04
+
+#define SCTP_MAX_STREAMS_AT_ONCE_RESET 200
+
 /* This struct is used to track the traffic on outbound streams */
 struct sctp_stream_out {
 	struct sctp_streamhead outqueue;
 	union scheduling_parameters ss_params;
+	uint32_t chunks_on_queues;	/* send queue and sent queue */
+#if defined(SCTP_DETAILED_STR_STATS)
+	uint32_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+	uint32_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
+#else
+	/* Only the aggregation */
+	uint32_t abandoned_unsent[1];
+	uint32_t abandoned_sent[1];
+#endif
 	uint16_t stream_no;
-	uint16_t next_sequence_sent;	/* next one I expect to send out */
+	uint16_t next_sequence_send;	/* next one I expect to send out */
 	uint8_t last_msg_incomplete;
+	uint8_t state;
 };
 
 /* used to keep track of the addresses yet to try to add/delete */
@@ -1145,7 +1160,7 @@
 	uint8_t hb_random_idx;
 	uint8_t default_dscp;
 	uint8_t asconf_del_pending;	/* asconf delete last addr pending */
-
+	uint8_t trigger_reset;
 	/*
 	 * This value, plus all other ack'd but above cum-ack is added
 	 * together to cross check against the bit that we have yet to
@@ -1153,44 +1168,24 @@
 	 * sum is updated as well.
 	 */
 
-	/* Flag to tell if ECN is allowed */
-	uint8_t ecn_allowed;
+	/* Flags whether an extension is supported or not */
+	uint8_t ecn_supported;
+	uint8_t prsctp_supported;
+	uint8_t auth_supported;
+	uint8_t asconf_supported;
+	uint8_t reconfig_supported;
+	uint8_t nrsack_supported;
+	uint8_t pktdrop_supported;
 
 	/* Did the peer make the stream config (add out) request */
 	uint8_t peer_req_out;
 
-	/* flag to indicate if peer can do asconf */
-	uint8_t peer_supports_asconf;
-	/* EY - flag to indicate if peer can do nr_sack */
-	uint8_t peer_supports_nr_sack;
-	/* pr-sctp support flag */
-	uint8_t peer_supports_prsctp;
-	/* peer authentication support flag */
-	uint8_t peer_supports_auth;
-	/* stream resets are supported by the peer */
-	uint8_t peer_supports_strreset;
 	uint8_t local_strreset_support;
 
 	uint8_t peer_supports_nat;
-	/*
-	 * packet drop's are supported by the peer, we don't really care
-	 * about this but we bookkeep it anyway.
-	 */
-	uint8_t peer_supports_pktdrop;
 
-	/* Do we allow V6/V4? */
-	uint8_t ipv4_addr_legal;
-	uint8_t ipv6_addr_legal;
-	/* Address scoping flags */
-	/* scope value for IPv4 */
-	uint8_t ipv4_local_scope;
-	/* scope values for IPv6 */
-	uint8_t local_scope;
-	uint8_t site_scope;
-	/* loopback scope */
-	uint8_t loopback_scope;
+	struct sctp_scoping scope;
 	/* flags to handle send alternate net tracking */
-	uint8_t used_alt_onsack;
 	uint8_t used_alt_asconfack;
 	uint8_t fast_retran_loss_recovery;
 	uint8_t sat_t3_loss_recovery;
@@ -1211,12 +1206,11 @@
 	uint8_t sctp_cmt_on_off;
 	uint8_t iam_blocking;
 	uint8_t cookie_how[8];
-	/* EY 05/05/08 - NR_SACK variable */
-	uint8_t sctp_nr_sack_on_off;
 	/* JRS 5/21/07 - CMT PF variable */
 	uint8_t sctp_cmt_pf;
 	uint8_t use_precise_time;
-	uint32_t sctp_features;
+	uint64_t sctp_features;
+	uint32_t max_cwnd;
 	uint16_t port;		/* remote UDP encapsulation port */
 	/*
 	 * The mapping array is used to track out of order sequences above
@@ -1235,6 +1229,8 @@
 	uint32_t timoshutdownack;
 	struct timeval start_time;
 	struct timeval discontinuity_time;
+	uint64_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+	uint64_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
 };
 
 #endif

Added: trunk/sys/netinet/sctp_syscalls.c
===================================================================
--- trunk/sys/netinet/sctp_syscalls.c	                        (rev 0)
+++ trunk/sys/netinet/sctp_syscalls.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,597 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_syscalls.c 321021 2017-07-15 17:28:03Z dchagin $");
+
+#include "opt_capsicum.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_sctp.h"
+#include "opt_compat.h"
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/capsicum.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysproto.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/event.h>
+#include <sys/proc.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/jail.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/sf_buf.h>
+#include <sys/sysent.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+#ifdef COMPAT_FREEBSD32
+#include <compat/freebsd32/freebsd32_util.h>
+#endif
+
+#include <net/vnet.h>
+
+#include <security/audit/audit.h>
+#include <security/mac/mac_framework.h>
+
+#include <netinet/sctp.h>
+#include <netinet/sctp_peeloff.h>
+
+static struct syscall_helper_data sctp_syscalls[] = {
+	SYSCALL_INIT_HELPER(sctp_peeloff),
+	SYSCALL_INIT_HELPER(sctp_generic_sendmsg),
+	SYSCALL_INIT_HELPER(sctp_generic_sendmsg_iov),
+	SYSCALL_INIT_HELPER(sctp_generic_recvmsg),
+	SYSCALL_INIT_LAST
+};
+
+static void
+sctp_syscalls_init(void *unused __unused)
+{
+	int error;
+
+	error = syscall_helper_register(sctp_syscalls);
+	KASSERT((error == 0),
+	    ("%s: syscall_helper_register failed for sctp syscalls", __func__));
+#ifdef COMPAT_FREEBSD32
+	error = syscall32_helper_register(sctp_syscalls);
+	KASSERT((error == 0),
+	    ("%s: syscall32_helper_register failed for sctp syscalls",
+	    __func__));
+#endif
+}
+SYSINIT(sctp_syscalls, SI_SUB_SYSCALLS, SI_ORDER_ANY, sctp_syscalls_init, NULL);
+
+/*
+ * SCTP syscalls.
+ * Functionality only compiled in if SCTP is defined in the kernel Makefile,
+ * otherwise all return EOPNOTSUPP.
+ * XXX: We should make this loadable one day.
+ */
+int
+sys_sctp_peeloff(td, uap)
+	struct thread *td;
+	struct sctp_peeloff_args /* {
+		int	sd;
+		caddr_t	name;
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	struct file *nfp = NULL;
+	struct socket *head, *so;
+	cap_rights_t rights;
+	u_int fflag;
+	int error, fd;
+
+	AUDIT_ARG_FD(uap->sd);
+	error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF),
+	    &head, &fflag);
+	if (error != 0)
+		goto done2;
+	if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto done;
+	}
+	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
+	if (error != 0)
+		goto done;
+	/*
+	 * At this point we know we do have a assoc to pull
+	 * we proceed to get the fd setup. This may block
+	 * but that is ok.
+	 */
+
+	error = falloc(td, &nfp, &fd, 0);
+	if (error != 0)
+		goto done;
+	td->td_retval[0] = fd;
+
+	CURVNET_SET(head->so_vnet);
+	so = sonewconn(head, SS_ISCONNECTED);
+	if (so == NULL) {
+		error = ENOMEM;
+		goto noconnection;
+	}
+	/*
+	 * Before changing the flags on the socket, we have to bump the
+	 * reference count.  Otherwise, if the protocol calls sofree(),
+	 * the socket will be released due to a zero refcount.
+	 */
+        SOCK_LOCK(so);
+        soref(so);                      /* file descriptor reference */
+        SOCK_UNLOCK(so);
+
+	ACCEPT_LOCK();
+
+	TAILQ_REMOVE(&head->so_comp, so, so_list);
+	head->so_qlen--;
+	so->so_state |= (head->so_state & SS_NBIO);
+	so->so_state &= ~SS_NOFDREF;
+	so->so_qstate &= ~SQ_COMP;
+	so->so_head = NULL;
+	ACCEPT_UNLOCK();
+	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
+	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
+	if (error != 0)
+		goto noconnection;
+	if (head->so_sigio != NULL)
+		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
+
+noconnection:
+	/*
+	 * close the new descriptor, assuming someone hasn't ripped it
+	 * out from under us.
+	 */
+	if (error != 0)
+		fdclose(td, nfp, fd);
+
+	/*
+	 * Release explicitly held references before returning.
+	 */
+	CURVNET_RESTORE();
+done:
+	if (nfp != NULL)
+		fdrop(nfp, td);
+	fputsock(head);
+done2:
+	return (error);
+#else  /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+int
+sys_sctp_generic_sendmsg (td, uap)
+	struct thread *td;
+	struct sctp_generic_sendmsg_args /* {
+		int sd,
+		caddr_t msg,
+		int mlen,
+		caddr_t to,
+		__socklen_t tolen,
+		struct sctp_sndrcvinfo *sinfo,
+		int flags
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
+	struct socket *so;
+	struct file *fp = NULL;
+	struct sockaddr *to = NULL;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+	struct uio auio;
+	struct iovec iov[1];
+	cap_rights_t rights;
+	int error = 0, len;
+
+	if (uap->sinfo != NULL) {
+		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
+		if (error != 0)
+			return (error);
+		u_sinfo = &sinfo;
+	}
+
+	cap_rights_init(&rights, CAP_SEND);
+	if (uap->tolen != 0) {
+		error = getsockaddr(&to, uap->to, uap->tolen);
+		if (error != 0) {
+			to = NULL;
+			goto sctp_bad2;
+		}
+		cap_rights_set(&rights, CAP_CONNECT);
+	}
+
+	AUDIT_ARG_FD(uap->sd);
+	error = getsock_cap(td, uap->sd, &rights, &fp, NULL);
+	if (error != 0)
+		goto sctp_bad;
+#ifdef KTRACE
+	if (to && (KTRPOINT(td, KTR_STRUCT)))
+		ktrsockaddr(to);
+#endif
+
+	iov[0].iov_base = uap->msg;
+	iov[0].iov_len = uap->mlen;
+
+	so = (struct socket *)fp->f_data;
+	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto sctp_bad;
+	}
+#ifdef MAC
+	error = mac_socket_check_send(td->td_ucred, so);
+	if (error != 0)
+		goto sctp_bad;
+#endif /* MAC */
+
+	auio.uio_iov =  iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_td = td;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+	len = auio.uio_resid = uap->mlen;
+	CURVNET_SET(so->so_vnet);
+	error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL,
+	    (struct mbuf *)NULL, uap->flags, u_sinfo, td);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		/* Generation of SIGPIPE can be controlled per socket. */
+		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
+		    !(uap->flags & MSG_NOSIGNAL)) {
+			PROC_LOCK(td->td_proc);
+			tdsignal(td, SIGPIPE);
+			PROC_UNLOCK(td->td_proc);
+		}
+	}
+	if (error == 0)
+		td->td_retval[0] = len - auio.uio_resid;
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = td->td_retval[0];
+		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
+	}
+#endif /* KTRACE */
+sctp_bad:
+	if (fp != NULL)
+		fdrop(fp, td);
+sctp_bad2:
+	free(to, M_SONAME);
+	return (error);
+#else  /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+int
+sys_sctp_generic_sendmsg_iov(td, uap)
+	struct thread *td;
+	struct sctp_generic_sendmsg_iov_args /* {
+		int sd,
+		struct iovec *iov,
+		int iovlen,
+		caddr_t to,
+		__socklen_t tolen,
+		struct sctp_sndrcvinfo *sinfo,
+		int flags
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
+	struct socket *so;
+	struct file *fp = NULL;
+	struct sockaddr *to = NULL;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+	struct uio auio;
+	struct iovec *iov, *tiov;
+	cap_rights_t rights;
+	ssize_t len;
+	int error, i;
+
+	if (uap->sinfo != NULL) {
+		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
+		if (error != 0)
+			return (error);
+		u_sinfo = &sinfo;
+	}
+	cap_rights_init(&rights, CAP_SEND);
+	if (uap->tolen != 0) {
+		error = getsockaddr(&to, uap->to, uap->tolen);
+		if (error != 0) {
+			to = NULL;
+			goto sctp_bad2;
+		}
+		cap_rights_set(&rights, CAP_CONNECT);
+	}
+
+	AUDIT_ARG_FD(uap->sd);
+	error = getsock_cap(td, uap->sd, &rights, &fp, NULL);
+	if (error != 0)
+		goto sctp_bad1;
+
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
+		    uap->iovlen, &iov, EMSGSIZE);
+	else
+#endif
+		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
+	if (error != 0)
+		goto sctp_bad1;
+#ifdef KTRACE
+	if (to && (KTRPOINT(td, KTR_STRUCT)))
+		ktrsockaddr(to);
+#endif
+
+	so = (struct socket *)fp->f_data;
+	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto sctp_bad;
+	}
+#ifdef MAC
+	error = mac_socket_check_send(td->td_ucred, so);
+	if (error != 0)
+		goto sctp_bad;
+#endif /* MAC */
+
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovlen;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_td = td;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+	tiov = iov;
+	for (i = 0; i <uap->iovlen; i++, tiov++) {
+		if ((auio.uio_resid += tiov->iov_len) < 0) {
+			error = EINVAL;
+			goto sctp_bad;
+		}
+	}
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+	len = auio.uio_resid;
+	CURVNET_SET(so->so_vnet);
+	error = sctp_lower_sosend(so, to, &auio,
+		    (struct mbuf *)NULL, (struct mbuf *)NULL,
+		    uap->flags, u_sinfo, td);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		/* Generation of SIGPIPE can be controlled per socket */
+		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
+		    !(uap->flags & MSG_NOSIGNAL)) {
+			PROC_LOCK(td->td_proc);
+			tdsignal(td, SIGPIPE);
+			PROC_UNLOCK(td->td_proc);
+		}
+	}
+	if (error == 0)
+		td->td_retval[0] = len - auio.uio_resid;
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = td->td_retval[0];
+		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
+	}
+#endif /* KTRACE */
+sctp_bad:
+	free(iov, M_IOV);
+sctp_bad1:
+	if (fp != NULL)
+		fdrop(fp, td);
+sctp_bad2:
+	free(to, M_SONAME);
+	return (error);
+#else  /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+int
+sys_sctp_generic_recvmsg(td, uap)
+	struct thread *td;
+	struct sctp_generic_recvmsg_args /* {
+		int sd,
+		struct iovec *iov,
+		int iovlen,
+		struct sockaddr *from,
+		__socklen_t *fromlenaddr,
+		struct sctp_sndrcvinfo *sinfo,
+		int *msg_flags
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	uint8_t sockbufstore[256];
+	struct uio auio;
+	struct iovec *iov, *tiov;
+	struct sctp_sndrcvinfo sinfo;
+	struct socket *so;
+	struct file *fp = NULL;
+	struct sockaddr *fromsa;
+	cap_rights_t rights;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+	ssize_t len;
+	int error, fromlen, i, msg_flags;
+
+	AUDIT_ARG_FD(uap->sd);
+	error = getsock_cap(td, uap->sd, cap_rights_init(&rights, CAP_RECV),
+	    &fp, NULL);
+	if (error != 0)
+		return (error);
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
+		    uap->iovlen, &iov, EMSGSIZE);
+	else
+#endif
+		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
+	if (error != 0)
+		goto out1;
+
+	so = fp->f_data;
+	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+#ifdef MAC
+	error = mac_socket_check_receive(td->td_ucred, so);
+	if (error != 0)
+		goto out;
+#endif /* MAC */
+
+	if (uap->fromlenaddr != NULL) {
+		error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen));
+		if (error != 0)
+			goto out;
+	} else {
+		fromlen = 0;
+	}
+	if (uap->msg_flags) {
+		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
+		if (error != 0)
+			goto out;
+	} else {
+		msg_flags = 0;
+	}
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovlen;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_td = td;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+	tiov = iov;
+	for (i = 0; i <uap->iovlen; i++, tiov++) {
+		if ((auio.uio_resid += tiov->iov_len) < 0) {
+			error = EINVAL;
+			goto out;
+		}
+	}
+	len = auio.uio_resid;
+	fromsa = (struct sockaddr *)sockbufstore;
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
+	CURVNET_SET(so->so_vnet);
+	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
+		    fromsa, fromlen, &msg_flags,
+		    (struct sctp_sndrcvinfo *)&sinfo, 1);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	} else {
+		if (uap->sinfo)
+			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
+	}
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = len - auio.uio_resid;
+		ktrgenio(uap->sd, UIO_READ, ktruio, error);
+	}
+#endif /* KTRACE */
+	if (error != 0)
+		goto out;
+	td->td_retval[0] = len - auio.uio_resid;
+
+	if (fromlen && uap->from) {
+		len = fromlen;
+		if (len <= 0 || fromsa == 0)
+			len = 0;
+		else {
+			len = MIN(len, fromsa->sa_len);
+			error = copyout(fromsa, uap->from, (size_t)len);
+			if (error != 0)
+				goto out;
+		}
+		error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
+		if (error != 0)
+			goto out;
+	}
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_STRUCT))
+		ktrsockaddr(fromsa);
+#endif
+	if (uap->msg_flags) {
+		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
+		if (error != 0)
+			goto out;
+	}
+out:
+	free(iov, M_IOV);
+out1:
+	if (fp != NULL)
+		fdrop(fp, td);
+
+	return (error);
+#else  /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}


Property changes on: trunk/sys/netinet/sctp_syscalls.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/sctp_sysctl.c
===================================================================
--- trunk/sys/netinet/sctp_sysctl.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_sysctl.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_sysctl.c 238249 2012-07-08 15:37:58Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_sysctl.c 294149 2016-01-16 14:41:44Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp.h>
@@ -42,7 +42,10 @@
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <sys/smp.h>
+#include <sys/sysctl.h>
 
+FEATURE(sctp, "Stream Control Transmission Protocol");
+
 /*
  * sysctl tunable variables
  */
@@ -55,6 +58,12 @@
 	SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_pr_enable) = SCTPCTL_PR_ENABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_asconf_enable) = SCTPCTL_ASCONF_ENABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_reconfig_enable) = SCTPCTL_RECONFIG_ENABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_nrsack_enable) = SCTPCTL_NRSACK_ENABLE_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_pktdrop_enable) = SCTPCTL_PKTDROP_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
@@ -82,14 +91,11 @@
 	SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+	SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
-	/* EY */
-	SCTP_BASE_SYSCTL(sctp_nr_sack_on_off) = SCTPCTL_NR_SACK_ON_OFF_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT;
-	SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
-	SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
@@ -100,9 +106,7 @@
 	SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT;
-	/* JRS - Variable for default congestion control module */
 	SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
-	/* RS - Variable for default stream scheduling module */
 	SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT;
@@ -116,7 +120,7 @@
 	SCTP_BASE_SYSCTL(sctp_steady_step) = SCTPCTL_RTTVAR_STEADYS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_use_dccc_ecn) = SCTPCTL_RTTVAR_DCCCECN_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_blackhole) = SCTPCTL_BLACKHOLE_DEFAULT;
-
+	SCTP_BASE_SYSCTL(sctp_diag_info_code) = SCTPCTL_DIAG_INFO_CODE_DEFAULT;
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
 #endif
@@ -134,7 +138,7 @@
 
 /* It returns an upper limit. No filtering is done here */
 static unsigned int
-number_of_addresses(struct sctp_inpcb *inp)
+sctp_sysctl_number_of_addresses(struct sctp_inpcb *inp)
 {
 	unsigned int cnt;
 	struct sctp_vrf *vrf;
@@ -184,7 +188,7 @@
 }
 
 static int
-copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
 {
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
@@ -198,29 +202,29 @@
 	/* Turn on all the appropriate scope */
 	if (stcb) {
 		/* use association specific values */
-		loopback_scope = stcb->asoc.loopback_scope;
-		ipv4_local_scope = stcb->asoc.ipv4_local_scope;
-		local_scope = stcb->asoc.local_scope;
-		site_scope = stcb->asoc.site_scope;
+		loopback_scope = stcb->asoc.scope.loopback_scope;
+		ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+		local_scope = stcb->asoc.scope.local_scope;
+		site_scope = stcb->asoc.scope.site_scope;
+		ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+		ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
 	} else {
-		/* use generic values for endpoints */
+		/* Use generic values for endpoints. */
 		loopback_scope = 1;
 		ipv4_local_scope = 1;
 		local_scope = 1;
 		site_scope = 1;
-	}
-
-	/* use only address families of interest */
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		ipv6_addr_legal = 1;
-		if (SCTP_IPV6_V6ONLY(inp)) {
-			ipv4_addr_legal = 0;
+		if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+			ipv6_addr_legal = 1;
+			if (SCTP_IPV6_V6ONLY(inp)) {
+				ipv4_addr_legal = 0;
+			} else {
+				ipv4_addr_legal = 1;
+			}
 		} else {
+			ipv6_addr_legal = 0;
 			ipv4_addr_legal = 1;
 		}
-	} else {
-		ipv4_addr_legal = 1;
-		ipv6_addr_legal = 0;
 	}
 
 	/* neither Mac OS X nor FreeBSD support mulitple routing functions */
@@ -249,9 +253,13 @@
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin;
 
-						sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+						sin = &sctp_ifa->address.sin;
 						if (sin->sin_addr.s_addr == 0)
 							continue;
+						if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+						    &sin->sin_addr) != 0) {
+							continue;
+						}
 						if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
 							continue;
 					} else {
@@ -264,9 +272,13 @@
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6;
 
-						sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+						sin6 = &sctp_ifa->address.sin6;
 						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 							continue;
+						if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+						    &sin6->sin6_addr) != 0) {
+							continue;
+						}
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
@@ -342,7 +354,7 @@
  * sysctl functions
  */
 static int
-sctp_assoclist(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int number_of_endpoints;
 	unsigned int number_of_local_addresses;
@@ -364,14 +376,14 @@
 	number_of_remote_addresses = 0;
 
 	SCTP_INP_INFO_RLOCK();
-	if (req->oldptr == USER_ADDR_NULL) {
+	if (req->oldptr == NULL) {
 		LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
 			SCTP_INP_RLOCK(inp);
 			number_of_endpoints++;
-			number_of_local_addresses += number_of_addresses(inp);
+			number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				number_of_associations++;
-				number_of_local_addresses += number_of_addresses(inp);
+				number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 					number_of_remote_addresses++;
 				}
@@ -388,7 +400,7 @@
 		req->oldidx = (n + n / 8);
 		return (0);
 	}
-	if (req->newptr != USER_ADDR_NULL) {
+	if (req->newptr != NULL) {
 		SCTP_INP_INFO_RUNLOCK();
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
 		return (EPERM);
@@ -407,6 +419,7 @@
 		xinpcb.total_recvs = inp->total_recvs;
 		xinpcb.total_nospaces = inp->total_nospaces;
 		xinpcb.fragmentation_point = inp->sctp_frag_point;
+		xinpcb.socket = inp->sctp_socket;
 		so = inp->sctp_socket;
 		if ((so == NULL) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
@@ -426,7 +439,7 @@
 		}
 		SCTP_INP_INFO_RLOCK();
 		SCTP_INP_RLOCK(inp);
-		error = copy_out_local_addresses(inp, NULL, req);
+		error = sctp_sysctl_copy_out_local_addresses(inp, NULL, req);
 		if (error) {
 			SCTP_INP_DECR_REF(inp);
 			return (error);
@@ -441,7 +454,7 @@
 			if (stcb->asoc.primary_destination != NULL)
 				xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
 			xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
-			xstcb.state = SCTP_GET_STATE(&stcb->asoc);	/* FIXME */
+			xstcb.state = (uint32_t) sctp_map_assoc_state(stcb->asoc.state);
 			/* 7.0 does not support these */
 			xstcb.assoc_id = sctp_get_associd(stcb);
 			xstcb.peers_rwnd = stcb->asoc.peers_rwnd;
@@ -477,7 +490,7 @@
 			}
 			SCTP_INP_INFO_RLOCK();
 			SCTP_INP_RLOCK(inp);
-			error = copy_out_local_addresses(inp, stcb, req);
+			error = sctp_sysctl_copy_out_local_addresses(inp, stcb, req);
 			if (error) {
 				SCTP_INP_DECR_REF(inp);
 				atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -499,6 +512,7 @@
 				xraddr.mtu = net->mtu;
 				xraddr.rtt = net->rtt / 1000;
 				xraddr.heartbeat_interval = net->heart_beat_delay;
+				xraddr.ssthresh = net->ssthresh;
 				xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
 				xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
 				SCTP_INP_RUNLOCK(inp);
@@ -545,151 +559,120 @@
 	return (error);
 }
 
-
-#define RANGECHK(var, min, max) \
-	if ((var) < (min)) { (var) = (min); } \
-	else if ((var) > (max)) { (var) = (max); }
-
-/* XXX: Remove the #if after tunneling over IPv6 works also on FreeBSD. */
-#if !defined(__FreeBSD__) || defined(INET)
 static int
-sysctl_sctp_udp_tunneling_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_udp_tunneling(SYSCTL_HANDLER_ARGS)
 {
 	int error;
-	uint32_t old_sctp_udp_tunneling_port;
+	uint32_t old, new;
 
 	SCTP_INP_INFO_RLOCK();
-	old_sctp_udp_tunneling_port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
+	old = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
 	SCTP_INP_INFO_RUNLOCK();
-	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
-	if (error == 0) {
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), SCTPCTL_UDP_TUNNELING_PORT_MIN, SCTPCTL_UDP_TUNNELING_PORT_MAX);
-		if (old_sctp_udp_tunneling_port == SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
-			error = 0;
-			goto out;
-		}
-		SCTP_INP_INFO_WLOCK();
-		if (old_sctp_udp_tunneling_port) {
-			sctp_over_udp_stop();
-		}
-		if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
-			if (sctp_over_udp_start()) {
-				SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = 0;
+	new = old;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if ((error == 0) &&
+	    (req->newptr != NULL)) {
+#if (SCTPCTL_UDP_TUNNELING_PORT_MIN == 0)
+		if (new > SCTPCTL_UDP_TUNNELING_PORT_MAX) {
+#else
+		if ((new < SCTPCTL_UDP_TUNNELING_PORT_MIN) ||
+		    (new > SCTPCTL_UDP_TUNNELING_PORT_MAX)) {
+#endif
+			error = EINVAL;
+		} else {
+			SCTP_INP_INFO_WLOCK();
+			SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = new;
+			if (old != 0) {
+				sctp_over_udp_stop();
 			}
+			if (new != 0) {
+				error = sctp_over_udp_start();
+			}
+			SCTP_INP_INFO_WUNLOCK();
 		}
-		SCTP_INP_INFO_WUNLOCK();
 	}
-out:
 	return (error);
 }
 
-#endif
 
-
 static int
-sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_auth(SYSCTL_HANDLER_ARGS)
 {
 	int error;
+	uint32_t new;
 
-#ifdef VIMAGE
-	error = vnet_sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+	new = SCTP_BASE_SYSCTL(sctp_auth_disable);
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if ((error == 0) &&
+	    (req->newptr != NULL)) {
+#if (SCTPCTL_AUTH_DISABLE_MIN ==0)
+		if ((new > SCTPCTL_AUTH_DISABLE_MAX) ||
+		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
 #else
-	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+		if ((new < SCTPCTL_AUTH_DISABLE_MIN) ||
+		    (new > SCTPCTL_AUTH_DISABLE_MAX) ||
+		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
 #endif
-	if (error == 0) {
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_recvspace), SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), SCTPCTL_FRMAXBURST_MIN, SCTPCTL_FRMAXBURST_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_hashtblsize), SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_pcbtblsize), SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_min_split_point), SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_chunkscale), SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_sack_freq_default), SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_max_default), SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_min_default), SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_initial_default), SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rto_max_default), SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_path_pf_threshold), SCTPCTL_PATH_PF_THRESHOLD_MIN, SCTPCTL_PATH_PF_THRESHOLD_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
-		/* EY */
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), SCTPCTL_NR_SACK_ON_OFF_MIN, SCTPCTL_NR_SACK_ON_OFF_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_use_dac), SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_auth_disable), SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_nat_friendly), SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_L2_abc_variable), SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_do_drain), SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_hb_maxburst), SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_data_order), SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_min_residual), SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_max_retran_chunk), SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_logging_level), SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_default_cc_module), SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_default_ss_module), SCTPCTL_DEFAULT_SS_MODULE_MIN, SCTPCTL_DEFAULT_SS_MODULE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_default_frag_interleave), SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_vtag_time_wait), SCTPCTL_TIME_WAIT_MIN, SCTPCTL_TIME_WAIT_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_buffer_splitting), SCTPCTL_BUFFER_SPLITTING_MIN, SCTPCTL_BUFFER_SPLITTING_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_initial_cwnd), SCTPCTL_INITIAL_CWND_MIN, SCTPCTL_INITIAL_CWND_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_bw), SCTPCTL_RTTVAR_BW_MIN, SCTPCTL_RTTVAR_BW_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_rtt), SCTPCTL_RTTVAR_RTT_MIN, SCTPCTL_RTTVAR_RTT_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_eqret), SCTPCTL_RTTVAR_EQRET_MIN, SCTPCTL_RTTVAR_EQRET_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_steady_step), SCTPCTL_RTTVAR_STEADYS_MIN, SCTPCTL_RTTVAR_STEADYS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), SCTPCTL_RTTVAR_DCCCECN_MIN, SCTPCTL_RTTVAR_DCCCECN_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_base), SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN, SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), SCTPCTL_NAT_FRIENDLY_INITS_MIN, SCTPCTL_NAT_FRIENDLY_INITS_MAX);
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_blackhole), SCTPCTL_BLACKHOLE_MIN, SCTPCTL_BLACKHOLE_MAX);
+			error = EINVAL;
+		} else {
+			SCTP_BASE_SYSCTL(sctp_auth_disable) = new;
+		}
+	}
+	return (error);
+}
 
-#ifdef SCTP_DEBUG
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
+static int
+sctp_sysctl_handle_asconf(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint32_t new;
+
+	new = SCTP_BASE_SYSCTL(sctp_asconf_enable);
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if ((error == 0) &&
+	    (req->newptr != NULL)) {
+#if (SCTPCTL_ASCONF_ENABLE_MIN == 0)
+		if ((new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_disable) == 1))) {
+#else
+		if ((new < SCTPCTL_ASCONF_ENABLE_MIN) ||
+		    (new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_disable) == 1))) {
 #endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-		RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX);
-#endif
+			error = EINVAL;
+		} else {
+			SCTP_BASE_SYSCTL(sctp_asconf_enable) = new;
+		}
 	}
 	return (error);
 }
 
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 static int
-sysctl_stat_get(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_stats(SYSCTL_HANDLER_ARGS)
 {
-	int cpu, error;
-	struct sctpstat sb, *sarry, *cpin = NULL;
+	int error;
 
-	if ((req->newptr) && (req->newlen == sizeof(struct sctpstat))) {
-		/*
-		 * User wants us to clear or at least reset the counters to
-		 * the specified values.
-		 */
-		cpin = (struct sctpstat *)req->newptr;
-	} else if (req->newptr) {
-		/* Must be a stat structure */
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+	struct sctpstat *sarry;
+	struct sctpstat sb;
+	int cpu;
+
+#endif
+	struct sctpstat sb_temp;
+
+	if ((req->newptr != NULL) &&
+	    (req->newlen != sizeof(struct sctpstat))) {
 		return (EINVAL);
 	}
+	memset(&sb_temp, 0, sizeof(struct sctpstat));
+
+	if (req->newptr != NULL) {
+		error = SYSCTL_IN(req, &sb_temp, sizeof(struct sctpstat));
+		if (error != 0) {
+			return (error);
+		}
+	}
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 	memset(&sb, 0, sizeof(sb));
 	for (cpu = 0; cpu < mp_maxid; cpu++) {
 		sarry = &SCTP_BASE_STATS[cpu];
@@ -818,20 +801,36 @@
 		sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid;
 		sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid;
 		sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over;
-		if (cpin) {
-			memcpy(sarry, cpin, sizeof(struct sctpstat));
+		if (req->newptr != NULL) {
+			memcpy(sarry, &sb_temp, sizeof(struct sctpstat));
 		}
 	}
-	error = SYSCTL_OUT(req, &sb, sizeof(sb));
+	error = SYSCTL_OUT(req, &sb, sizeof(struct sctpstat));
+#else
+	error = SYSCTL_OUT(req, &SCTP_BASE_STATS, sizeof(struct sctpstat));
+	if (error != 0) {
+		return (error);
+	}
+	if (req->newptr != NULL) {
+		memcpy(&SCTP_BASE_STATS, &sb_temp, sizeof(struct sctpstat));
+	}
+#endif
 	return (error);
 }
 
-#endif
-
 #if defined(SCTP_LOCAL_TRACE_BUF)
 static int
-sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_trace_log(SYSCTL_HANDLER_ARGS)
 {
+	int error;
+
+	error = SYSCTL_OUT(req, &SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log));
+	return (error);
+}
+
+static int
+sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS)
+{
 	int error = 0;
 
 	memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
@@ -840,309 +839,119 @@
 
 #endif
 
+#define SCTP_UINT_SYSCTL(mib_name, var_name, prefix)			\
+	static int							\
+	sctp_sysctl_handle_##mib_name(SYSCTL_HANDLER_ARGS)		\
+	{								\
+		int error;						\
+		uint32_t new;						\
+									\
+		new = SCTP_BASE_SYSCTL(var_name);			\
+		error = sysctl_handle_int(oidp, &new, 0, req);		\
+		if ((error == 0) && (req->newptr != NULL)) {		\
+			if ((new < prefix##_MIN) ||			\
+			    (new > prefix##_MAX)) {			\
+				error = EINVAL;				\
+			} else {					\
+				SCTP_BASE_SYSCTL(var_name) = new;	\
+			}						\
+		}							\
+		return (error);						\
+	}								\
+	SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name,			\
+	                 CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, NULL, 0,	\
+	                 sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC);
 
 /*
  * sysctl definitions
  */
 
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_sendspace), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MAXDGRAM_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_recvspace), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RECVSPACE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_auto_asconf), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_AUTOASCONF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_ecn_enable), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ECN_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_STRICT_SACKS_DESC);
-
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_PEER_CHKOH_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_max_burst_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, fr_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_FRMAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MAXCHUNKS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_hashtblsize), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_TCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_pcbtblsize), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_PCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_min_split_point), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MIN_SPLIT_POINT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_chunkscale), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_CHUNKSCALE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DELAYED_SACK_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_sack_freq_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_SACK_FREQ_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_SYS_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ASOC_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_HEARTBEAT_INTERVAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_PMTU_RAISE_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_SECRET_LIFETIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rto_max_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rto_min_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTO_MIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rto_initial_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTO_INITIAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_init_rto_max_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_INIT_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_VALID_COOKIE_LIFE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_INIT_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ASSOC_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_PATH_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_pf_threshold, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_path_pf_threshold), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_PATH_PF_THRESHOLD_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_OUTGOING_STREAMS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_cmt_on_off), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_CMT_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_NR_SACK_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_cmt_use_dac), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_CMT_USE_DAC_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_CWND_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_auth_disable), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_AUTH_DISABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_nat_friendly), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_NAT_FRIENDLY_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_L2_abc_variable), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ABC_L_VAR_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MAX_CHAINED_MBUFS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_do_drain), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DO_SCTP_DRAIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_hb_maxburst), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_HB_MAX_BURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_ABORT_AT_LIMIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_strict_data_order), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_STRICT_DATA_ORDER_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_min_residual), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MIN_RESIDUAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_max_retran_chunk), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MAX_RETRAN_CHUNK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_logging_level), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_LOGGING_LEVEL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_default_cc_module), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DEFAULT_CC_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_ss_module, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_default_ss_module), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DEFAULT_SS_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_default_frag_interleave), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_mobility_base), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MOBILITY_BASE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
-
+SCTP_UINT_SYSCTL(sendspace, sctp_sendspace, SCTPCTL_MAXDGRAM)
+SCTP_UINT_SYSCTL(recvspace, sctp_recvspace, SCTPCTL_RECVSPACE)
+SCTP_UINT_SYSCTL(auto_asconf, sctp_auto_asconf, SCTPCTL_AUTOASCONF)
+SCTP_UINT_SYSCTL(ecn_enable, sctp_ecn_enable, SCTPCTL_ECN_ENABLE)
+SCTP_UINT_SYSCTL(pr_enable, sctp_pr_enable, SCTPCTL_PR_ENABLE)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    NULL, 0, sctp_sysctl_handle_auth, "IU", SCTPCTL_AUTH_DISABLE_DESC);
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    NULL, 0, sctp_sysctl_handle_asconf, "IU", SCTPCTL_ASCONF_ENABLE_DESC);
+SCTP_UINT_SYSCTL(reconfig_enable, sctp_reconfig_enable, SCTPCTL_RECONFIG_ENABLE)
+SCTP_UINT_SYSCTL(nr_sack_on_off, sctp_nrsack_enable, SCTPCTL_NRSACK_ENABLE)
+SCTP_UINT_SYSCTL(pktdrop_enable, sctp_pktdrop_enable, SCTPCTL_PKTDROP_ENABLE)
+SCTP_UINT_SYSCTL(strict_sacks, sctp_strict_sacks, SCTPCTL_STRICT_SACKS)
+SCTP_UINT_SYSCTL(peer_chkoh, sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH)
+SCTP_UINT_SYSCTL(maxburst, sctp_max_burst_default, SCTPCTL_MAXBURST)
+SCTP_UINT_SYSCTL(fr_maxburst, sctp_fr_max_burst_default, SCTPCTL_FRMAXBURST)
+SCTP_UINT_SYSCTL(maxchunks, sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS)
+SCTP_UINT_SYSCTL(tcbhashsize, sctp_hashtblsize, SCTPCTL_TCBHASHSIZE)
+SCTP_UINT_SYSCTL(pcbhashsize, sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE)
+SCTP_UINT_SYSCTL(min_split_point, sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT)
+SCTP_UINT_SYSCTL(chunkscale, sctp_chunkscale, SCTPCTL_CHUNKSCALE)
+SCTP_UINT_SYSCTL(delayed_sack_time, sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME)
+SCTP_UINT_SYSCTL(sack_freq, sctp_sack_freq_default, SCTPCTL_SACK_FREQ)
+SCTP_UINT_SYSCTL(sys_resource, sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE)
+SCTP_UINT_SYSCTL(asoc_resource, sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE)
+SCTP_UINT_SYSCTL(heartbeat_interval, sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL)
+SCTP_UINT_SYSCTL(pmtu_raise_time, sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME)
+SCTP_UINT_SYSCTL(shutdown_guard_time, sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME)
+SCTP_UINT_SYSCTL(secret_lifetime, sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME)
+SCTP_UINT_SYSCTL(rto_max, sctp_rto_max_default, SCTPCTL_RTO_MAX)
+SCTP_UINT_SYSCTL(rto_min, sctp_rto_min_default, SCTPCTL_RTO_MIN)
+SCTP_UINT_SYSCTL(rto_initial, sctp_rto_initial_default, SCTPCTL_RTO_INITIAL)
+SCTP_UINT_SYSCTL(init_rto_max, sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX)
+SCTP_UINT_SYSCTL(valid_cookie_life, sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE)
+SCTP_UINT_SYSCTL(init_rtx_max, sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX)
+SCTP_UINT_SYSCTL(assoc_rtx_max, sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX)
+SCTP_UINT_SYSCTL(path_rtx_max, sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX)
+SCTP_UINT_SYSCTL(path_pf_threshold, sctp_path_pf_threshold, SCTPCTL_PATH_PF_THRESHOLD)
+SCTP_UINT_SYSCTL(add_more_on_output, sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT)
+SCTP_UINT_SYSCTL(incoming_streams, sctp_nr_incoming_streams_default, SCTPCTL_INCOMING_STREAMS)
+SCTP_UINT_SYSCTL(outgoing_streams, sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS)
+SCTP_UINT_SYSCTL(cmt_on_off, sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF)
+SCTP_UINT_SYSCTL(cmt_use_dac, sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC)
+SCTP_UINT_SYSCTL(cwnd_maxburst, sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST)
+SCTP_UINT_SYSCTL(nat_friendly, sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY)
+SCTP_UINT_SYSCTL(abc_l_var, sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR)
+SCTP_UINT_SYSCTL(max_chained_mbufs, sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS)
+SCTP_UINT_SYSCTL(do_sctp_drain, sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN)
+SCTP_UINT_SYSCTL(hb_max_burst, sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST)
+SCTP_UINT_SYSCTL(abort_at_limit, sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT)
+SCTP_UINT_SYSCTL(strict_data_order, sctp_strict_data_order, SCTPCTL_STRICT_DATA_ORDER)
+SCTP_UINT_SYSCTL(min_residual, sctp_min_residual, SCTPCTL_MIN_RESIDUAL)
+SCTP_UINT_SYSCTL(max_retran_chunk, sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK)
+SCTP_UINT_SYSCTL(log_level, sctp_logging_level, SCTPCTL_LOGGING_LEVEL)
+SCTP_UINT_SYSCTL(default_cc_module, sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE)
+SCTP_UINT_SYSCTL(default_ss_module, sctp_default_ss_module, SCTPCTL_DEFAULT_SS_MODULE)
+SCTP_UINT_SYSCTL(default_frag_interleave, sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE)
+SCTP_UINT_SYSCTL(mobility_base, sctp_mobility_base, SCTPCTL_MOBILITY_BASE)
+SCTP_UINT_SYSCTL(mobility_fasthandoff, sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF)
 #if defined(SCTP_LOCAL_TRACE_BUF)
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
-    &SCTP_BASE_SYSCTL(sctp_log), sctp_log,
-    "SCTP logging (struct sctp_log)");
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_log), 0, sysctl_sctp_cleartrace, "IU",
-    "Clear SCTP Logging buffer");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD,
+    NULL, 0, sctp_sysctl_handle_trace_log, "S,sctplog", "SCTP logging (struct sctp_log)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    NULL, 0, sctp_sysctl_handle_trace_log_clear, "IU", "Clear SCTP Logging buffer");
 #endif
-
-/* XXX: Remove the #if after tunneling over IPv6 works also on FreeBSD. */
-#if !defined(__FreeBSD__) || defined(INET)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), 0, sysctl_sctp_udp_tunneling_check, "IU",
-    SCTPCTL_UDP_TUNNELING_PORT_DESC);
-#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_NAT_FRIENDLY_INITS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_vtag_time_wait), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_TIME_WAIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_buffer_splitting), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_BUFFER_SPLITTING_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_initial_cwnd), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_INITIAL_CWND_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_bw, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rttvar_bw), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTTVAR_BW_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_rtt, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rttvar_rtt), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTTVAR_RTT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_eqret, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_rttvar_eqret), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTTVAR_EQRET_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_steady_step, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_steady_step), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTTVAR_STEADYS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, use_dcccecn, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_RTTVAR_DCCCECN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, blackhole, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_blackhole), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_BLACKHOLE_DESC);
-
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    NULL, 0, sctp_sysctl_handle_udp_tunneling, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC);
+SCTP_UINT_SYSCTL(enable_sack_immediately, sctp_enable_sack_immediately, SCTPCTL_SACK_IMMEDIATELY_ENABLE)
+SCTP_UINT_SYSCTL(nat_friendly_init, sctp_inits_include_nat_friendly, SCTPCTL_NAT_FRIENDLY_INITS)
+SCTP_UINT_SYSCTL(vtag_time_wait, sctp_vtag_time_wait, SCTPCTL_TIME_WAIT)
+SCTP_UINT_SYSCTL(buffer_splitting, sctp_buffer_splitting, SCTPCTL_BUFFER_SPLITTING)
+SCTP_UINT_SYSCTL(initial_cwnd, sctp_initial_cwnd, SCTPCTL_INITIAL_CWND)
+SCTP_UINT_SYSCTL(rttvar_bw, sctp_rttvar_bw, SCTPCTL_RTTVAR_BW)
+SCTP_UINT_SYSCTL(rttvar_rtt, sctp_rttvar_rtt, SCTPCTL_RTTVAR_RTT)
+SCTP_UINT_SYSCTL(rttvar_eqret, sctp_rttvar_eqret, SCTPCTL_RTTVAR_EQRET)
+SCTP_UINT_SYSCTL(rttvar_steady_step, sctp_steady_step, SCTPCTL_RTTVAR_STEADYS)
+SCTP_UINT_SYSCTL(use_dcccecn, sctp_use_dccc_ecn, SCTPCTL_RTTVAR_DCCCECN)
+SCTP_UINT_SYSCTL(blackhole, sctp_blackhole, SCTPCTL_BLACKHOLE)
+SCTP_UINT_SYSCTL(diag_info_code, sctp_diag_info_code, SCTPCTL_DIAG_INFO_CODE)
 #ifdef SCTP_DEBUG
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_debug_on), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_DEBUG_DESC);
+SCTP_UINT_SYSCTL(debug, sctp_debug_on, SCTPCTL_DEBUG)
 #endif
-
-
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_UINT | CTLFLAG_RW,
-    &SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU",
-    SCTPCTL_OUTPUT_UNLOCKED_DESC);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+SCTP_UINT_SYSCTL(output_unlocked, sctp_output_unlocked, SCTPCTL_OUTPUT_UNLOCKED)
 #endif
-
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, stats,
-    CTLTYPE_STRUCT | CTLFLAG_RW,
-    0, 0, sysctl_stat_get, "S,sctpstat",
-    "SCTP statistics (struct sctp_stat)");
-#else
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
-    &SCTP_BASE_STATS_SYSCTL, sctpstat,
-    "SCTP statistics (struct sctp_stat)");
-#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLTYPE_OPAQUE | CTLFLAG_RD,
-    0, 0, sctp_assoclist,
-    "S,xassoc", "List of active SCTP associations");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW,
+    NULL, 0, sctp_sysctl_handle_stats, "S,sctpstat", "SCTP statistics (struct sctp_stat)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RD,
+    NULL, 0, sctp_sysctl_handle_assoclist, "S,xassoc", "List of active SCTP associations");

Modified: trunk/sys/netinet/sctp_sysctl.h
===================================================================
--- trunk/sys/netinet/sctp_sysctl.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_sysctl.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_sysctl.h 238249 2012-07-08 15:37:58Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_sysctl.h 294151 2016-01-16 14:46:27Z tuexen $");
 
 #ifndef _NETINET_SCTP_SYSCTL_H_
 #define _NETINET_SCTP_SYSCTL_H_
@@ -46,6 +46,12 @@
 	uint32_t sctp_auto_asconf;
 	uint32_t sctp_multiple_asconfs;
 	uint32_t sctp_ecn_enable;
+	uint32_t sctp_pr_enable;
+	uint32_t sctp_auth_disable;
+	uint32_t sctp_asconf_enable;
+	uint32_t sctp_reconfig_enable;
+	uint32_t sctp_nrsack_enable;
+	uint32_t sctp_pktdrop_enable;
 	uint32_t sctp_fr_max_burst_default;
 	uint32_t sctp_strict_sacks;
 	uint32_t sctp_peer_chunk_oh;
@@ -73,14 +79,11 @@
 	uint32_t sctp_path_rtx_max_default;
 	uint32_t sctp_path_pf_threshold;
 	uint32_t sctp_add_more_threshold;
+	uint32_t sctp_nr_incoming_streams_default;
 	uint32_t sctp_nr_outgoing_streams_default;
 	uint32_t sctp_cmt_on_off;
 	uint32_t sctp_cmt_use_dac;
-	/* EY 5/5/08 - nr_sack flag variable */
-	uint32_t sctp_nr_sack_on_off;
 	uint32_t sctp_use_cwnd_based_maxburst;
-	uint32_t sctp_asconf_auth_nochk;
-	uint32_t sctp_auth_disable;
 	uint32_t sctp_nat_friendly;
 	uint32_t sctp_L2_abc_variable;
 	uint32_t sctp_mbuf_threshold_count;
@@ -104,6 +107,7 @@
 	uint32_t sctp_rttvar_eqret;
 	uint32_t sctp_steady_step;
 	uint32_t sctp_use_dccc_ecn;
+	uint32_t sctp_diag_info_code;
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	struct sctp_log sctp_log;
 #endif
@@ -140,7 +144,7 @@
 #define SCTPCTL_AUTOASCONF_DESC		"Enable SCTP Auto-ASCONF"
 #define SCTPCTL_AUTOASCONF_MIN		0
 #define SCTPCTL_AUTOASCONF_MAX		1
-#define SCTPCTL_AUTOASCONF_DEFAULT	SCTP_DEFAULT_AUTO_ASCONF
+#define SCTPCTL_AUTOASCONF_DEFAULT	1
 
 /* autoasconf: Enable SCTP Auto-ASCONF */
 #define SCTPCTL_MULTIPLEASCONFS_DESC	"Enable SCTP Muliple-ASCONFs"
@@ -154,6 +158,42 @@
 #define SCTPCTL_ECN_ENABLE_MAX		1
 #define SCTPCTL_ECN_ENABLE_DEFAULT	1
 
+/* pr_enable: Enable PR-SCTP */
+#define SCTPCTL_PR_ENABLE_DESC		"Enable PR-SCTP"
+#define SCTPCTL_PR_ENABLE_MIN		0
+#define SCTPCTL_PR_ENABLE_MAX		1
+#define SCTPCTL_PR_ENABLE_DEFAULT	1
+
+/* auth_disable: Disable SCTP AUTH function */
+#define SCTPCTL_AUTH_DISABLE_DESC	"Disable SCTP AUTH function"
+#define SCTPCTL_AUTH_DISABLE_MIN	0
+#define SCTPCTL_AUTH_DISABLE_MAX	1
+#define SCTPCTL_AUTH_DISABLE_DEFAULT	0
+
+/* asconf_enable: Enable SCTP ASCONF */
+#define SCTPCTL_ASCONF_ENABLE_DESC	"Enable SCTP ASCONF"
+#define SCTPCTL_ASCONF_ENABLE_MIN	0
+#define SCTPCTL_ASCONF_ENABLE_MAX	1
+#define SCTPCTL_ASCONF_ENABLE_DEFAULT	1
+
+/* reconfig_enable: Enable SCTP RE-CONFIG */
+#define SCTPCTL_RECONFIG_ENABLE_DESC	"Enable SCTP RE-CONFIG"
+#define SCTPCTL_RECONFIG_ENABLE_MIN	0
+#define SCTPCTL_RECONFIG_ENABLE_MAX	1
+#define SCTPCTL_RECONFIG_ENABLE_DEFAULT	1
+
+/* nrsack_enable: Enable NR_SACK */
+#define SCTPCTL_NRSACK_ENABLE_DESC	"Enable SCTP NR-SACK"
+#define SCTPCTL_NRSACK_ENABLE_MIN	0
+#define SCTPCTL_NRSACK_ENABLE_MAX	1
+#define SCTPCTL_NRSACK_ENABLE_DEFAULT	0
+
+/* pktdrop_enable: Enable SCTP Packet Drop Reports */
+#define SCTPCTL_PKTDROP_ENABLE_DESC	"Enable SCTP PKTDROP"
+#define SCTPCTL_PKTDROP_ENABLE_MIN	0
+#define SCTPCTL_PKTDROP_ENABLE_MAX	1
+#define SCTPCTL_PKTDROP_ENABLE_DEFAULT	0
+
 /* strict_sacks: Enable SCTP Strict SACK checking */
 #define SCTPCTL_STRICT_SACKS_DESC	"Enable SCTP Strict SACK checking"
 #define SCTPCTL_STRICT_SACKS_MIN	0
@@ -252,10 +292,10 @@
 #define SCTPCTL_PMTU_RAISE_TIME_DEFAULT	SCTP_DEF_PMTU_RAISE_SEC
 
 /* shutdown_guard_time: Default shutdown guard timer in seconds */
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC	"Default shutdown guard timer in seconds"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC	"Shutdown guard timer in seconds (0 means 5 times RTO.Max)"
 #define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN		0
 #define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX		0xFFFFFFFF
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT	SCTP_DEF_MAX_SHUTDOWN_SEC
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT	0
 
 /* secret_lifetime: Default secret lifetime in seconds */
 #define SCTPCTL_SECRET_LIFETIME_DESC	"Default secret lifetime in seconds"
@@ -323,6 +363,12 @@
 #define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX	0xFFFFFFFF
 #define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
 
+/* incoming_streams: Default number of incoming streams */
+#define SCTPCTL_INCOMING_STREAMS_DESC	"Default number of incoming streams"
+#define SCTPCTL_INCOMING_STREAMS_MIN	1
+#define SCTPCTL_INCOMING_STREAMS_MAX	65535
+#define SCTPCTL_INCOMING_STREAMS_DEFAULT SCTP_ISTREAM_INITIAL
+
 /* outgoing_streams: Default number of outgoing streams */
 #define SCTPCTL_OUTGOING_STREAMS_DESC	"Default number of outgoing streams"
 #define SCTPCTL_OUTGOING_STREAMS_MIN	1
@@ -335,12 +381,6 @@
 #define SCTPCTL_CMT_ON_OFF_MAX		SCTP_CMT_MAX
 #define SCTPCTL_CMT_ON_OFF_DEFAULT	SCTP_CMT_OFF
 
-/* EY - nr_sack_on_off: NR_SACK on/off flag */
-#define SCTPCTL_NR_SACK_ON_OFF_DESC	"NR_SACK on/off flag"
-#define SCTPCTL_NR_SACK_ON_OFF_MIN	0
-#define SCTPCTL_NR_SACK_ON_OFF_MAX	1
-#define SCTPCTL_NR_SACK_ON_OFF_DEFAULT	0
-
 /* cmt_use_dac: CMT DAC on/off flag */
 #define SCTPCTL_CMT_USE_DAC_DESC	"CMT DAC on/off flag"
 #define SCTPCTL_CMT_USE_DAC_MIN		0
@@ -353,18 +393,6 @@
 #define SCTPCTL_CWND_MAXBURST_MAX	1
 #define SCTPCTL_CWND_MAXBURST_DEFAULT	1
 
-/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC	"Disable SCTP ASCONF AUTH requirement"
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN	0
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX	1
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT	0
-
-/* auth_disable: Disable SCTP AUTH function */
-#define SCTPCTL_AUTH_DISABLE_DESC	"Disable SCTP AUTH function"
-#define SCTPCTL_AUTH_DISABLE_MIN	0
-#define SCTPCTL_AUTH_DISABLE_MAX	1
-#define SCTPCTL_AUTH_DISABLE_DEFAULT	0
-
 /* nat_friendly: SCTP NAT friendly operation */
 #define SCTPCTL_NAT_FRIENDLY_DESC	"SCTP NAT friendly operation"
 #define SCTPCTL_NAT_FRIENDLY_MIN	0
@@ -447,19 +475,19 @@
 #define SCTPCTL_MOBILITY_BASE_DESC	"Enable SCTP base mobility"
 #define SCTPCTL_MOBILITY_BASE_MIN	0
 #define SCTPCTL_MOBILITY_BASE_MAX	1
-#define SCTPCTL_MOBILITY_BASE_DEFAULT	SCTP_DEFAULT_MOBILITY_BASE
+#define SCTPCTL_MOBILITY_BASE_DEFAULT	0
 
 /* mobility_fasthandoff: Enable SCTP fast handoff support */
 #define SCTPCTL_MOBILITY_FASTHANDOFF_DESC	"Enable SCTP fast handoff"
 #define SCTPCTL_MOBILITY_FASTHANDOFF_MIN	0
 #define SCTPCTL_MOBILITY_FASTHANDOFF_MAX	1
-#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT	SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT	0
 
 /* Enable SCTP/UDP tunneling port */
 #define SCTPCTL_UDP_TUNNELING_PORT_DESC		"Set the SCTP/UDP tunneling port"
 #define SCTPCTL_UDP_TUNNELING_PORT_MIN		0
 #define SCTPCTL_UDP_TUNNELING_PORT_MAX		65535
-#define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT	SCTP_OVER_UDP_TUNNELING_PORT
+#define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT	0
 
 /* Enable sending of the SACK-IMMEDIATELY bit */
 #define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC	"Enable sending of the SACK-IMMEDIATELY-bit."
@@ -523,6 +551,11 @@
 #define SCTPCTL_BLACKHOLE_MAX		2
 #define SCTPCTL_BLACKHOLE_DEFAULT	SCTPCTL_BLACKHOLE_MIN
 
+#define SCTPCTL_DIAG_INFO_CODE_DESC	"Diagnostic information error cause code"
+#define SCTPCTL_DIAG_INFO_CODE_MIN	0
+#define SCTPCTL_DIAG_INFO_CODE_MAX	65535
+#define SCTPCTL_DIAG_INFO_CODE_DEFAULT	0
+
 #if defined(SCTP_DEBUG)
 /* debug: Configure debug output */
 #define SCTPCTL_DEBUG_DESC	"Configure debug output"
@@ -532,7 +565,7 @@
 #endif
 
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 #define SCTPCTL_OUTPUT_UNLOCKED_DESC	"Unlock socket when sending packets down to IP."
 #define SCTPCTL_OUTPUT_UNLOCKED_MIN	0
 #define SCTPCTL_OUTPUT_UNLOCKED_MAX	1

Modified: trunk/sys/netinet/sctp_timer.c
===================================================================
--- trunk/sys/netinet/sctp_timer.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_timer.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_timer.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_timer.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #define _IP_VHL
 #include <netinet/sctp_os.h>
@@ -50,7 +50,9 @@
 #include <netinet/sctp_input.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_uio.h>
+#if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
+#endif
 
 
 void
@@ -84,7 +86,7 @@
 	    asoc->sent_queue_cnt);
 }
 
-int
+static int
 sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
     struct sctp_nets *net, uint16_t threshold)
 {
@@ -91,7 +93,7 @@
 	if (net) {
 		net->error_count++;
 		SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
-		    net, net->error_count,
+		    (void *)net, net->error_count,
 		    net->failure_threshold);
 		if (net->error_count > net->failure_threshold) {
 			/* We had a threshold failure */
@@ -109,8 +111,10 @@
 				net->dest_state |= SCTP_ADDR_PF;
 				net->last_active = sctp_get_tick_count();
 				sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
-				sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
-				sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
+				sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+				    inp, stcb, net,
+				    SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+				sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 			}
 		}
 	}
@@ -139,7 +143,7 @@
 		stcb->asoc.overall_error_count++;
 	}
 	SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
-	    &stcb->asoc, stcb->asoc.overall_error_count,
+	    (void *)&stcb->asoc, stcb->asoc.overall_error_count,
 	    (uint32_t) threshold,
 	    ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
 	/*
@@ -148,24 +152,12 @@
 	 */
 	if (stcb->asoc.overall_error_count > threshold) {
 		/* Abort notification sends a ULP notify */
-		struct mbuf *oper;
+		struct mbuf *op_err;
 
-		oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-		    0, M_DONTWAIT, 1, MT_DATA);
-		if (oper) {
-			struct sctp_paramhdr *ph;
-			uint32_t *ippp;
-
-			SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-			    sizeof(uint32_t);
-			ph = mtod(oper, struct sctp_paramhdr *);
-			ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-			ph->param_length = htons(SCTP_BUF_LEN(oper));
-			ippp = (uint32_t *) (ph + 1);
-			*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
-		}
-		inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
-		sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
+		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+		    "Association error counter exceeded");
+		inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_2;
+		sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		return (1);
 	}
 	return (0);
@@ -348,7 +340,7 @@
 			return (NULL);
 		}
 	}
-	do {
+	for (;;) {
 		alt = TAILQ_NEXT(mnet, sctp_next);
 		if (alt == NULL) {
 			once++;
@@ -367,7 +359,6 @@
 			}
 			alt->src_addr_selected = 0;
 		}
-		/* sa_ignore NO_NULL_CHK */
 		if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
 		    (alt->ro.ro_rt != NULL) &&
 		    (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) {
@@ -375,7 +366,7 @@
 			break;
 		}
 		mnet = alt;
-	} while (alt != NULL);
+	}
 
 	if (alt == NULL) {
 		/* Case where NO insv network exists (dormant state) */
@@ -382,7 +373,7 @@
 		/* we rotate destinations */
 		once = 0;
 		mnet = net;
-		do {
+		for (;;) {
 			if (mnet == NULL) {
 				return (TAILQ_FIRST(&stcb->asoc.nets));
 			}
@@ -393,8 +384,10 @@
 					break;
 				}
 				alt = TAILQ_FIRST(&stcb->asoc.nets);
+				if (alt == NULL) {
+					break;
+				}
 			}
-			/* sa_ignore NO_NULL_CHK */
 			if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
 			    (alt != net)) {
 				/* Found an alternate address */
@@ -401,7 +394,7 @@
 				break;
 			}
 			mnet = alt;
-		} while (alt != NULL);
+		}
 	}
 	if (alt == NULL) {
 		return (net);
@@ -416,7 +409,11 @@
     int num_marked, int num_abandoned)
 {
 	if (net->RTO == 0) {
-		net->RTO = stcb->asoc.minrto;
+		if (net->RTO_measured) {
+			net->RTO = stcb->asoc.minrto;
+		} else {
+			net->RTO = stcb->asoc.initial_rto;
+		}
 	}
 	net->RTO <<= 1;
 	if (net->RTO > stcb->asoc.maxrto) {
@@ -440,9 +437,19 @@
 	TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
 		if (SCTP_TSN_GE(asoc->last_acked_seq, chk->rec.data.TSN_seq)) {
 			SCTP_PRINTF("Found chk:%p tsn:%x <= last_acked_seq:%x\n",
-			    chk, chk->rec.data.TSN_seq, asoc->last_acked_seq);
+			    (void *)chk, chk->rec.data.TSN_seq, asoc->last_acked_seq);
+			if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+				if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+					asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+				}
+			}
+			if ((asoc->strmout[chk->rec.data.stream_number].chunks_on_queues == 0) &&
+			    (asoc->strmout[chk->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+			    TAILQ_EMPTY(&asoc->strmout[chk->rec.data.stream_number].outqueue)) {
+				asoc->trigger_reset = 1;
+			}
 			TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
-			if (chk->pr_sctp_on) {
+			if (PR_SCTP_ENABLED(chk->flags)) {
 				if (asoc->pr_sctp_cnt != 0)
 					asoc->pr_sctp_cnt--;
 			}
@@ -451,7 +458,7 @@
 				sctp_free_bufspace(stcb, asoc, chk, 1);
 				sctp_m_freem(chk->data);
 				chk->data = NULL;
-				if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(chk->flags)) {
+				if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(chk->flags)) {
 					asoc->sent_queue_cnt_removeable--;
 				}
 			}
@@ -461,7 +468,7 @@
 	}
 	SCTP_PRINTF("after recover order is as follows\n");
 	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
-		SCTP_PRINTF("chk:%p TSN:%x\n", chk, chk->rec.data.TSN_seq);
+		SCTP_PRINTF("chk:%p TSN:%x\n", (void *)chk, chk->rec.data.TSN_seq);
 	}
 }
 
@@ -548,7 +555,7 @@
 	TAILQ_FOREACH_SAFE(chk, &stcb->asoc.sent_queue, sctp_next, nchk) {
 		if (SCTP_TSN_GE(stcb->asoc.last_acked_seq, chk->rec.data.TSN_seq)) {
 			/* Strange case our list got out of order? */
-			SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x",
+			SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x\n",
 			    (unsigned int)stcb->asoc.last_acked_seq, (unsigned int)chk->rec.data.TSN_seq);
 			recovery_cnt++;
 #ifdef INVARIANTS
@@ -606,7 +613,7 @@
 					continue;
 				}
 			}
-			if (stcb->asoc.peer_supports_prsctp && PR_SCTP_TTL_ENABLED(chk->flags)) {
+			if (stcb->asoc.prsctp_supported && PR_SCTP_TTL_ENABLED(chk->flags)) {
 				/* Is it expired? */
 				if (timevalcmp(&now, &chk->rec.data.timetodrop, >)) {
 					/* Yes so drop it */
@@ -620,7 +627,7 @@
 					continue;
 				}
 			}
-			if (stcb->asoc.peer_supports_prsctp && PR_SCTP_RTX_ENABLED(chk->flags)) {
+			if (stcb->asoc.prsctp_supported && PR_SCTP_RTX_ENABLED(chk->flags)) {
 				/* Has it been retransmitted tv_sec times? */
 				if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
 					if (chk->data) {
@@ -768,7 +775,7 @@
 	if (audit_tf) {
 		SCTPDBG(SCTP_DEBUG_TIMER4,
 		    "Audit total flight due to negative value net:%p\n",
-		    net);
+		    (void *)net);
 		stcb->asoc.total_flight = 0;
 		stcb->asoc.total_flight_count = 0;
 		/* Clear all networks flight size */
@@ -776,7 +783,7 @@
 			lnets->flight_size = 0;
 			SCTPDBG(SCTP_DEBUG_TIMER4,
 			    "Net:%p c-f cwnd:%d ssthresh:%d\n",
-			    lnets, lnets->cwnd, lnets->ssthresh);
+			    (void *)lnets, lnets->cwnd, lnets->ssthresh);
 		}
 		TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 			if (chk->sent < SCTP_DATAGRAM_RESEND) {
@@ -963,7 +970,7 @@
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
 		return (0);
 	}
-	if (stcb->asoc.peer_supports_prsctp) {
+	if (stcb->asoc.prsctp_supported) {
 		struct sctp_tmit_chunk *lchk;
 
 		lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
@@ -1047,24 +1054,12 @@
 	if (cookie == NULL) {
 		if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
 			/* FOOBAR! */
-			struct mbuf *oper;
+			struct mbuf *op_err;
 
-			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-			    0, M_DONTWAIT, 1, MT_DATA);
-			if (oper) {
-				struct sctp_paramhdr *ph;
-				uint32_t *ippp;
-
-				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
-				    sizeof(uint32_t);
-				ph = mtod(oper, struct sctp_paramhdr *);
-				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
-				ph->param_length = htons(SCTP_BUF_LEN(oper));
-				ippp = (uint32_t *) (ph + 1);
-				*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
-			}
-			inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
-			sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
+			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+			    "Cookie timer expired, but no cookie");
+			inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_3;
+			sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		} else {
 #ifdef INVARIANTS
 			panic("Cookie timer expires in wrong state?");
@@ -1500,11 +1495,15 @@
 		}
 		if (net->ro._s_addr) {
 			mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
+#if defined(INET) || defined(INET6)
 			if (net->port) {
 				mtu -= sizeof(struct udphdr);
 			}
+#endif
 			if (mtu > next_mtu) {
 				net->mtu = next_mtu;
+			} else {
+				net->mtu = mtu;
 			}
 		}
 	}
@@ -1556,6 +1555,13 @@
 					/* only send SHUTDOWN 1st time thru */
 					struct sctp_nets *netp;
 
+					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+					}
+					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+					sctp_stop_timers_for_shutdown(stcb);
 					if (stcb->asoc.alternate) {
 						netp = stcb->asoc.alternate;
 					} else {
@@ -1562,12 +1568,6 @@
 						netp = stcb->asoc.primary_destination;
 					}
 					sctp_send_shutdown(stcb, netp);
-					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
-					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
-						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
-					}
-					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
-					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
 					    stcb->sctp_ep, stcb,
 					    netp);

Modified: trunk/sys/netinet/sctp_timer.h
===================================================================
--- trunk/sys/netinet/sctp_timer.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_timer.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_timer.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_timer.h 296052 2016-02-25 18:46:06Z tuexen $");
 
 #ifndef _NETINET_SCTP_TIMER_H_
 #define _NETINET_SCTP_TIMER_H_
@@ -47,10 +47,6 @@
     struct sctp_nets *, int mode);
 
 int
-sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
-    struct sctp_nets *, uint16_t);
-
-int
 sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
     struct sctp_nets *);
 int

Modified: trunk/sys/netinet/sctp_uio.h
===================================================================
--- trunk/sys/netinet/sctp_uio.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_uio.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_uio.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_uio.h 294180 2016-01-16 18:05:24Z tuexen $");
 
 #ifndef _NETINET_SCTP_UIO_H_
 #define _NETINET_SCTP_UIO_H_
@@ -135,20 +135,27 @@
 	uint16_t sinfo_flags;
 	uint32_t sinfo_ppid;
 	uint32_t sinfo_context;
-	uint32_t sinfo_timetolive;
+	uint32_t sinfo_timetolive;	/* should have been sinfo_pr_value */
 	uint32_t sinfo_tsn;
 	uint32_t sinfo_cumtsn;
 	sctp_assoc_t sinfo_assoc_id;
-	uint16_t sreinfo_next_flags;
-	uint16_t sreinfo_next_stream;
-	uint32_t sreinfo_next_aid;
-	uint32_t sreinfo_next_length;
-	uint32_t sreinfo_next_ppid;
+	uint16_t serinfo_next_flags;
+	uint16_t serinfo_next_stream;
+	uint32_t serinfo_next_aid;
+	uint32_t serinfo_next_length;
+	uint32_t serinfo_next_ppid;
 	uint16_t sinfo_keynumber;
 	uint16_t sinfo_keynumber_valid;
 	uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
 };
 
+#define sinfo_pr_value sinfo_timetolive
+#define sreinfo_next_flags serinfo_next_flags
+#define sreinfo_next_stream serinfo_next_stream
+#define sreinfo_next_aid serinfo_next_aid
+#define sreinfo_next_length serinfo_next_length
+#define sreinfo_next_ppid serinfo_next_ppid
+
 struct sctp_sndinfo {
 	uint16_t snd_sid;
 	uint16_t snd_flags;
@@ -250,18 +257,23 @@
 					SCTP_SACK_IMMEDIATELY)) != 0)
 /* for the endpoint */
 
-/* The lower byte is an enumeration of PR-SCTP policies */
+/* The lower four bits is an enumeration of PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */
 #define SCTP_PR_SCTP_TTL  0x0001/* Time based PR-SCTP */
 #define SCTP_PR_SCTP_BUF  0x0002/* Buffer based PR-SCTP */
 #define SCTP_PR_SCTP_RTX  0x0003/* Number of retransmissions based PR-SCTP */
+#define SCTP_PR_SCTP_MAX  SCTP_PR_SCTP_RTX
+#define SCTP_PR_SCTP_ALL  0x000f/* Used for aggregated stats */
 
 #define PR_SCTP_POLICY(x)         ((x) & 0x0f)
-#define PR_SCTP_ENABLED(x)        (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE)
+#define PR_SCTP_ENABLED(x)        ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \
+                                   (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_ALL))
 #define PR_SCTP_TTL_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
 #define PR_SCTP_BUF_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
 #define PR_SCTP_RTX_ENABLED(x)    (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
-#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_MAX)
+#define PR_SCTP_VALID_POLICY(x)   (PR_SCTP_POLICY(x) <= SCTP_PR_SCTP_MAX)
+
 /* Stat's */
 struct sctp_pcbinfo {
 	uint32_t ep_count;
@@ -663,11 +675,7 @@
 #define SCTP_AUTH_HMAC_ID_RSVD		0x0000
 #define SCTP_AUTH_HMAC_ID_SHA1		0x0001	/* default, mandatory */
 #define SCTP_AUTH_HMAC_ID_SHA256	0x0003
-#define SCTP_AUTH_HMAC_ID_SHA224	0x0004
-#define SCTP_AUTH_HMAC_ID_SHA384	0x0005
-#define SCTP_AUTH_HMAC_ID_SHA512	0x0006
 
-
 /* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */
 struct sctp_authkeyid {
 	sctp_assoc_t scact_assoc_id;
@@ -725,6 +733,14 @@
 	uint16_t sue_port;
 };
 
+struct sctp_prstatus {
+	sctp_assoc_t sprstat_assoc_id;
+	uint16_t sprstat_sid;
+	uint16_t sprstat_policy;
+	uint64_t sprstat_abandoned_unsent;
+	uint64_t sprstat_abandoned_sent;
+};
+
 struct sctp_cwnd_args {
 	struct sctp_nets *net;	/* network to *//* FIXME: LP64 issue */
 	uint32_t cwnd_new_value;/* cwnd in k */
@@ -1150,7 +1166,7 @@
 struct xsctp_inpcb {
 	uint32_t last;
 	uint32_t flags;
-	uint32_t features;
+	uint64_t features;
 	uint32_t total_sends;
 	uint32_t total_recvs;
 	uint32_t total_nospaces;
@@ -1158,7 +1174,12 @@
 	uint16_t local_port;
 	uint16_t qlen;
 	uint16_t maxqlen;
-	uint32_t extra_padding[32];	/* future */
+	void *socket;
+#if defined(__LP64__)
+	uint32_t extra_padding[29];	/* future */
+#else
+	uint32_t extra_padding[30];	/* future */
+#endif
 };
 
 struct xsctp_tcb {
@@ -1216,7 +1237,8 @@
 	struct sctp_timeval start_time;	/* sctpAssocLocalRemEntry 8   */
 	uint32_t rtt;
 	uint32_t heartbeat_interval;
-	uint32_t extra_padding[31];	/* future */
+	uint32_t ssthresh;
+	uint32_t extra_padding[30];	/* future */
 };
 
 #define SCTP_MAX_LOGGING_SIZE 30000
@@ -1279,33 +1301,39 @@
 int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *);
 
 /* deprecated */
-ssize_t sctp_sendmsg 
-(int, const void *, size_t, const struct sockaddr *,
+ssize_t 
+sctp_sendmsg(int, const void *, size_t, const struct sockaddr *,
     socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
 
 /* deprecated */
-	ssize_t sctp_send(int, const void *, size_t,
-              const struct sctp_sndrcvinfo *, int);
+ssize_t 
+sctp_send(int, const void *, size_t,
+    const struct sctp_sndrcvinfo *, int);
 
 /* deprecated */
-	ssize_t sctp_sendx(int, const void *, size_t, struct sockaddr *,
-               int, struct sctp_sndrcvinfo *, int);
+ssize_t 
+sctp_sendx(int, const void *, size_t, struct sockaddr *,
+    int, struct sctp_sndrcvinfo *, int);
 
 /* deprecated */
-	ssize_t sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
-                  int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
+ssize_t 
+sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
+    int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
 
-	sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
+sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
 
 /* deprecated */
-	ssize_t sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
-                 struct sctp_sndrcvinfo *, int *);
+ssize_t 
+sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
+    struct sctp_sndrcvinfo *, int *);
 
-	ssize_t sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
-               int, void *, socklen_t, unsigned int, int);
+ssize_t 
+sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
+    int, void *, socklen_t, unsigned int, int);
 
-	ssize_t sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
-               socklen_t *, void *, socklen_t *, unsigned int *, int *);
+ssize_t 
+sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
+    socklen_t *, void *, socklen_t *, unsigned int *, int *);
 
 __END_DECLS
 

Modified: trunk/sys/netinet/sctp_usrreq.c
===================================================================
--- trunk/sys/netinet/sctp_usrreq.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_usrreq.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_usrreq.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_usrreq.c 294222 2016-01-17 14:10:37Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
@@ -40,6 +40,7 @@
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_var.h>
 #ifdef INET6
+#include <netinet6/sctp6_var.h>
 #endif
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_output.h>
@@ -125,21 +126,19 @@
 			if (chk->sent < SCTP_DATAGRAM_RESEND) {
 				sctp_flight_size_decrease(chk);
 				sctp_total_flight_decrease(stcb, chk);
-			}
-			if (chk->sent != SCTP_DATAGRAM_RESEND) {
+				chk->sent = SCTP_DATAGRAM_RESEND;
 				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+				chk->rec.data.doing_fast_retransmit = 0;
+				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+					sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+					    chk->whoTo->flight_size,
+					    chk->book_size,
+					    (uintptr_t) chk->whoTo,
+					    chk->rec.data.TSN_seq);
+				}
+				/* Clear any time so NO RTT is being done */
+				chk->do_rtt = 0;
 			}
-			chk->sent = SCTP_DATAGRAM_RESEND;
-			chk->rec.data.doing_fast_retransmit = 0;
-			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
-				sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
-				    chk->whoTo->flight_size,
-				    chk->book_size,
-				    (uintptr_t) chk->whoTo,
-				    chk->rec.data.TSN_seq);
-			}
-			/* Clear any time so NO RTT is being done */
-			chk->do_rtt = 0;
 		}
 	}
 }
@@ -181,7 +180,7 @@
 		SCTP_TCB_UNLOCK(stcb);
 		return;
 	}
-	totsz = ip->ip_len;
+	totsz = ntohs(ip->ip_len);
 
 	nxtsz = ntohs(icmph->icmp_nextmtu);
 	if (nxtsz == 0) {
@@ -215,8 +214,6 @@
 	SCTP_TCB_UNLOCK(stcb);
 }
 
-#endif
-
 void
 sctp_notify(struct sctp_inpcb *inp,
     struct ip *ip,
@@ -225,7 +222,7 @@
     struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -284,7 +281,7 @@
 		 * with no TCB
 		 */
 		sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(inp);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -292,8 +289,9 @@
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 		/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
 #endif
@@ -303,6 +301,8 @@
 	}
 }
 
+#endif
+
 #ifdef INET
 void
 sctp_ctlinput(cmd, sa, vip)
@@ -346,8 +346,8 @@
 		 * 'from' holds our local endpoint address. Thus we reverse
 		 * the to and the from in the lookup.
 		 */
-		stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
-		    (struct sockaddr *)&to,
+		stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to,
+		    (struct sockaddr *)&from,
 		    &inp, &net, 1, vrf_id);
 		if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
 			if (cmd != PRC_MSGSIZE) {
@@ -398,8 +398,8 @@
 	if (error)
 		return (error);
 
-	stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
-	    sintosa(&addrs[1]),
+	stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]),
+	    sintosa(&addrs[0]),
 	    &inp, &net, 1, vrf_id);
 	if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
 		if ((inp != NULL) && (stcb == NULL)) {
@@ -488,11 +488,6 @@
 	int error;
 	uint32_t vrf_id = SCTP_DEFAULT_VRFID;
 
-#ifdef IPSEC
-	uint32_t flags;
-
-#endif
-
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp != 0) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -514,33 +509,6 @@
 	ip_inp = &inp->ip_inp.inp;
 	ip_inp->inp_vflag |= INP_IPV4;
 	ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
-#ifdef IPSEC
-	error = ipsec_init_policy(so, &ip_inp->inp_sp);
-#ifdef SCTP_LOG_CLOSING
-	sctp_log_closing(inp, NULL, 17);
-#endif
-	if (error != 0) {
-try_again:
-		flags = inp->sctp_flags;
-		if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
-		    (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
-#ifdef SCTP_LOG_CLOSING
-			sctp_log_closing(inp, NULL, 15);
-#endif
-			SCTP_INP_WUNLOCK(inp);
-			sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
-			    SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
-		} else {
-			flags = inp->sctp_flags;
-			if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
-				goto try_again;
-			} else {
-				SCTP_INP_WUNLOCK(inp);
-			}
-		}
-		return (error);
-	}
-#endif				/* IPSEC */
 	SCTP_INP_WUNLOCK(inp);
 	return (0);
 }
@@ -548,27 +516,21 @@
 static int
 sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
 {
-	struct sctp_inpcb *inp = NULL;
-	int error;
+	struct sctp_inpcb *inp;
 
-#ifdef INET
-	if (addr && addr->sa_family != AF_INET) {
-		/* must be a v4 address! */
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
-		return (EINVAL);
-	}
-#endif				/* INET6 */
-	if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) {
-		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
-		return (EINVAL);
-	}
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
-	error = sctp_inpcb_bind(so, addr, NULL, p);
-	return (error);
+	if (addr != NULL) {
+		if ((addr->sa_family != AF_INET) ||
+		    (addr->sa_len != sizeof(struct sockaddr_in))) {
+			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+			return (EINVAL);
+		}
+	}
+	return (sctp_inpcb_bind(so, addr, NULL, p));
 }
 
 #endif
@@ -764,7 +726,7 @@
 					/* Left with Data unread */
 					struct mbuf *err;
 
-					err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+					err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
 					if (err) {
 						/*
 						 * Fill in the user
@@ -785,7 +747,8 @@
 				    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 				}
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
 				/* No unlock tcb assoc is gone */
 				return (0);
 			}
@@ -801,25 +764,24 @@
 					/* only send SHUTDOWN 1st time thru */
 					struct sctp_nets *netp;
 
+					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+					}
+					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+					sctp_stop_timers_for_shutdown(stcb);
 					if (stcb->asoc.alternate) {
 						netp = stcb->asoc.alternate;
 					} else {
 						netp = stcb->asoc.primary_destination;
 					}
-					sctp_stop_timers_for_shutdown(stcb);
 					sctp_send_shutdown(stcb, netp);
-					sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
-					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
-					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
-						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
-					}
-					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
-					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
 					    stcb->sctp_ep, stcb, netp);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 					    stcb->sctp_ep, stcb, netp);
-
+					sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
 				}
 			} else {
 				/*
@@ -862,26 +824,7 @@
 					struct mbuf *op_err;
 
 			abort_anyway:
-					op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-					    0, M_DONTWAIT, 1, MT_DATA);
-					if (op_err) {
-						/*
-						 * Fill in the user
-						 * initiated abort
-						 */
-						struct sctp_paramhdr *ph;
-						uint32_t *ippp;
-
-						SCTP_BUF_LEN(op_err) =
-						    (sizeof(struct sctp_paramhdr) + sizeof(uint32_t));
-						ph = mtod(op_err,
-						    struct sctp_paramhdr *);
-						ph->param_type = htons(
-						    SCTP_CAUSE_USER_INITIATED_ABT);
-						ph->param_length = htons(SCTP_BUF_LEN(op_err));
-						ippp = (uint32_t *) (ph + 1);
-						*ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
-					}
+					op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
 					sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
 					SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -890,7 +833,8 @@
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_INP_RUNLOCK(inp);
-					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+					    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
 					return (0);
 				} else {
 					sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
@@ -972,7 +916,8 @@
 	}
 	SCTP_INP_RLOCK(inp);
 	/* For UDP model this is a invalid call */
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+	if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 		/* Restore the flags that the soshutdown took away. */
 		SOCKBUF_LOCK(&so->so_rcv);
 		so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
@@ -981,14 +926,15 @@
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 		return (EOPNOTSUPP);
-	}
-	/*
-	 * Ok if we reach here its the TCP model and it is either a SHUT_WR
-	 * or SHUT_RDWR. This means we put the shutdown flag against it.
-	 */
-	{
+	} else {
+		/*
+		 * Ok, if we reach here its the TCP model and it is either a
+		 * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag
+		 * against it.
+		 */
 		struct sctp_tcb *stcb;
 		struct sctp_association *asoc;
+		struct sctp_nets *netp;
 
 		if ((so->so_state &
 		    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
@@ -1000,7 +946,7 @@
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb == NULL) {
 			/*
-			 * Ok we hit the case that the shutdown call was
+			 * Ok, we hit the case that the shutdown call was
 			 * made after an abort or something. Nothing to do
 			 * now.
 			 */
@@ -1009,7 +955,29 @@
 		}
 		SCTP_TCB_LOCK(stcb);
 		asoc = &stcb->asoc;
-		if (TAILQ_EMPTY(&asoc->send_queue) &&
+		if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+			SCTP_TCB_UNLOCK(stcb);
+			SCTP_INP_RUNLOCK(inp);
+			return (0);
+		}
+		if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+		    (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
+		    (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
+			/*
+			 * If we are not in or before ESTABLISHED, there is
+			 * no protocol action required.
+			 */
+			SCTP_TCB_UNLOCK(stcb);
+			SCTP_INP_RUNLOCK(inp);
+			return (0);
+		}
+		if (stcb->asoc.alternate) {
+			netp = stcb->asoc.alternate;
+		} else {
+			netp = stcb->asoc.primary_destination;
+		}
+		if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+		    TAILQ_EMPTY(&asoc->send_queue) &&
 		    TAILQ_EMPTY(&asoc->sent_queue) &&
 		    (asoc->stream_queue_cnt == 0)) {
 			if (asoc->locked_on_sending) {
@@ -1016,46 +984,19 @@
 				goto abort_anyway;
 			}
 			/* there is nothing queued to send, so I'm done... */
-			if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
-				/* only send SHUTDOWN the first time through */
-				struct sctp_nets *netp;
-
-				if (stcb->asoc.alternate) {
-					netp = stcb->asoc.alternate;
-				} else {
-					netp = stcb->asoc.primary_destination;
-				}
-				sctp_stop_timers_for_shutdown(stcb);
-				sctp_send_shutdown(stcb, netp);
-				sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
-				if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
-				    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
-					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
-				}
-				SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
-				SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
-				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
-				    stcb->sctp_ep, stcb, netp);
-				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
-				    stcb->sctp_ep, stcb, netp);
-			}
+			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+			SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+			sctp_stop_timers_for_shutdown(stcb);
+			sctp_send_shutdown(stcb, netp);
+			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+			    stcb->sctp_ep, stcb, netp);
 		} else {
 			/*
-			 * we still got (or just got) data to send, so set
-			 * SHUTDOWN_PENDING
+			 * We still got (or just got) data to send, so set
+			 * SHUTDOWN_PENDING.
 			 */
-			struct sctp_nets *netp;
-
-			if (stcb->asoc.alternate) {
-				netp = stcb->asoc.alternate;
-			} else {
-				netp = stcb->asoc.primary_destination;
-			}
-
-			asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
-			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
-			    netp);
-
+			SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 			if (asoc->locked_on_sending) {
 				/* Locked to send out the data */
 				struct sctp_stream_queue_pending *sp;
@@ -1066,7 +1007,7 @@
 					    asoc->locked_on_sending->stream_no);
 				} else {
 					if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
-						asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+						SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
 					}
 				}
 			}
@@ -1076,36 +1017,24 @@
 				struct mbuf *op_err;
 
 		abort_anyway:
-				op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
-				    0, M_DONTWAIT, 1, MT_DATA);
-				if (op_err) {
-					/* Fill in the user initiated abort */
-					struct sctp_paramhdr *ph;
-					uint32_t *ippp;
-
-					SCTP_BUF_LEN(op_err) =
-					    sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
-					ph = mtod(op_err,
-					    struct sctp_paramhdr *);
-					ph->param_type = htons(
-					    SCTP_CAUSE_USER_INITIATED_ABT);
-					ph->param_length = htons(SCTP_BUF_LEN(op_err));
-					ippp = (uint32_t *) (ph + 1);
-					*ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
-				}
+				op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
 				sctp_abort_an_association(stcb->sctp_ep, stcb,
 				    op_err, SCTP_SO_LOCKED);
-				goto skip_unlock;
-			} else {
-				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+				SCTP_INP_RUNLOCK(inp);
+				return (0);
 			}
 		}
+		sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, netp);
+		/*
+		 * XXX: Why do this in the case where we have still data
+		 * queued?
+		 */
+		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
 		SCTP_TCB_UNLOCK(stcb);
+		SCTP_INP_RUNLOCK(inp);
+		return (0);
 	}
-skip_unlock:
-	SCTP_INP_RUNLOCK(inp);
-	return (0);
 }
 
 /*
@@ -1139,9 +1068,17 @@
 {
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
-	int loopback_scope, ipv4_local_scope, local_scope, site_scope;
 	size_t actual;
-	int ipv4_addr_legal, ipv6_addr_legal;
+	int loopback_scope;
+
+#if defined(INET)
+	int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined(INET6)
+	int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
 	struct sctp_vrf *vrf;
 
 	actual = 0;
@@ -1150,23 +1087,45 @@
 
 	if (stcb) {
 		/* Turn on all the appropriate scope */
-		loopback_scope = stcb->asoc.loopback_scope;
-		ipv4_local_scope = stcb->asoc.ipv4_local_scope;
-		local_scope = stcb->asoc.local_scope;
-		site_scope = stcb->asoc.site_scope;
+		loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+		ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+		ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+		local_scope = stcb->asoc.scope.local_scope;
+		site_scope = stcb->asoc.scope.site_scope;
+		ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
 	} else {
-		/* Turn on ALL scope, since we look at the EP */
-		loopback_scope = ipv4_local_scope = local_scope =
-		    site_scope = 1;
-	}
-	ipv4_addr_legal = ipv6_addr_legal = 0;
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		ipv6_addr_legal = 1;
-		if (SCTP_IPV6_V6ONLY(inp) == 0) {
+		/* Use generic values for endpoints. */
+		loopback_scope = 1;
+#if defined(INET)
+		ipv4_local_scope = 1;
+#endif
+#if defined(INET6)
+		local_scope = 1;
+		site_scope = 1;
+#endif
+		if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+#if defined(INET6)
+			ipv6_addr_legal = 1;
+#endif
+#if defined(INET)
+			if (SCTP_IPV6_V6ONLY(inp)) {
+				ipv4_addr_legal = 0;
+			} else {
+				ipv4_addr_legal = 1;
+			}
+#endif
+		} else {
+#if defined(INET6)
+			ipv6_addr_legal = 0;
+#endif
+#if defined(INET)
 			ipv4_addr_legal = 1;
+#endif
 		}
-	} else {
-		ipv4_addr_legal = 1;
 	}
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
@@ -1200,7 +1159,7 @@
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin;
 
-						sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+						sin = &sctp_ifa->address.sin;
 						if (sin->sin_addr.s_addr == 0) {
 							/*
 							 * we skip
@@ -1209,6 +1168,10 @@
 							 */
 							continue;
 						}
+						if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+						    &sin->sin_addr) != 0) {
+							continue;
+						}
 						if ((ipv4_local_scope == 0) &&
 						    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 							continue;
@@ -1241,7 +1204,7 @@
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6;
 
-						sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+						sin6 = &sctp_ifa->address.sin6;
 						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 							/*
 							 * we skip
@@ -1250,6 +1213,10 @@
 							 */
 							continue;
 						}
+						if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+						    &sin6->sin6_addr) != 0) {
+							continue;
+						}
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
@@ -1305,8 +1272,21 @@
 			}
 			if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
 				continue;
-
-			((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+			switch (laddr->ifa->address.sa.sa_family) {
+#ifdef INET
+			case AF_INET:
+				((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
+				break;
+#endif
+#ifdef INET6
+			case AF_INET6:
+				((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+				break;
+#endif
+			default:
+				/* TSNH */
+				break;
+			}
 			sas = (struct sockaddr_storage *)((caddr_t)sas +
 			    laddr->ifa->address.sa.sa_len);
 			actual += laddr->ifa->address.sa.sa_len;
@@ -1364,10 +1344,14 @@
 				switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
+#ifdef INET6
 					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
 						cnt += sizeof(struct sockaddr_in6);
 					else
 						cnt += sizeof(struct sockaddr_in);
+#else
+					cnt += sizeof(struct sockaddr_in);
+#endif
 					break;
 #endif
 #ifdef INET6
@@ -1387,10 +1371,14 @@
 			switch (laddr->ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
+#ifdef INET6
 				if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
 					cnt += sizeof(struct sockaddr_in6);
 				else
 					cnt += sizeof(struct sockaddr_in);
+#else
+				cnt += sizeof(struct sockaddr_in);
+#endif
 				break;
 #endif
 #ifdef INET6
@@ -1514,6 +1502,7 @@
 
 	/* We are GOOD to go */
 	stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
+	    inp->sctp_ep.pre_open_stream_count,
 	    (struct thread *)p
 	    );
 	if (stcb == NULL) {
@@ -1546,7 +1535,8 @@
 	sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
 	/* Fill in the return id */
 	if (error) {
-		(void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+		(void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
 		goto out_now;
 	}
 	a_id = (sctp_assoc_t *) optval;
@@ -1845,8 +1835,9 @@
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
-				if (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
-				    &av->stream_value) < 0) {
+				if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
+				    (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
+				    &av->stream_value) < 0)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				} else {
@@ -1893,8 +1884,15 @@
 			uint32_t *value, cnt;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+			SCTP_INP_RLOCK(inp);
+			if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+			    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+				/* Can't do this for a 1-1 socket */
+				error = EINVAL;
+				SCTP_INP_RUNLOCK(inp);
+				break;
+			}
 			cnt = 0;
-			SCTP_INP_RLOCK(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				cnt++;
 			}
@@ -1909,9 +1907,16 @@
 			unsigned int at, limit;
 
 			SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+			SCTP_INP_RLOCK(inp);
+			if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+			    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+				/* Can't do this for a 1-1 socket */
+				error = EINVAL;
+				SCTP_INP_RUNLOCK(inp);
+				break;
+			}
 			at = 0;
 			limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t);
-			SCTP_INP_RLOCK(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				if (at < limit) {
 					ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
@@ -2207,23 +2212,27 @@
 				size = 0;
 				/* Count the sizes */
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
-					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
-						size += sizeof(struct sockaddr_in6);
-					} else {
-						switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
-						case AF_INET:
+					case AF_INET:
+#ifdef INET6
+						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+							size += sizeof(struct sockaddr_in6);
+						} else {
 							size += sizeof(struct sockaddr_in);
-							break;
+						}
+#else
+						size += sizeof(struct sockaddr_in);
 #endif
+						break;
+#endif
 #ifdef INET6
-						case AF_INET6:
-							size += sizeof(struct sockaddr_in6);
-							break;
+					case AF_INET6:
+						size += sizeof(struct sockaddr_in6);
+						break;
 #endif
-						default:
-							break;
-						}
+					default:
+						break;
 					}
 				}
 				SCTP_TCB_UNLOCK(stcb);
@@ -2255,24 +2264,28 @@
 				sas = (struct sockaddr_storage *)&saddr->addr[0];
 
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
-					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
-						cpsz = sizeof(struct sockaddr_in6);
-					} else {
-						switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
-						case AF_INET:
+					case AF_INET:
+#ifdef INET6
+						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+							cpsz = sizeof(struct sockaddr_in6);
+						} else {
 							cpsz = sizeof(struct sockaddr_in);
-							break;
+						}
+#else
+						cpsz = sizeof(struct sockaddr_in);
 #endif
+						break;
+#endif
 #ifdef INET6
-						case AF_INET6:
-							cpsz = sizeof(struct sockaddr_in6);
-							break;
+					case AF_INET6:
+						cpsz = sizeof(struct sockaddr_in6);
+						break;
 #endif
-						default:
-							cpsz = 0;
-							break;
-						}
+					default:
+						cpsz = 0;
+						break;
 					}
 					if (cpsz == 0) {
 						break;
@@ -2283,15 +2296,15 @@
 					}
 #if defined(INET) && defined(INET6)
 					if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
-					    (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+					    (net->ro._l_addr.sa.sa_family == AF_INET)) {
 						/* Must map the address */
-						in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+						in6_sin_2_v4mapsin6(&net->ro._l_addr.sin,
 						    (struct sockaddr_in6 *)sas);
 					} else {
-#endif
 						memcpy(sas, &net->ro._l_addr, cpsz);
-#if defined(INET) && defined(INET6)
 					}
+#else
+					memcpy(sas, &net->ro._l_addr, cpsz);
 #endif
 					((struct sockaddr_in *)sas)->sin_port = stcb->rport;
 
@@ -2328,14 +2341,36 @@
 		{
 			struct sctp_paddrparams *paddrp;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
 			SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
 
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+#if defined(INET) && defined(INET6)
+			if (paddrp->spp_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&paddrp->spp_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&paddrp->spp_address;
+			}
+#else
+			addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -2343,22 +2378,20 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&paddrp->spp_address;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -2367,10 +2400,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
@@ -2386,21 +2419,27 @@
 					break;
 				}
 			}
-			if (stcb) {
+			if (stcb != NULL) {
 				/* Applies to the specific association */
 				paddrp->spp_flags = 0;
-				if (net) {
-					int ovh;
-
-					if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-						ovh = SCTP_MED_OVERHEAD;
-					} else {
-						ovh = SCTP_MED_V4_OVERHEAD;
-					}
-
+				if (net != NULL) {
 					paddrp->spp_hbinterval = net->heart_beat_delay;
 					paddrp->spp_pathmaxrxt = net->failure_threshold;
-					paddrp->spp_pathmtu = net->mtu - ovh;
+					paddrp->spp_pathmtu = net->mtu;
+					switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+					case AF_INET:
+						paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+						break;
+#endif
+#ifdef INET6
+					case AF_INET6:
+						paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+						break;
+#endif
+					default:
+						break;
+					}
 					/* get flags for HB */
 					if (net->dest_state & SCTP_ADDR_NOHB) {
 						paddrp->spp_flags |= SPP_HB_DISABLE;
@@ -2409,9 +2448,9 @@
 					}
 					/* get flags for PMTU */
 					if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
+						paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+					} else {
 						paddrp->spp_flags |= SPP_PMTUD_ENABLE;
-					} else {
-						paddrp->spp_flags |= SPP_PMTUD_DISABLE;
 					}
 					if (net->dscp & 0x01) {
 						paddrp->spp_dscp = net->dscp & 0xfc;
@@ -2430,7 +2469,7 @@
 					 * value
 					 */
 					paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
-					paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+					paddrp->spp_pathmtu = 0;
 					if (stcb->asoc.default_dscp & 0x01) {
 						paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc;
 						paddrp->spp_flags |= SPP_DSCP;
@@ -2505,14 +2544,36 @@
 		{
 			struct sctp_paddrinfo *paddri;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
 
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+#if defined(INET) && defined(INET6)
+			if (paddri->spinfo_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&paddri->spinfo_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&paddri->spinfo_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&paddri->spinfo_address;
+			}
+#else
+			addr = (struct sockaddr *)&paddri->spinfo_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -2520,14 +2581,15 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 
-			if ((stcb) && (net)) {
+			if ((stcb != NULL) && (net != NULL)) {
 				if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 					/* It's unconfirmed */
 					paddri->spinfo_state = SCTP_UNCONFIRMED;
@@ -2543,10 +2605,24 @@
 				paddri->spinfo_rto = net->RTO;
 				paddri->spinfo_assoc_id = sctp_get_associd(stcb);
 				paddri->spinfo_mtu = net->mtu;
+				switch (addr->sa_family) {
+#if defined(INET)
+				case AF_INET:
+					paddri->spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+					break;
+#endif
+#if defined(INET6)
+				case AF_INET6:
+					paddri->spinfo_mtu -= SCTP_MIN_OVERHEAD;
+					break;
+#endif
+				default:
+					break;
+				}
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_paddrinfo);
 			} else {
-				if (stcb) {
+				if (stcb != NULL) {
 					SCTP_TCB_UNLOCK(stcb);
 				}
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
@@ -2576,12 +2652,7 @@
 				error = EINVAL;
 				break;
 			}
-			/*
-			 * I think passing the state is fine since
-			 * sctp_constants.h will be available to the user
-			 * land.
-			 */
-			sstat->sstat_state = stcb->asoc.state;
+			sstat->sstat_state = sctp_map_assoc_state(stcb->asoc.state);
 			sstat->sstat_assoc_id = sctp_get_associd(stcb);
 			sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
 			sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
@@ -2619,6 +2690,20 @@
 			sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
 			sstat->sstat_primary.spinfo_rto = net->RTO;
 			sstat->sstat_primary.spinfo_mtu = net->mtu;
+			switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
+#if defined(INET)
+			case AF_INET:
+				sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+				break;
+#endif
+#if defined(INET6)
+			case AF_INET6:
+				sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
+				break;
+#endif
+			default:
+				break;
+			}
 			sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
 			SCTP_TCB_UNLOCK(stcb);
 			*optsize = sizeof(struct sctp_status);
@@ -2763,16 +2848,32 @@
 			SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
 
 			if (stcb) {
-				/* simply copy out the sockaddr_storage... */
-				int len;
+				union sctp_sockstore *addr;
 
-				len = *optsize;
-				if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
-					len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
-
-				memcpy(&ssp->ssp_addr,
-				    &stcb->asoc.primary_destination->ro._l_addr,
-				    len);
+				addr = &stcb->asoc.primary_destination->ro._l_addr;
+				switch (addr->sa.sa_family) {
+#ifdef INET
+				case AF_INET:
+#ifdef INET6
+					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+						in6_sin_2_v4mapsin6(&addr->sin,
+						    (struct sockaddr_in6 *)&ssp->ssp_addr);
+					} else {
+						memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+					}
+#else
+					memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+#endif
+					break;
+#endif
+#ifdef INET6
+				case AF_INET6:
+					memcpy(&ssp->ssp_addr, &addr->sin6, sizeof(struct sockaddr_in6));
+					break;
+#endif
+				default:
+					break;
+				}
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_setprim);
 			} else {
@@ -3112,14 +3213,36 @@
 		{
 			struct sctp_paddrthlds *thlds;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize);
 			SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
 
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_address);
+#if defined(INET) && defined(INET6)
+			if (thlds->spt_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&thlds->spt_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&thlds->spt_address;
+			}
+#else
+			addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -3127,22 +3250,20 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&thlds->spt_address, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&thlds->spt_address;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3151,10 +3272,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
@@ -3170,8 +3291,8 @@
 					break;
 				}
 			}
-			if (stcb) {
-				if (net) {
+			if (stcb != NULL) {
+				if (net != NULL) {
 					thlds->spt_pathmaxrxt = net->failure_threshold;
 					thlds->spt_pathpfthld = net->pf_threshold;
 				} else {
@@ -3203,12 +3324,35 @@
 		{
 			struct sctp_udpencaps *encaps;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize);
 			SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
 
+#if defined(INET) && defined(INET6)
+			if (encaps->sue_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&encaps->sue_address;
+				}
+			} else {
+				addr = (struct sockaddr *)&encaps->sue_address;
+			}
+#else
+			addr = (struct sockaddr *)&encaps->sue_address;
+#endif
 			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
@@ -3219,21 +3363,18 @@
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&encaps->sue_address;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3242,10 +3383,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
@@ -3261,7 +3402,7 @@
 					break;
 				}
 			}
-			if (stcb) {
+			if (stcb != NULL) {
 				if (net) {
 					encaps->sue_port = net->port;
 				} else {
@@ -3281,10 +3422,199 @@
 				}
 			}
 			if (error == 0) {
-				*optsize = sizeof(struct sctp_paddrparams);
+				*optsize = sizeof(struct sctp_udpencaps);
 			}
 			break;
 		}
+	case SCTP_ECN_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.ecn_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->ecn_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_PR_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.prsctp_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->prsctp_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_AUTH_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.auth_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->auth_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_ASCONF_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.asconf_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->asconf_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_RECONFIG_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.reconfig_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->reconfig_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_NRSACK_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.nrsack_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->nrsack_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
+	case SCTP_PKTDROP_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.pktdrop_supported;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->pktdrop_supported;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
 	case SCTP_ENABLE_STREAM_RESET:
 		{
 			struct sctp_assoc_value *av;
@@ -3312,6 +3642,100 @@
 			}
 			break;
 		}
+	case SCTP_PR_STREAM_STATUS:
+		{
+			struct sctp_prstatus *sprstat;
+			uint16_t sid;
+			uint16_t policy;
+
+			SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+			SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+			sid = sprstat->sprstat_sid;
+			policy = sprstat->sprstat_policy;
+#if defined(SCTP_DETAILED_STR_STATS)
+			if ((stcb != NULL) &&
+			    (sid < stcb->asoc.streamoutcnt) &&
+			    (policy != SCTP_PR_SCTP_NONE) &&
+			    ((policy <= SCTP_PR_SCTP_MAX) ||
+			    (policy == SCTP_PR_SCTP_ALL))) {
+				if (policy == SCTP_PR_SCTP_ALL) {
+					sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+					sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+				} else {
+					sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[policy];
+					sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[policy];
+				}
+#else
+			if ((stcb != NULL) &&
+			    (sid < stcb->asoc.streamoutcnt) &&
+			    (policy == SCTP_PR_SCTP_ALL)) {
+				sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+				sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+#endif
+				SCTP_TCB_UNLOCK(stcb);
+				*optsize = sizeof(struct sctp_prstatus);
+			} else {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+			}
+			break;
+		}
+	case SCTP_PR_ASSOC_STATUS:
+		{
+			struct sctp_prstatus *sprstat;
+			uint16_t policy;
+
+			SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+			SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+			policy = sprstat->sprstat_policy;
+			if ((stcb != NULL) &&
+			    (policy != SCTP_PR_SCTP_NONE) &&
+			    ((policy <= SCTP_PR_SCTP_MAX) ||
+			    (policy == SCTP_PR_SCTP_ALL))) {
+				if (policy == SCTP_PR_SCTP_ALL) {
+					sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[0];
+					sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[0];
+				} else {
+					sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[policy];
+					sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[policy];
+				}
+				SCTP_TCB_UNLOCK(stcb);
+				*optsize = sizeof(struct sctp_prstatus);
+			} else {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+			}
+			break;
+		}
+	case SCTP_MAX_CWND:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				av->assoc_value = stcb->asoc.max_cwnd;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_RLOCK(inp);
+					av->assoc_value = inp->max_cwnd;
+					SCTP_INP_RUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			if (error == 0) {
+				*optsize = sizeof(struct sctp_assoc_value);
+			}
+			break;
+		}
 	default:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 		error = ENOPROTOOPT;
@@ -3651,8 +4075,9 @@
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
-				if (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
-				    av->stream_value) < 0) {
+				if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
+				    (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
+				    av->stream_value) < 0)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
@@ -3662,14 +4087,15 @@
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
-						stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
-						    &stcb->asoc,
-						    &stcb->asoc.strmout[av->stream_id],
-						    av->stream_value);
+						if (av->stream_id < stcb->asoc.streamoutcnt) {
+							stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
+							    &stcb->asoc,
+							    &stcb->asoc.strmout[av->stream_id],
+							    av->stream_value);
+						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
-
 				} else {
 					/*
 					 * Can't set stream value without
@@ -3942,15 +4368,15 @@
 			sctp_hmaclist_t *hmaclist;
 			uint16_t hmacid;
 			uint32_t i;
-			size_t found;
 
 			SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
-			if (optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) {
+			if ((optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) ||
+			    (shmac->shmac_number_of_idents > 0xffff)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
-			hmaclist = sctp_alloc_hmaclist(shmac->shmac_number_of_idents);
+			hmaclist = sctp_alloc_hmaclist((uint16_t) shmac->shmac_number_of_idents);
 			if (hmaclist == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 				error = ENOMEM;
@@ -3966,14 +4392,14 @@
 					goto sctp_set_hmac_done;
 				}
 			}
-			found = 0;
 			for (i = 0; i < hmaclist->num_algo; i++) {
 				if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
 					/* already in list */
-					found = 1;
+					break;
 				}
 			}
-			if (!found) {
+			if (i == hmaclist->num_algo) {
+				/* not found in list */
 				sctp_free_hmaclist(hmaclist);
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
@@ -4158,7 +4584,7 @@
 				error = ENOENT;
 				break;
 			}
-			if (stcb->asoc.peer_supports_strreset == 0) {
+			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
@@ -4167,18 +4593,30 @@
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
-			if (stcb->asoc.stream_reset_outstanding) {
-				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
-				error = EALREADY;
+			if (sizeof(struct sctp_reset_streams) +
+			    strrst->srs_number_streams * sizeof(uint16_t) > optsize) {
+				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) {
 				send_in = 1;
+				if (stcb->asoc.stream_reset_outstanding) {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+					error = EALREADY;
+					SCTP_TCB_UNLOCK(stcb);
+					break;
+				}
 			}
 			if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) {
 				send_out = 1;
 			}
+			if ((strrst->srs_number_streams > SCTP_MAX_STREAMS_AT_ONCE_RESET) && send_in) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+				error = ENOMEM;
+				SCTP_TCB_UNLOCK(stcb);
+				break;
+			}
 			if ((send_in == 0) && (send_out == 0)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
@@ -4203,11 +4641,46 @@
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
-			error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
-			    strrst->srs_stream_list,
-			    send_out, send_in, 0, 0, 0, 0, 0);
+			if (send_out) {
+				int cnt;
+				uint16_t strm;
 
-			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+				if (strrst->srs_number_streams) {
+					for (i = 0, cnt = 0; i < strrst->srs_number_streams; i++) {
+						strm = strrst->srs_stream_list[i];
+						if (stcb->asoc.strmout[strm].state == SCTP_STREAM_OPEN) {
+							stcb->asoc.strmout[strm].state = SCTP_STREAM_RESET_PENDING;
+							cnt++;
+						}
+					}
+				} else {
+					/* Its all */
+					for (i = 0, cnt = 0; i < stcb->asoc.streamoutcnt; i++) {
+						if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) {
+							stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+							cnt++;
+						}
+					}
+				}
+			}
+			if (send_in) {
+				error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
+				    strrst->srs_stream_list,
+				    send_in, 0, 0, 0, 0, 0);
+			} else {
+				error = sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_LOCKED);
+			}
+			if (error == 0) {
+				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+			} else {
+				/*
+				 * For outgoing streams don't report any
+				 * problems in sending the request to the
+				 * application. XXX: Double check resetting
+				 * incoming streams.
+				 */
+				error = 0;
+			}
 			SCTP_TCB_UNLOCK(stcb);
 			break;
 		}
@@ -4225,7 +4698,7 @@
 				error = ENOENT;
 				break;
 			}
-			if (stcb->asoc.peer_supports_strreset == 0) {
+			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
@@ -4277,7 +4750,7 @@
 					goto skip_stuff;
 				}
 			}
-			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
+			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
 	skip_stuff:
 			SCTP_TCB_UNLOCK(stcb);
@@ -4285,6 +4758,7 @@
 		}
 	case SCTP_RESET_ASSOC:
 		{
+			int i;
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
@@ -4294,7 +4768,7 @@
 				error = ENOENT;
 				break;
 			}
-			if (stcb->asoc.peer_supports_strreset == 0) {
+			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
@@ -4309,7 +4783,25 @@
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
-			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, 0, 0, 0, 0);
+			/*
+			 * Is there any data pending in the send or sent
+			 * queues?
+			 */
+			if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
+			    !TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+		busy_out:
+				error = EBUSY;
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+				SCTP_TCB_UNLOCK(stcb);
+				break;
+			}
+			/* Do any streams have data queued? */
+			for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+				if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+					goto busy_out;
+				}
+			}
+			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 1, 0, 0, 0, 0);
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
 			SCTP_TCB_UNLOCK(stcb);
 			break;
@@ -4333,7 +4825,6 @@
 	case SCTP_CONNECT_X_COMPLETE:
 		{
 			struct sockaddr *sa;
-			struct sctp_nets *net;
 
 			/* FIXME MT: check correct? */
 			SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
@@ -4344,7 +4835,6 @@
 				stcb = LIST_FIRST(&inp->sctp_asoc_list);
 				if (stcb) {
 					SCTP_TCB_LOCK(stcb);
-					net = sctp_findnet(stcb, sa);
 				}
 				SCTP_INP_RUNLOCK(inp);
 			} else {
@@ -4356,7 +4846,7 @@
 				 * TCB.. aka NULL.
 				 */
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
@@ -4372,7 +4862,7 @@
 				(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 				sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
 				    stcb->asoc.primary_destination,
-				    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+				    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
 				sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 			} else {
 				/*
@@ -4617,6 +5107,7 @@
 			SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
 			SCTP_INP_WLOCK(inp);
 			inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
+			inp->sctp_ep.adaptation_layer_indicator_provided = 1;
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
@@ -4675,13 +5166,36 @@
 		{
 			struct sctp_paddrparams *paddrp;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
 			SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+
+#if defined(INET) && defined(INET6)
+			if (paddrp->spp_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&paddrp->spp_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&paddrp->spp_address;
+			}
+#else
+			addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -4689,25 +5203,22 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp,
-				    (struct sockaddr *)&paddrp->spp_address,
+				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&paddrp->spp_address;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
@@ -4716,10 +5227,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
@@ -4748,28 +5259,15 @@
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
-			if (stcb) {
+			if (stcb != NULL) {
 				/************************TCB SPECIFIC SET ******************/
-				/*
-				 * do we change the timer for HB, we run
-				 * only one?
-				 */
-				int ovh = 0;
-
-				if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-					ovh = SCTP_MED_OVERHEAD;
-				} else {
-					ovh = SCTP_MED_V4_OVERHEAD;
-				}
-
-				/* network sets ? */
-				if (net) {
+				if (net != NULL) {
 					/************************NET SPECIFIC SET ******************/
 					if (paddrp->spp_flags & SPP_HB_DISABLE) {
 						if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
 						    !(net->dest_state & SCTP_ADDR_NOHB)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
-							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
 						}
 						net->dest_state |= SCTP_ADDR_NOHB;
 					}
@@ -4793,15 +5291,27 @@
 					if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
 						if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
-							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
 						}
 						net->dest_state |= SCTP_ADDR_NO_PMTUD;
-						if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
-							net->mtu = paddrp->spp_pathmtu + ovh;
-							if (net->mtu < stcb->asoc.smallest_mtu) {
-								sctp_pathmtu_adjustment(stcb, net->mtu);
-							}
+						net->mtu = paddrp->spp_pathmtu;
+						switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+						case AF_INET:
+							net->mtu += SCTP_MIN_V4_OVERHEAD;
+							break;
+#endif
+#ifdef INET6
+						case AF_INET6:
+							net->mtu += SCTP_MIN_OVERHEAD;
+							break;
+#endif
+						default:
+							break;
 						}
+						if (net->mtu < stcb->asoc.smallest_mtu) {
+							sctp_pathmtu_adjustment(stcb, net->mtu);
+						}
 					}
 					if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
 						if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
@@ -4819,7 +5329,9 @@
 							    (net->error_count > net->pf_threshold)) {
 								net->dest_state |= SCTP_ADDR_PF;
 								sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
-								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+								    stcb->sctp_ep, stcb, net,
+								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
 								sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 							}
 						}
@@ -4850,7 +5362,7 @@
 #endif
 				} else {
 					/************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
-					if (paddrp->spp_pathmaxrxt) {
+					if (paddrp->spp_pathmaxrxt != 0) {
 						stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (net->dest_state & SCTP_ADDR_PF) {
@@ -4862,7 +5374,9 @@
 								    (net->error_count > net->pf_threshold)) {
 									net->dest_state |= SCTP_ADDR_PF;
 									sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
-									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+									    stcb->sctp_ep, stcb, net,
+									    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_13);
 									sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 								}
 							}
@@ -4881,7 +5395,7 @@
 						}
 					}
 					if (paddrp->spp_flags & SPP_HB_ENABLE) {
-						if (paddrp->spp_hbinterval) {
+						if (paddrp->spp_hbinterval != 0) {
 							stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
 						} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 							stcb->asoc.heart_beat_delay = 0;
@@ -4888,7 +5402,7 @@
 						}
 						/* Turn back on the timer */
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
-							if (paddrp->spp_hbinterval) {
+							if (paddrp->spp_hbinterval != 0) {
 								net->heart_beat_delay = paddrp->spp_hbinterval;
 							} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 								net->heart_beat_delay = 0;
@@ -4897,7 +5411,7 @@
 								net->dest_state &= ~SCTP_ADDR_NOHB;
 							}
 							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
-							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_14);
 							sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 						}
 						sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
@@ -4907,7 +5421,9 @@
 							if (!(net->dest_state & SCTP_ADDR_NOHB)) {
 								net->dest_state |= SCTP_ADDR_NOHB;
 								if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
-									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+									    inp, stcb, net,
+									    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_15);
 								}
 							}
 						}
@@ -4917,15 +5433,27 @@
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 								sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
-								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_16);
 							}
 							net->dest_state |= SCTP_ADDR_NO_PMTUD;
-							if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
-								net->mtu = paddrp->spp_pathmtu + ovh;
-								if (net->mtu < stcb->asoc.smallest_mtu) {
-									sctp_pathmtu_adjustment(stcb, net->mtu);
-								}
+							net->mtu = paddrp->spp_pathmtu;
+							switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+							case AF_INET:
+								net->mtu += SCTP_MIN_V4_OVERHEAD;
+								break;
+#endif
+#ifdef INET6
+							case AF_INET6:
+								net->mtu += SCTP_MIN_OVERHEAD;
+								break;
+#endif
+							default:
+								break;
 							}
+							if (net->mtu < stcb->asoc.smallest_mtu) {
+								sctp_pathmtu_adjustment(stcb, net->mtu);
+							}
 						}
 						sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
 					}
@@ -4971,12 +5499,12 @@
 					 * set it with the options on the
 					 * socket
 					 */
-					if (paddrp->spp_pathmaxrxt) {
+					if (paddrp->spp_pathmaxrxt != 0) {
 						inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
 					}
 					if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
 						inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
-					else if (paddrp->spp_hbinterval) {
+					else if (paddrp->spp_hbinterval != 0) {
 						if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
 							paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
 						inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
@@ -5142,14 +5670,36 @@
 		{
 			struct sctp_setprim *spa;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
 			SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
 
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+#if defined(INET) && defined(INET6)
+			if (spa->ssp_addr.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&spa->ssp_addr;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&spa->ssp_addr;
+				}
 			} else {
+				addr = (struct sockaddr *)&spa->ssp_addr;
+			}
+#else
+			addr = (struct sockaddr *)&spa->ssp_addr;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -5157,9 +5707,9 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp,
-				    (struct sockaddr *)&spa->ssp_addr,
+				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
@@ -5166,17 +5716,24 @@
 				}
 			}
 
-			if ((stcb) && (net)) {
-				if ((net != stcb->asoc.primary_destination) &&
-				    (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
-					/* Ok we need to set it */
-					if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
-						if ((stcb->asoc.alternate) &&
-						    (!(net->dest_state & SCTP_ADDR_PF)) &&
-						    (net->dest_state & SCTP_ADDR_REACHABLE)) {
-							sctp_free_remote_addr(stcb->asoc.alternate);
-							stcb->asoc.alternate = NULL;
+			if ((stcb != NULL) && (net != NULL)) {
+				if (net != stcb->asoc.primary_destination) {
+					if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+						/* Ok we need to set it */
+						if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+							if ((stcb->asoc.alternate) &&
+							    (!(net->dest_state & SCTP_ADDR_PF)) &&
+							    (net->dest_state & SCTP_ADDR_REACHABLE)) {
+								sctp_free_remote_addr(stcb->asoc.alternate);
+								stcb->asoc.alternate = NULL;
+							}
+						} else {
+							SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+							error = EINVAL;
 						}
+					} else {
+						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+						error = EINVAL;
 					}
 				}
 			} else {
@@ -5183,7 +5740,7 @@
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
-			if (stcb) {
+			if (stcb != NULL) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			break;
@@ -5205,14 +5762,36 @@
 	case SCTP_SET_PEER_PRIMARY_ADDR:
 		{
 			struct sctp_setpeerprim *sspp;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
 			SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
 			if (stcb != NULL) {
 				struct sctp_ifa *ifa;
 
-				ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
-				    stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+#if defined(INET) && defined(INET6)
+				if (sspp->sspp_addr.ss_family == AF_INET6) {
+					struct sockaddr_in6 *sin6;
+
+					sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr;
+					if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+						in6_sin6_2_sin(&sin_store, sin6);
+						addr = (struct sockaddr *)&sin_store;
+					} else {
+						addr = (struct sockaddr *)&sspp->sspp_addr;
+					}
+				} else {
+					addr = (struct sockaddr *)&sspp->sspp_addr;
+				}
+#else
+				addr = (struct sockaddr *)&sspp->sspp_addr;
+#endif
+				ifa = sctp_find_ifa_by_addr(addr, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
 				if (ifa == NULL) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
@@ -5229,7 +5808,7 @@
 					LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 						if (laddr->ifa == NULL) {
 							SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
-							    __FUNCTION__);
+							    __func__);
 							continue;
 						}
 						if (laddr->ifa == ifa) {
@@ -5242,9 +5821,45 @@
 						error = EINVAL;
 						goto out_of_it;
 					}
+				} else {
+					switch (addr->sa_family) {
+#ifdef INET
+					case AF_INET:
+						{
+							struct sockaddr_in *sin;
+
+							sin = (struct sockaddr_in *)addr;
+							if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+							    &sin->sin_addr) != 0) {
+								SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+								error = EINVAL;
+								goto out_of_it;
+							}
+							break;
+						}
+#endif
+#ifdef INET6
+					case AF_INET6:
+						{
+							struct sockaddr_in6 *sin6;
+
+							sin6 = (struct sockaddr_in6 *)addr;
+							if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+							    &sin6->sin6_addr) != 0) {
+								SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+								error = EINVAL;
+								goto out_of_it;
+							}
+							break;
+						}
+#endif
+					default:
+						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+						error = EINVAL;
+						goto out_of_it;
+					}
 				}
-				if (sctp_set_primary_ip_address_sa(stcb,
-				    (struct sockaddr *)&sspp->sspp_addr) != 0) {
+				if (sctp_set_primary_ip_address_sa(stcb, addr) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
@@ -5554,7 +6169,7 @@
 			SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize);
 			SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
 
-			if (PR_SCTP_INVALID_POLICY(info->pr_policy)) {
+			if (info->pr_policy > SCTP_PR_SCTP_MAX) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
@@ -5595,13 +6210,36 @@
 		{
 			struct sctp_paddrthlds *thlds;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize);
 			SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
-			net = NULL;
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_assoc_id);
+
+#if defined(INET) && defined(INET6)
+			if (thlds->spt_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&thlds->spt_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&thlds->spt_address;
+			}
+#else
+			addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -5609,25 +6247,22 @@
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
+				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp,
-				    (struct sockaddr *)&thlds->spt_assoc_id,
+				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&thlds->spt_assoc_id;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
@@ -5636,10 +6271,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
@@ -5655,68 +6290,73 @@
 					break;
 				}
 			}
-			if (stcb) {
-				if (net) {
+			if (stcb != NULL) {
+				if (net != NULL) {
+					net->failure_threshold = thlds->spt_pathmaxrxt;
+					net->pf_threshold = thlds->spt_pathpfthld;
 					if (net->dest_state & SCTP_ADDR_PF) {
-						if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
-						    (net->failure_threshold <= thlds->spt_pathpfthld)) {
+						if ((net->error_count > net->failure_threshold) ||
+						    (net->error_count <= net->pf_threshold)) {
 							net->dest_state &= ~SCTP_ADDR_PF;
 						}
 					} else {
-						if ((net->failure_threshold > thlds->spt_pathpfthld) &&
-						    (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+						if ((net->error_count > net->pf_threshold) &&
+						    (net->error_count <= net->failure_threshold)) {
 							net->dest_state |= SCTP_ADDR_PF;
 							sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
-							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+							    stcb->sctp_ep, stcb, net,
+							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_17);
 							sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 						}
 					}
 					if (net->dest_state & SCTP_ADDR_REACHABLE) {
-						if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+						if (net->error_count > net->failure_threshold) {
 							net->dest_state &= ~SCTP_ADDR_REACHABLE;
 							sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 						}
 					} else {
-						if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+						if (net->error_count <= net->failure_threshold) {
 							net->dest_state |= SCTP_ADDR_REACHABLE;
 							sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 						}
 					}
-					net->failure_threshold = thlds->spt_pathmaxrxt;
-					net->pf_threshold = thlds->spt_pathpfthld;
 				} else {
 					TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+						net->failure_threshold = thlds->spt_pathmaxrxt;
+						net->pf_threshold = thlds->spt_pathpfthld;
 						if (net->dest_state & SCTP_ADDR_PF) {
-							if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
-							    (net->failure_threshold <= thlds->spt_pathpfthld)) {
+							if ((net->error_count > net->failure_threshold) ||
+							    (net->error_count <= net->pf_threshold)) {
 								net->dest_state &= ~SCTP_ADDR_PF;
 							}
 						} else {
-							if ((net->failure_threshold > thlds->spt_pathpfthld) &&
-							    (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+							if ((net->error_count > net->pf_threshold) &&
+							    (net->error_count <= net->failure_threshold)) {
 								net->dest_state |= SCTP_ADDR_PF;
 								sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
-								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+								    stcb->sctp_ep, stcb, net,
+								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_18);
 								sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 							}
 						}
 						if (net->dest_state & SCTP_ADDR_REACHABLE) {
-							if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+							if (net->error_count > net->failure_threshold) {
 								net->dest_state &= ~SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						} else {
-							if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+							if (net->error_count <= net->failure_threshold) {
 								net->dest_state |= SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						}
-						net->failure_threshold = thlds->spt_pathmaxrxt;
-						net->pf_threshold = thlds->spt_pathpfthld;
 					}
 					stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt;
 					stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld;
 				}
+				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
@@ -5736,12 +6376,36 @@
 		{
 			struct sctp_udpencaps *encaps;
 			struct sctp_nets *net;
+			struct sockaddr *addr;
 
+#if defined(INET) && defined(INET6)
+			struct sockaddr_in sin_store;
+
+#endif
+
 			SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize);
 			SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
-			if (stcb) {
-				net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+
+#if defined(INET) && defined(INET6)
+			if (encaps->sue_address.ss_family == AF_INET6) {
+				struct sockaddr_in6 *sin6;
+
+				sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+					in6_sin6_2_sin(&sin_store, sin6);
+					addr = (struct sockaddr *)&sin_store;
+				} else {
+					addr = (struct sockaddr *)&encaps->sue_address;
+				}
 			} else {
+				addr = (struct sockaddr *)&encaps->sue_address;
+			}
+#else
+			addr = (struct sockaddr *)&encaps->sue_address;
+#endif
+			if (stcb != NULL) {
+				net = sctp_findnet(stcb, addr);
+			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
@@ -5751,22 +6415,19 @@
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
-				stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
-			if (stcb && (net == NULL)) {
-				struct sockaddr *sa;
-
-				sa = (struct sockaddr *)&encaps->sue_address;
+			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
-				if (sa->sa_family == AF_INET) {
+				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
-					sin = (struct sockaddr_in *)sa;
-					if (sin->sin_addr.s_addr) {
+					sin = (struct sockaddr_in *)addr;
+					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
@@ -5775,10 +6436,10 @@
 				} else
 #endif
 #ifdef INET6
-				if (sa->sa_family == AF_INET6) {
+				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
-					sin6 = (struct sockaddr_in6 *)sa;
+					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
@@ -5794,8 +6455,8 @@
 					break;
 				}
 			}
-			if (stcb) {
-				if (net) {
+			if (stcb != NULL) {
+				if (net != NULL) {
 					net->port = encaps->sue_port;
 				} else {
 					stcb->asoc.port = encaps->sue_port;
@@ -5815,6 +6476,273 @@
 			}
 			break;
 		}
+	case SCTP_ECN_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					if (av->assoc_value == 0) {
+						inp->ecn_supported = 0;
+					} else {
+						inp->ecn_supported = 1;
+					}
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_PR_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					if (av->assoc_value == 0) {
+						inp->prsctp_supported = 0;
+					} else {
+						inp->prsctp_supported = 1;
+					}
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_AUTH_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					if ((av->assoc_value == 0) &&
+					    (inp->asconf_supported == 1)) {
+						/*
+						 * AUTH is required for
+						 * ASCONF
+						 */
+						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+						error = EINVAL;
+					} else {
+						SCTP_INP_WLOCK(inp);
+						if (av->assoc_value == 0) {
+							inp->auth_supported = 0;
+						} else {
+							inp->auth_supported = 1;
+						}
+						SCTP_INP_WUNLOCK(inp);
+					}
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_ASCONF_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					if ((av->assoc_value != 0) &&
+					    (inp->auth_supported == 0)) {
+						/*
+						 * AUTH is required for
+						 * ASCONF
+						 */
+						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+						error = EINVAL;
+					} else {
+						SCTP_INP_WLOCK(inp);
+						if (av->assoc_value == 0) {
+							inp->asconf_supported = 0;
+							sctp_auth_delete_chunk(SCTP_ASCONF,
+							    inp->sctp_ep.local_auth_chunks);
+							sctp_auth_delete_chunk(SCTP_ASCONF_ACK,
+							    inp->sctp_ep.local_auth_chunks);
+						} else {
+							inp->asconf_supported = 1;
+							sctp_auth_add_chunk(SCTP_ASCONF,
+							    inp->sctp_ep.local_auth_chunks);
+							sctp_auth_add_chunk(SCTP_ASCONF_ACK,
+							    inp->sctp_ep.local_auth_chunks);
+						}
+						SCTP_INP_WUNLOCK(inp);
+					}
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_RECONFIG_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					if (av->assoc_value == 0) {
+						inp->reconfig_supported = 0;
+					} else {
+						inp->reconfig_supported = 1;
+					}
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_NRSACK_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					if (av->assoc_value == 0) {
+						inp->nrsack_supported = 0;
+					} else {
+						inp->nrsack_supported = 1;
+					}
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_PKTDROP_SUPPORTED:
+		{
+			struct sctp_assoc_value *av;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+				error = EINVAL;
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					if (av->assoc_value == 0) {
+						inp->pktdrop_supported = 0;
+					} else {
+						inp->pktdrop_supported = 1;
+					}
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
+	case SCTP_MAX_CWND:
+		{
+			struct sctp_assoc_value *av;
+			struct sctp_nets *net;
+
+			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+			if (stcb) {
+				stcb->asoc.max_cwnd = av->assoc_value;
+				if (stcb->asoc.max_cwnd > 0) {
+					TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+						if ((net->cwnd > stcb->asoc.max_cwnd) &&
+						    (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+							net->cwnd = stcb->asoc.max_cwnd;
+							if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+								net->cwnd = net->mtu - sizeof(struct sctphdr);
+							}
+						}
+					}
+				}
+				SCTP_TCB_UNLOCK(stcb);
+			} else {
+				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+					SCTP_INP_WLOCK(inp);
+					inp->max_cwnd = av->assoc_value;
+					SCTP_INP_WUNLOCK(inp);
+				} else {
+					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+					error = EINVAL;
+				}
+			}
+			break;
+		}
 	default:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 		error = ENOPROTOOPT;
@@ -5830,7 +6758,20 @@
 	size_t optsize = 0;
 	void *p;
 	int error = 0;
+	struct sctp_inpcb *inp;
 
+	if ((sopt->sopt_level == SOL_SOCKET) &&
+	    (sopt->sopt_name == SO_SETFIB)) {
+		inp = (struct sctp_inpcb *)so->so_pcb;
+		if (inp == NULL) {
+			SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+			return (EINVAL);
+		}
+		SCTP_INP_WLOCK(inp);
+		inp->fibnum = so->so_fibnum;
+		SCTP_INP_WUNLOCK(inp);
+		return (0);
+	}
 	if (sopt->sopt_level != IPPROTO_SCTP) {
 		/* wrong proto level... send back up to IP */
 #ifdef INET6
@@ -5837,7 +6778,7 @@
 		if (INP_CHECK_SOCKAF(so, AF_INET6))
 			error = ip6_ctloutput(so, sopt);
 #endif				/* INET6 */
-#if defined(INET) && defined (INET6)
+#if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
@@ -5955,7 +6896,7 @@
 		error = EINVAL;
 		goto out_now;
 	}
-#endif				/* INET6 */
+#endif
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
 	    SCTP_PCB_FLAGS_UNBOUND) {
 		/* Bind a ephemeral port */
@@ -6004,7 +6945,7 @@
 	}
 	vrf_id = inp->def_vrf_id;
 	/* We are GOOD to go */
-	stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+	stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, p);
 	if (stcb == NULL) {
 		/* Gak! no memory */
 		goto out_now;
@@ -6056,9 +6997,8 @@
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
 		/* See if we have a listener */
 		struct sctp_inpcb *tinp;
-		union sctp_sockstore store, *sp;
+		union sctp_sockstore store;
 
-		sp = &store;
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 			/* not bound all */
 			struct sctp_laddr *laddr;
@@ -6065,21 +7005,21 @@
 
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 				memcpy(&store, &laddr->ifa->address, sizeof(store));
-				switch (sp->sa.sa_family) {
+				switch (store.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
-					sp->sin.sin_port = inp->sctp_lport;
+					store.sin.sin_port = inp->sctp_lport;
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
-					sp->sin6.sin6_port = inp->sctp_lport;
+					store.sin6.sin6_port = inp->sctp_lport;
 					break;
 #endif
 				default:
 					break;
 				}
-				tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+				tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
 				if (tinp && (tinp != inp) &&
 				    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
 				    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
@@ -6097,8 +7037,20 @@
 		} else {
 			/* Setup a local addr bound all */
 			memset(&store, 0, sizeof(store));
-			switch (sp->sa.sa_family) {
+#ifdef INET6
+			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+				store.sa.sa_family = AF_INET6;
+				store.sa.sa_len = sizeof(struct sockaddr_in6);
+			}
+#endif
 #ifdef INET
+			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+				store.sa.sa_family = AF_INET;
+				store.sa.sa_len = sizeof(struct sockaddr_in);
+			}
+#endif
+			switch (store.sa.sa_family) {
+#ifdef INET
 			case AF_INET:
 				store.sin.sin_port = inp->sctp_lport;
 				break;
@@ -6105,25 +7057,13 @@
 #endif
 #ifdef INET6
 			case AF_INET6:
-				sp->sin6.sin6_port = inp->sctp_lport;
+				store.sin6.sin6_port = inp->sctp_lport;
 				break;
 #endif
 			default:
 				break;
 			}
-#ifdef INET6
-			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-				store.sa.sa_family = AF_INET6;
-				store.sa.sa_len = sizeof(struct sockaddr_in6);
-			}
-#endif
-#ifdef INET
-			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
-				store.sa.sa_family = AF_INET;
-				store.sa.sa_len = sizeof(struct sockaddr_in);
-			}
-#endif
-			tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+			tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
 			if (tinp && (tinp != inp) &&
 			    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
 			    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
@@ -6135,7 +7075,7 @@
 				SCTP_INP_DECR_REF(tinp);
 				return (EADDRINUSE);
 			} else if (tinp) {
-				SCTP_INP_DECR_REF(inp);
+				SCTP_INP_DECR_REF(tinp);
 			}
 		}
 	}
@@ -6147,8 +7087,8 @@
 #endif
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
+	SOCK_UNLOCK(so);
 	if (error) {
-		SOCK_UNLOCK(so);
 		SCTP_INP_RUNLOCK(inp);
 		return (error);
 	}
@@ -6161,15 +7101,15 @@
 		 * move the guy that was listener to the TCP Pool.
 		 */
 		if (sctp_swap_inpcb_for_listen(inp)) {
-			goto in_use;
+			SCTP_INP_RUNLOCK(inp);
+			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+			return (EADDRINUSE);
 		}
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
 		/* We are already connected AND the TCP model */
-in_use:
 		SCTP_INP_RUNLOCK(inp);
-		SOCK_UNLOCK(so);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
 		return (EADDRINUSE);
 	}
@@ -6176,13 +7116,12 @@
 	SCTP_INP_RUNLOCK(inp);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
 		/* We must do a bind. */
-		SOCK_UNLOCK(so);
 		if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
 			/* bind error, probably perm */
 			return (error);
 		}
-		SOCK_LOCK(so);
 	}
+	SOCK_LOCK(so);
 	/* It appears for 7.0 and on, we must always call this. */
 	solisten_proto(so, backlog);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
@@ -6249,8 +7188,8 @@
 				return (ENOMEM);
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
-			sin->sin_port = ((struct sockaddr_in *)&store)->sin_port;
-			sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr;
+			sin->sin_port = store.sin.sin_port;
+			sin->sin_addr = store.sin.sin_addr;
 			*addr = (struct sockaddr *)sin;
 			break;
 		}
@@ -6265,9 +7204,8 @@
 				return (ENOMEM);
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
-			sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port;
-
-			sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr;
+			sin6->sin6_port = store.sin6.sin6_port;
+			sin6->sin6_addr = store.sin6.sin6_addr;
 			if ((error = sa6_recoverscope(sin6)) != 0) {
 				SCTP_FREE_SONAME(sin6);
 				return (error);
@@ -6311,7 +7249,8 @@
 	}
 	if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 		SCTP_TCB_LOCK(stcb);
-		sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+		sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
 	}
 	return (0);
 }
@@ -6396,7 +7335,7 @@
 			if (laddr->ifa->address.sa.sa_family == AF_INET) {
 				struct sockaddr_in *sin_a;
 
-				sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+				sin_a = &laddr->ifa->address.sin;
 				sin->sin_addr = sin_a->sin_addr;
 				fnd = 1;
 				break;

Modified: trunk/sys/netinet/sctp_var.h
===================================================================
--- trunk/sys/netinet/sctp_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_var.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_var.h 295208 2016-02-03 14:04:07Z tuexen $");
 
 #ifndef _NETINET_SCTP_VAR_H_
 #define _NETINET_SCTP_VAR_H_
@@ -73,7 +73,7 @@
 	  ((stcb->asoc.sctp_features & feature) == 0)) || \
 	 ((stcb == NULL) && (inp != NULL) && \
 	  ((inp->sctp_features & feature) == 0)) || \
-         ((stcb == NULL) && (inp == NULL)))
+	 ((stcb == NULL) && (inp == NULL)))
 
 /* managing mobility_feature in inpcb (by micchie) */
 #define sctp_mobility_feature_on(inp, feature)  (inp->sctp_mobility_features |= feature)
@@ -87,7 +87,7 @@
 
 #define	sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
 
-#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+#define sctp_sbspace_sub(a,b) (((a) > (b)) ? ((a) - (b)) : 0)
 
 /*
  * I tried to cache the readq entries at one point. But the reality
@@ -107,7 +107,7 @@
 #define sctp_alloc_a_readq(_stcb, _readq) { \
 	(_readq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_readq), struct sctp_queued_to_read); \
 	if ((_readq)) { \
- 	     SCTP_INCR_READQ_COUNT(); \
+	     SCTP_INCR_READQ_COUNT(); \
 	} \
 }
 
@@ -122,11 +122,11 @@
 
 #define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
 	(_strmoq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_strmoq), struct sctp_stream_queue_pending); \
-         if ((_strmoq)) {			  \
+	if ((_strmoq)) { \
 		memset(_strmoq, 0, sizeof(struct sctp_stream_queue_pending)); \
 		SCTP_INCR_STRMOQ_COUNT(); \
 		(_strmoq)->holds_key_ref = 0; \
- 	} \
+	} \
 }
 
 #define sctp_free_a_chunk(_stcb, _chk, _so_locked) { \
@@ -134,22 +134,22 @@
 		sctp_auth_key_release((_stcb), (_chk)->auth_keyid, _so_locked); \
 		(_chk)->holds_key_ref = 0; \
 	} \
-        if (_stcb) { \
-          SCTP_TCB_LOCK_ASSERT((_stcb)); \
-          if ((_chk)->whoTo) { \
-                  sctp_free_remote_addr((_chk)->whoTo); \
-                  (_chk)->whoTo = NULL; \
-          } \
-          if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
-               (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
-	 	SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
-	 	SCTP_DECR_CHK_COUNT(); \
-	  } else { \
-	 	TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
-	 	(_stcb)->asoc.free_chunk_cnt++; \
-	 	atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
-          } \
-        } else { \
+	if (_stcb) { \
+		SCTP_TCB_LOCK_ASSERT((_stcb)); \
+		if ((_chk)->whoTo) { \
+			sctp_free_remote_addr((_chk)->whoTo); \
+			(_chk)->whoTo = NULL; \
+		} \
+		if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
+		    (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
+			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
+			SCTP_DECR_CHK_COUNT(); \
+		} else { \
+			TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+			(_stcb)->asoc.free_chunk_cnt++; \
+			atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
+		} \
+	} else { \
 		SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
 		SCTP_DECR_CHK_COUNT(); \
 	} \
@@ -160,7 +160,7 @@
 		(_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \
 		if ((_chk)) { \
 			SCTP_INCR_CHK_COUNT(); \
-                        (_chk)->whoTo = NULL; \
+			(_chk)->whoTo = NULL; \
 			(_chk)->holds_key_ref = 0; \
 		} \
 	} else { \
@@ -168,7 +168,7 @@
 		TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
 		atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
 		(_chk)->holds_key_ref = 0; \
-                SCTP_STAT_INCR(sctps_cached_chk); \
+		SCTP_STAT_INCR(sctps_cached_chk); \
 		(_stcb)->asoc.free_chunk_cnt--; \
 	} \
 }
@@ -179,15 +179,16 @@
 		if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \
 			(void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
 			(void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
-                        if ((__net)->ro.ro_rt) { \
+			(void)SCTP_OS_TIMER_STOP(&(__net)->hb_timer.timer); \
+			if ((__net)->ro.ro_rt) { \
 				RTFREE((__net)->ro.ro_rt); \
 				(__net)->ro.ro_rt = NULL; \
-                        } \
+			} \
 			if ((__net)->src_addr_selected) { \
 				sctp_free_ifa((__net)->ro._s_addr); \
 				(__net)->ro._s_addr = NULL; \
 			} \
-                        (__net)->src_addr_selected = 0; \
+			(__net)->src_addr_selected = 0; \
 			(__net)->dest_state &= ~SCTP_ADDR_REACHABLE; \
 			SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \
 			SCTP_DECR_RADDR_COUNT(); \
@@ -251,12 +252,12 @@
 } while (0)
 
 #define sctp_flight_size_increase(tp1) do { \
-       (tp1)->whoTo->flight_size += (tp1)->book_size; \
+	(tp1)->whoTo->flight_size += (tp1)->book_size; \
 } while (0)
 
 #ifdef SCTP_FS_SPEC_LOG
 #define sctp_total_flight_decrease(stcb, tp1) do { \
-        if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+	if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
 		stcb->asoc.fs_index = 0;\
 	stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
 	stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -265,7 +266,7 @@
 	stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
 	stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
 	stcb->asoc.fs_index++; \
-        tp1->window_probe = 0; \
+	tp1->window_probe = 0; \
 	if (stcb->asoc.total_flight >= tp1->book_size) { \
 		stcb->asoc.total_flight -= tp1->book_size; \
 		if (stcb->asoc.total_flight_count > 0) \
@@ -277,7 +278,7 @@
 } while (0)
 
 #define sctp_total_flight_increase(stcb, tp1) do { \
-        if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+	if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
 		stcb->asoc.fs_index = 0;\
 	stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
 	stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -286,14 +287,14 @@
 	stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
 	stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
 	stcb->asoc.fs_index++; \
-       (stcb)->asoc.total_flight_count++; \
-       (stcb)->asoc.total_flight += (tp1)->book_size; \
+	(stcb)->asoc.total_flight_count++; \
+	(stcb)->asoc.total_flight += (tp1)->book_size; \
 } while (0)
 
 #else
 
 #define sctp_total_flight_decrease(stcb, tp1) do { \
-        tp1->window_probe = 0; \
+	tp1->window_probe = 0; \
 	if (stcb->asoc.total_flight >= tp1->book_size) { \
 		stcb->asoc.total_flight -= tp1->book_size; \
 		if (stcb->asoc.total_flight_count > 0) \
@@ -305,8 +306,8 @@
 } while (0)
 
 #define sctp_total_flight_increase(stcb, tp1) do { \
-       (stcb)->asoc.total_flight_count++; \
-       (stcb)->asoc.total_flight += (tp1)->book_size; \
+	(stcb)->asoc.total_flight_count++; \
+	(stcb)->asoc.total_flight += (tp1)->book_size; \
 } while (0)
 
 #endif
@@ -322,15 +323,11 @@
 
 void sctp_close(struct socket *so);
 int sctp_disconnect(struct socket *so);
-
 void sctp_ctlinput(int, struct sockaddr *, void *);
 int sctp_ctloutput(struct socket *, struct sockopt *);
 
 #ifdef INET
 void sctp_input_with_port(struct mbuf *, int, uint16_t);
-
-#endif
-#ifdef INET
 void sctp_input(struct mbuf *, int);
 
 #endif
@@ -337,34 +334,24 @@
 void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
 void sctp_drain(void);
 void sctp_init(void);
-
 void sctp_finish(void);
-
 int sctp_flush(struct socket *, int);
 int sctp_shutdown(struct socket *);
-void sctp_notify 
-(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+void 
+sctp_notify(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
     struct sockaddr *, struct sctp_tcb *,
     struct sctp_nets *);
+int 
+sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+    int, int, struct proc *);
 
-	int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
-        int, int, struct proc *);
-
 /* can't use sctp_assoc_t here */
-	int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+int sctp_ingetaddr(struct socket *, struct sockaddr **);
+int sctp_peeraddr(struct socket *, struct sockaddr **);
+int sctp_listen(struct socket *, int, struct thread *);
+int sctp_accept(struct socket *, struct sockaddr **);
 
-	int sctp_ingetaddr(struct socket *,
-        struct sockaddr **
-);
-
-	int sctp_peeraddr(struct socket *,
-        struct sockaddr **
-);
-
-	int sctp_listen(struct socket *, int, struct thread *);
-
-	int sctp_accept(struct socket *, struct sockaddr **);
-
 #endif				/* _KERNEL */
 
 #endif				/* !_NETINET_SCTP_VAR_H_ */

Modified: trunk/sys/netinet/sctputil.c
===================================================================
--- trunk/sys/netinet/sctputil.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctputil.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctputil.c 242836 2012-11-09 19:31:31Z mjg $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctputil.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_pcb.h>
@@ -40,6 +40,7 @@
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #ifdef INET6
+#include <netinet6/sctp6_var.h>
 #endif
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_output.h>
@@ -49,6 +50,9 @@
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_bsd_addr.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <sys/proc.h>
 
 
 #ifndef KTR_SCTP
@@ -59,8 +63,7 @@
 extern struct sctp_ss_functions sctp_ss_functions[];
 
 void
-sctp_sblog(struct sockbuf *sb,
-    struct sctp_tcb *stcb, int from, int incr)
+sctp_sblog(struct sockbuf *sb, struct sctp_tcb *stcb, int from, int incr)
 {
 	struct sctp_cwnd_log sctp_clog;
 
@@ -104,7 +107,6 @@
 	    sctp_clog.x.misc.log4);
 }
 
-
 void
 rto_logging(struct sctp_nets *net, int from)
 {
@@ -199,8 +201,7 @@
 }
 
 void
-sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn,
-    int from)
+sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, int from)
 {
 	struct sctp_cwnd_log sctp_clog;
 
@@ -217,6 +218,7 @@
 	    sctp_clog.x.misc.log4);
 }
 
+#ifdef SCTP_MBUF_LOGGING
 void
 sctp_log_mb(struct mbuf *m, int from)
 {
@@ -243,9 +245,20 @@
 }
 
 void
-sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk,
-    int from)
+sctp_log_mbc(struct mbuf *m, int from)
 {
+	struct mbuf *mat;
+
+	for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
+		sctp_log_mb(mat, from);
+	}
+}
+
+#endif
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, int from)
+{
 	struct sctp_cwnd_log sctp_clog;
 
 	if (control == NULL) {
@@ -414,7 +427,8 @@
 	    sctp_clog.x.misc.log4);
 }
 
-void
+#ifdef SCTP_MBCNT_LOGGING
+static void
 sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
 {
 	struct sctp_cwnd_log sctp_clog;
@@ -432,6 +446,8 @@
 	    sctp_clog.x.misc.log4);
 }
 
+#endif
+
 void
 sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 {
@@ -687,7 +703,7 @@
 			}
 			if (lnet->flight_size != tot_out) {
 				SCTP_PRINTF("net:%p flight was %d corrected to %d\n",
-				    lnet, lnet->flight_size,
+				    (void *)lnet, lnet->flight_size,
 				    tot_out);
 				lnet->flight_size = tot_out;
 			}
@@ -878,9 +894,52 @@
 	return (x);
 }
 
+int32_t
+sctp_map_assoc_state(int kernel_state)
+{
+	int32_t user_state;
+
+	if (kernel_state & SCTP_STATE_WAS_ABORTED) {
+		user_state = SCTP_CLOSED;
+	} else if (kernel_state & SCTP_STATE_SHUTDOWN_PENDING) {
+		user_state = SCTP_SHUTDOWN_PENDING;
+	} else {
+		switch (kernel_state & SCTP_STATE_MASK) {
+		case SCTP_STATE_EMPTY:
+			user_state = SCTP_CLOSED;
+			break;
+		case SCTP_STATE_INUSE:
+			user_state = SCTP_CLOSED;
+			break;
+		case SCTP_STATE_COOKIE_WAIT:
+			user_state = SCTP_COOKIE_WAIT;
+			break;
+		case SCTP_STATE_COOKIE_ECHOED:
+			user_state = SCTP_COOKIE_ECHOED;
+			break;
+		case SCTP_STATE_OPEN:
+			user_state = SCTP_ESTABLISHED;
+			break;
+		case SCTP_STATE_SHUTDOWN_SENT:
+			user_state = SCTP_SHUTDOWN_SENT;
+			break;
+		case SCTP_STATE_SHUTDOWN_RECEIVED:
+			user_state = SCTP_SHUTDOWN_RECEIVED;
+			break;
+		case SCTP_STATE_SHUTDOWN_ACK_SENT:
+			user_state = SCTP_SHUTDOWN_ACK_SENT;
+			break;
+		default:
+			user_state = SCTP_CLOSED;
+			break;
+		}
+	}
+	return (user_state);
+}
+
 int
-sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
-    uint32_t override_tag, uint32_t vrf_id)
+sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+    uint32_t override_tag, uint32_t vrf_id, uint16_t o_strms)
 {
 	struct sctp_association *asoc;
 
@@ -897,26 +956,37 @@
 	 */
 	int i;
 
+#if defined(SCTP_DETAILED_STR_STATS)
+	int j;
+
+#endif
+
 	asoc = &stcb->asoc;
 	/* init all variables to a known value. */
 	SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
-	asoc->max_burst = m->sctp_ep.max_burst;
-	asoc->fr_max_burst = m->sctp_ep.fr_max_burst;
-	asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
-	asoc->cookie_life = m->sctp_ep.def_cookie_life;
-	asoc->sctp_cmt_on_off = m->sctp_cmt_on_off;
-	asoc->ecn_allowed = m->sctp_ecn_enable;
-	asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off);
+	asoc->max_burst = inp->sctp_ep.max_burst;
+	asoc->fr_max_burst = inp->sctp_ep.fr_max_burst;
+	asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+	asoc->cookie_life = inp->sctp_ep.def_cookie_life;
+	asoc->sctp_cmt_on_off = inp->sctp_cmt_on_off;
+	asoc->ecn_supported = inp->ecn_supported;
+	asoc->prsctp_supported = inp->prsctp_supported;
+	asoc->auth_supported = inp->auth_supported;
+	asoc->asconf_supported = inp->asconf_supported;
+	asoc->reconfig_supported = inp->reconfig_supported;
+	asoc->nrsack_supported = inp->nrsack_supported;
+	asoc->pktdrop_supported = inp->pktdrop_supported;
 	asoc->sctp_cmt_pf = (uint8_t) 0;
-	asoc->sctp_frag_point = m->sctp_frag_point;
-	asoc->sctp_features = m->sctp_features;
-	asoc->default_dscp = m->sctp_ep.default_dscp;
+	asoc->sctp_frag_point = inp->sctp_frag_point;
+	asoc->sctp_features = inp->sctp_features;
+	asoc->default_dscp = inp->sctp_ep.default_dscp;
+	asoc->max_cwnd = inp->max_cwnd;
 #ifdef INET6
-	if (m->sctp_ep.default_flowlabel) {
-		asoc->default_flowlabel = m->sctp_ep.default_flowlabel;
+	if (inp->sctp_ep.default_flowlabel) {
+		asoc->default_flowlabel = inp->sctp_ep.default_flowlabel;
 	} else {
-		if (m->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) {
-			asoc->default_flowlabel = sctp_select_initial_TSN(&m->sctp_ep);
+		if (inp->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) {
+			asoc->default_flowlabel = sctp_select_initial_TSN(&inp->sctp_ep);
 			asoc->default_flowlabel &= 0x000fffff;
 			asoc->default_flowlabel |= 0x80000000;
 		} else {
@@ -928,11 +998,11 @@
 	if (override_tag) {
 		asoc->my_vtag = override_tag;
 	} else {
-		asoc->my_vtag = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+		asoc->my_vtag = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
 	}
 	/* Get the nonce tags */
-	asoc->my_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
-	asoc->peer_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+	asoc->my_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+	asoc->peer_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
 	asoc->vrf_id = vrf_id;
 
 #ifdef SCTP_ASOCLOG_OF_TSNS
@@ -949,10 +1019,9 @@
 	asoc->refcnt = 0;
 	asoc->assoc_up_sent = 0;
 	asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq =
-	    sctp_select_initial_TSN(&m->sctp_ep);
+	    sctp_select_initial_TSN(&inp->sctp_ep);
 	asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
 	/* we are optimisitic here */
-	asoc->peer_supports_pktdrop = 1;
 	asoc->peer_supports_nat = 0;
 	asoc->sent_queue_retran_cnt = 0;
 
@@ -967,49 +1036,42 @@
 	/* here we are different, we hold the next one we expect */
 	asoc->str_reset_seq_in = asoc->last_acked_seq + 1;
 
-	asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max;
-	asoc->initial_rto = m->sctp_ep.initial_rto;
+	asoc->initial_init_rto_max = inp->sctp_ep.initial_init_rto_max;
+	asoc->initial_rto = inp->sctp_ep.initial_rto;
 
-	asoc->max_init_times = m->sctp_ep.max_init_times;
-	asoc->max_send_times = m->sctp_ep.max_send_times;
-	asoc->def_net_failure = m->sctp_ep.def_net_failure;
-	asoc->def_net_pf_threshold = m->sctp_ep.def_net_pf_threshold;
+	asoc->max_init_times = inp->sctp_ep.max_init_times;
+	asoc->max_send_times = inp->sctp_ep.max_send_times;
+	asoc->def_net_failure = inp->sctp_ep.def_net_failure;
+	asoc->def_net_pf_threshold = inp->sctp_ep.def_net_pf_threshold;
 	asoc->free_chunk_cnt = 0;
 
 	asoc->iam_blocking = 0;
-	asoc->context = m->sctp_context;
-	asoc->local_strreset_support = m->local_strreset_support;
-	asoc->def_send = m->def_send;
-	asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
-	asoc->sack_freq = m->sctp_ep.sctp_sack_freq;
+	asoc->context = inp->sctp_context;
+	asoc->local_strreset_support = inp->local_strreset_support;
+	asoc->def_send = inp->def_send;
+	asoc->delayed_ack = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+	asoc->sack_freq = inp->sctp_ep.sctp_sack_freq;
 	asoc->pr_sctp_cnt = 0;
 	asoc->total_output_queue_size = 0;
 
-	if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		struct in6pcb *inp6;
-
-		/* Its a V6 socket */
-		inp6 = (struct in6pcb *)m;
-		asoc->ipv6_addr_legal = 1;
-		/* Now look at the binding flag to see if V4 will be legal */
-		if (SCTP_IPV6_V6ONLY(inp6) == 0) {
-			asoc->ipv4_addr_legal = 1;
+	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+		asoc->scope.ipv6_addr_legal = 1;
+		if (SCTP_IPV6_V6ONLY(inp) == 0) {
+			asoc->scope.ipv4_addr_legal = 1;
 		} else {
-			/* V4 addresses are NOT legal on the association */
-			asoc->ipv4_addr_legal = 0;
+			asoc->scope.ipv4_addr_legal = 0;
 		}
 	} else {
-		/* Its a V4 socket, no - V6 */
-		asoc->ipv4_addr_legal = 1;
-		asoc->ipv6_addr_legal = 0;
+		asoc->scope.ipv6_addr_legal = 0;
+		asoc->scope.ipv4_addr_legal = 1;
 	}
 
-	asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND);
-	asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket);
+	asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(inp->sctp_socket), SCTP_MINIMAL_RWND);
+	asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(inp->sctp_socket);
 
-	asoc->smallest_mtu = m->sctp_frag_point;
-	asoc->minrto = m->sctp_ep.sctp_minrto;
-	asoc->maxrto = m->sctp_ep.sctp_maxrto;
+	asoc->smallest_mtu = inp->sctp_frag_point;
+	asoc->minrto = inp->sctp_ep.sctp_minrto;
+	asoc->maxrto = inp->sctp_ep.sctp_maxrto;
 
 	asoc->locked_on_sending = NULL;
 	asoc->stream_locked_on = 0;
@@ -1026,13 +1088,13 @@
 	/* Setup to fill the hb random cache at first HB */
 	asoc->hb_random_idx = 4;
 
-	asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time;
+	asoc->sctp_autoclose_ticks = inp->sctp_ep.auto_close_time;
 
-	stcb->asoc.congestion_control_module = m->sctp_ep.sctp_default_cc_module;
-	stcb->asoc.cc_functions = sctp_cc_functions[m->sctp_ep.sctp_default_cc_module];
+	stcb->asoc.congestion_control_module = inp->sctp_ep.sctp_default_cc_module;
+	stcb->asoc.cc_functions = sctp_cc_functions[inp->sctp_ep.sctp_default_cc_module];
 
-	stcb->asoc.stream_scheduling_module = m->sctp_ep.sctp_default_ss_module;
-	stcb->asoc.ss_functions = sctp_ss_functions[m->sctp_ep.sctp_default_ss_module];
+	stcb->asoc.stream_scheduling_module = inp->sctp_ep.sctp_default_ss_module;
+	stcb->asoc.ss_functions = sctp_ss_functions[inp->sctp_ep.sctp_default_ss_module];
 
 	/*
 	 * Now the stream parameters, here we allocate space for all streams
@@ -1039,7 +1101,7 @@
 	 * that we request by default.
 	 */
 	asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams =
-	    m->sctp_ep.pre_open_stream_count;
+	    o_strms;
 	SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
 	    asoc->streamoutcnt * sizeof(struct sctp_stream_out),
 	    SCTP_M_STRMO);
@@ -1057,10 +1119,21 @@
 		 * that were dropped must be notified to the upper layer as
 		 * failed to send.
 		 */
-		asoc->strmout[i].next_sequence_sent = 0x0;
+		asoc->strmout[i].next_sequence_send = 0x0;
 		TAILQ_INIT(&asoc->strmout[i].outqueue);
+		asoc->strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+		for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+			asoc->strmout[i].abandoned_sent[j] = 0;
+			asoc->strmout[i].abandoned_unsent[j] = 0;
+		}
+#else
+		asoc->strmout[i].abandoned_sent[0] = 0;
+		asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
 		asoc->strmout[i].stream_no = i;
 		asoc->strmout[i].last_msg_incomplete = 0;
+		asoc->strmout[i].state = SCTP_STREAM_OPENING;
 		asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL);
 	}
 	asoc->ss_functions.sctp_ss_init(stcb, asoc, 0);
@@ -1093,7 +1166,7 @@
 	TAILQ_INIT(&asoc->sent_queue);
 	TAILQ_INIT(&asoc->reasmqueue);
 	TAILQ_INIT(&asoc->resetHead);
-	asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome;
+	asoc->max_inbound_streams = inp->sctp_ep.max_open_streams_intome;
 	TAILQ_INIT(&asoc->asconf_queue);
 	/* authentication fields */
 	asoc->authinfo.random = NULL;
@@ -1104,7 +1177,7 @@
 	asoc->authinfo.recv_keyid = 0;
 	LIST_INIT(&asoc->shared_keys);
 	asoc->marked_retrans = 0;
-	asoc->port = m->sctp_ep.port;
+	asoc->port = inp->sctp_ep.port;
 	asoc->timoinit = 0;
 	asoc->timodata = 0;
 	asoc->timosack = 0;
@@ -1114,6 +1187,10 @@
 	asoc->timoshutdownack = 0;
 	(void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
 	asoc->discontinuity_time = asoc->start_time;
+	for (i = 0; i < SCTP_PR_SCTP_MAX + 1; i++) {
+		asoc->abandoned_unsent[i] = 0;
+		asoc->abandoned_sent[i] = 0;
+	}
 	/*
 	 * sa_ignore MEMLEAK {memory is put in the assoc mapping array and
 	 * freed later when the association is freed.
@@ -1394,7 +1471,9 @@
 	if (asc->cnt == 0) {
 		SCTP_FREE(asc, SCTP_M_ASC_IT);
 	} else {
-		(void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+		int ret;
+
+		ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
 		    sctp_asconf_iterator_stcb,
 		    NULL,	/* No ep end for boundall */
 		    SCTP_PCB_FLAGS_BOUNDALL,
@@ -1402,6 +1481,23 @@
 		    SCTP_ASOC_ANY_STATE,
 		    (void *)asc, 0,
 		    sctp_asconf_iterator_end, NULL, 0);
+		if (ret) {
+			SCTP_PRINTF("Failed to initiate iterator for handle_addr_wq\n");
+			/*
+			 * Freeing if we are stopping or put back on the
+			 * addr_wq.
+			 */
+			if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+				sctp_asconf_iterator_end(asc, 0);
+			} else {
+				SCTP_WQ_ADDR_LOCK();
+				LIST_FOREACH(wi, &asc->list_of_work, sctp_nxt_addr) {
+					LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+				}
+				SCTP_WQ_ADDR_UNLOCK();
+				SCTP_FREE(asc, SCTP_M_ASC_IT);
+			}
+		}
 	}
 }
 
@@ -1412,12 +1508,14 @@
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct sctp_timer *tmr;
+	struct mbuf *op_err;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
-	int did_output, type;
+	int did_output;
+	int type;
 
 	tmr = (struct sctp_timer *)t;
 	inp = (struct sctp_inpcb *)tmr->ep;
@@ -1435,7 +1533,7 @@
 	if (tmr->self != (void *)tmr) {
 		/*
 		 * SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n",
-		 * tmr);
+		 * (void *)tmr);
 		 */
 		CURVNET_RESTORE();
 		return;
@@ -1456,7 +1554,6 @@
 	}
 	/* if this is an iterator timeout, get the struct and clear inp */
 	tmr->stopped_from = 0xa003;
-	type = tmr->type;
 	if (inp) {
 		SCTP_INP_INCR_REF(inp);
 		if ((inp->sctp_socket == NULL) &&
@@ -1487,8 +1584,9 @@
 			return;
 		}
 	}
+	type = tmr->type;
 	tmr->stopped_from = 0xa005;
-	SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+	SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", type);
 	if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
 		if (inp) {
 			SCTP_INP_DECR_REF(inp);
@@ -1504,7 +1602,7 @@
 	if (stcb) {
 		SCTP_TCB_LOCK(stcb);
 		atomic_add_int(&stcb->asoc.refcnt, -1);
-		if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+		if ((type != SCTP_TIMER_TYPE_ASOCKILL) &&
 		    ((stcb->asoc.state == 0) ||
 		    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
 			SCTP_TCB_UNLOCK(stcb);
@@ -1516,7 +1614,7 @@
 		}
 	}
 	/* record in stopped what t-o occured */
-	tmr->stopped_from = tmr->type;
+	tmr->stopped_from = type;
 
 	/* mark as being serviced now */
 	if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -1534,7 +1632,7 @@
 	SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
 
 	/* call the handler for the appropriate timer type */
-	switch (tmr->type) {
+	switch (type) {
 	case SCTP_TIMER_TYPE_ZERO_COPY:
 		if (inp == NULL) {
 			break;
@@ -1724,7 +1822,9 @@
 			break;
 		}
 		SCTP_STAT_INCR(sctps_timoshutdownguard);
-		sctp_abort_an_association(inp, stcb, NULL, SCTP_SO_NOT_LOCKED);
+		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+		    "Shutdown guard timer expired");
+		sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		/* no need to unlock on tcb its gone */
 		goto out_decr;
 
@@ -1777,8 +1877,9 @@
 		SCTP_STAT_INCR(sctps_timoassockill);
 		/* Can we free it yet? */
 		SCTP_INP_DECR_REF(inp);
-		sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL,
+		    SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(inp);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -1786,8 +1887,9 @@
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		/*
@@ -1806,7 +1908,8 @@
 		 * killer
 		 */
 		SCTP_INP_DECR_REF(inp);
-		sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+		sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL,
+		    SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
 		sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 		    SCTP_CALLED_FROM_INPKILL_TIMER);
 		inp = NULL;
@@ -1813,11 +1916,11 @@
 		goto out_no_decr;
 	default:
 		SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
-		    tmr->type);
+		    type);
 		break;
 	}
 #ifdef SCTP_AUDITING_ENABLED
-	sctp_audit_log(0xF1, (uint8_t) tmr->type);
+	sctp_audit_log(0xF1, (uint8_t) type);
 	if (inp)
 		sctp_auditing(5, inp, stcb, net);
 #endif
@@ -1840,8 +1943,7 @@
 		SCTP_INP_DECR_REF(inp);
 	}
 out_no_decr:
-	SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
-	    type);
+	SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type = %d)\n", type);
 	CURVNET_RESTORE();
 }
 
@@ -1934,7 +2036,7 @@
 		 * though we use a different timer. We also add the HB timer
 		 * PLUS a random jitter.
 		 */
-		if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
+		if ((stcb == NULL) || (net == NULL)) {
 			return;
 		} else {
 			uint32_t rndval;
@@ -1989,9 +2091,6 @@
 		 * nothing needed but the endpoint here ususually about 60
 		 * minutes.
 		 */
-		if (inp == NULL) {
-			return;
-		}
 		tmr = &inp->sctp_ep.signature_change;
 		to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
 		break;
@@ -2008,9 +2107,6 @@
 		 * timer since that has stopped and we are in the GONE
 		 * state.
 		 */
-		if (inp == NULL) {
-			return;
-		}
 		tmr = &inp->sctp_ep.signature_change;
 		to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
 		break;
@@ -2019,12 +2115,9 @@
 		 * Here we use the value found in the EP for PMTU ususually
 		 * about 10 minutes.
 		 */
-		if ((stcb == NULL) || (inp == NULL)) {
+		if ((stcb == NULL) || (net == NULL)) {
 			return;
 		}
-		if (net == NULL) {
-			return;
-		}
 		if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
 			return;
 		}
@@ -2048,10 +2141,14 @@
 		 * Here we use the endpoints shutdown guard timer usually
 		 * about 3 minutes.
 		 */
-		if ((inp == NULL) || (stcb == NULL)) {
+		if (stcb == NULL) {
 			return;
 		}
-		to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+		if (inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] == 0) {
+			to_ticks = 5 * MSEC_TO_TICKS(stcb->asoc.maxrto);
+		} else {
+			to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+		}
 		tmr = &stcb->asoc.shut_guard_timer;
 		break;
 	case SCTP_TIMER_TYPE_STRRESET:
@@ -2107,13 +2204,13 @@
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
-		    __FUNCTION__, t_type);
+		    __func__, t_type);
 		return;
 		break;
 	}
 	if ((to_ticks <= 0) || (tmr == NULL)) {
 		SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
-		    __FUNCTION__, t_type, to_ticks, tmr);
+		    __func__, t_type, to_ticks, (void *)tmr);
 		return;
 	}
 	if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -2269,7 +2366,7 @@
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
-		    __FUNCTION__, t_type);
+		    __func__, t_type);
 		break;
 	}
 	if (tmr == NULL) {
@@ -2386,10 +2483,10 @@
 	timevalsub(&now, old);
 	/* store the current RTT in us */
 	net->rtt = (uint64_t) 1000000 *(uint64_t) now.tv_sec +
-	         (uint64_t) now.tv_usec;
+	        (uint64_t) now.tv_usec;
 
-	/* computer rtt in ms */
-	rtt = net->rtt / 1000;
+	/* compute rtt in ms */
+	rtt = (int32_t) (net->rtt / 1000);
 	if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
 		/*
 		 * Tell the CC module that a new update has just occurred
@@ -2523,19 +2620,14 @@
 }
 
 
-int
+struct mbuf *
 sctp_add_pad_tombuf(struct mbuf *m, int padlen)
 {
-	/*
-	 * add padlen bytes of 0 filled padding to the end of the mbuf. If
-	 * padlen is > 3 this routine will fail.
-	 */
-	uint8_t *dp;
-	int i;
+	struct mbuf *m_last;
+	caddr_t dp;
 
 	if (padlen > 3) {
-		SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
-		return (ENOBUFS);
+		return (NULL);
 	}
 	if (padlen <= M_TRAILINGSPACE(m)) {
 		/*
@@ -2542,39 +2634,30 @@
 		 * The easy way. We hope the majority of the time we hit
 		 * here :)
 		 */
-		dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
-		SCTP_BUF_LEN(m) += padlen;
+		m_last = m;
 	} else {
-		/* Hard way we must grow the mbuf */
-		struct mbuf *tmp;
-
-		tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
-		if (tmp == NULL) {
-			/* Out of space GAK! we are in big trouble. */
-			SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
-			return (ENOBUFS);
+		/* Hard way we must grow the mbuf chain */
+		m_last = sctp_get_mbuf_for_msg(padlen, 0, M_NOWAIT, 1, MT_DATA);
+		if (m_last == NULL) {
+			return (NULL);
 		}
-		/* setup and insert in middle */
-		SCTP_BUF_LEN(tmp) = padlen;
-		SCTP_BUF_NEXT(tmp) = NULL;
-		SCTP_BUF_NEXT(m) = tmp;
-		dp = mtod(tmp, uint8_t *);
+		SCTP_BUF_LEN(m_last) = 0;
+		SCTP_BUF_NEXT(m_last) = NULL;
+		SCTP_BUF_NEXT(m) = m_last;
 	}
-	/* zero out the pad */
-	for (i = 0; i < padlen; i++) {
-		*dp = 0;
-		dp++;
-	}
-	return (0);
+	dp = mtod(m_last, caddr_t)+SCTP_BUF_LEN(m_last);
+	SCTP_BUF_LEN(m_last) += padlen;
+	memset(dp, 0, padlen);
+	return (m_last);
 }
 
-int
+struct mbuf *
 sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
 {
 	/* find the last mbuf in chain and pad it */
 	struct mbuf *m_at;
 
-	if (last_mbuf) {
+	if (last_mbuf != NULL) {
 		return (sctp_add_pad_tombuf(last_mbuf, padval));
 	} else {
 		for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
@@ -2583,8 +2666,7 @@
 			}
 		}
 	}
-	SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
-	return (EFAULT);
+	return (NULL);
 }
 
 static void
@@ -2601,15 +2683,18 @@
 	size_t notif_len, abort_len;
 	unsigned int i;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
 
+	if (stcb == NULL) {
+		return;
+	}
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
 		notif_len = sizeof(struct sctp_assoc_change);
 		if (abort != NULL) {
-			abort_len = htons(abort->ch.chunk_length);
+			abort_len = ntohs(abort->ch.chunk_length);
 		} else {
 			abort_len = 0;
 		}
@@ -2618,11 +2703,11 @@
 		} else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
 			notif_len += abort_len;
 		}
-		m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+		m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (m_notify == NULL) {
 			/* Retry with smaller value. */
 			notif_len = sizeof(struct sctp_assoc_change);
-			m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+			m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 			if (m_notify == NULL) {
 				goto set_error;
 			}
@@ -2629,6 +2714,7 @@
 		}
 		SCTP_BUF_NEXT(m_notify) = NULL;
 		sac = mtod(m_notify, struct sctp_assoc_change *);
+		memset(sac, 0, notif_len);
 		sac->sac_type = SCTP_ASSOC_CHANGE;
 		sac->sac_flags = 0;
 		sac->sac_length = sizeof(struct sctp_assoc_change);
@@ -2641,17 +2727,17 @@
 		if (notif_len > sizeof(struct sctp_assoc_change)) {
 			if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
 				i = 0;
-				if (stcb->asoc.peer_supports_prsctp) {
+				if (stcb->asoc.prsctp_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR;
 				}
-				if (stcb->asoc.peer_supports_auth) {
+				if (stcb->asoc.auth_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH;
 				}
-				if (stcb->asoc.peer_supports_asconf) {
+				if (stcb->asoc.asconf_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF;
 				}
 				sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF;
-				if (stcb->asoc.peer_supports_strreset) {
+				if (stcb->asoc.reconfig_supported == 1) {
 					sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG;
 				}
 				sac->sac_length += i;
@@ -2685,6 +2771,7 @@
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 	    ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
+		SOCK_LOCK(stcb->sctp_socket);
 		if (from_peer) {
 			if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
@@ -2694,12 +2781,18 @@
 				stcb->sctp_socket->so_error = ECONNRESET;
 			}
 		} else {
-			SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
-			stcb->sctp_socket->so_error = ECONNABORTED;
+			if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+			    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
+				stcb->sctp_socket->so_error = ETIMEDOUT;
+			} else {
+				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
+				stcb->sctp_socket->so_error = ECONNABORTED;
+			}
 		}
 	}
 	/* Wake ANY sleepers */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	so = SCTP_INP_SO(stcb->sctp_ep);
 	if (!so_locked) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
@@ -2716,11 +2809,11 @@
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 	    ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
-		socantrcvmore(stcb->sctp_socket);
+		socantrcvmore_locked(stcb->sctp_socket);
 	}
 	sorwakeup(stcb->sctp_socket);
 	sowwakeup(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	if (!so_locked) {
 		SCTP_SOCKET_UNLOCK(so, 1);
 	}
@@ -2729,7 +2822,11 @@
 
 static void
 sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
-    struct sockaddr *sa, uint32_t error)
+    struct sockaddr *sa, uint32_t error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+    SCTP_UNUSED
+#endif
+)
 {
 	struct mbuf *m_notify;
 	struct sctp_paddr_change *spc;
@@ -2740,11 +2837,12 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
 	spc = mtod(m_notify, struct sctp_paddr_change *);
+	memset(spc, 0, sizeof(struct sctp_paddr_change));
 	spc->spc_type = SCTP_PEER_ADDR_CHANGE;
 	spc->spc_flags = 0;
 	spc->spc_length = sizeof(struct sctp_paddr_change);
@@ -2751,7 +2849,16 @@
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
+#ifdef INET6
+		if (sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+			in6_sin_2_v4mapsin6((struct sockaddr_in *)sa,
+			    (struct sockaddr_in6 *)&spc->spc_aaddr);
+		} else {
+			memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+		}
+#else
 		memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+#endif
 		break;
 #endif
 #ifdef INET6
@@ -2802,7 +2909,7 @@
 	    control,
 	    &stcb->sctp_socket->so_rcv, 1,
 	    SCTP_READ_LOCK_NOT_HELD,
-	    SCTP_SO_NOT_LOCKED);
+	    so_locked);
 }
 
 
@@ -2831,15 +2938,14 @@
 	} else {
 		length = sizeof(struct sctp_send_failed);
 	}
-	m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(length, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
-	length += chk->send_size;
-	length -= sizeof(struct sctp_data_chunk);
 	SCTP_BUF_LEN(m_notify) = 0;
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
 		ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+		memset(ssfe, 0, length);
 		ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
 		if (sent) {
 			ssfe->ssfe_flags = SCTP_DATA_SENT;
@@ -2846,10 +2952,11 @@
 		} else {
 			ssfe->ssfe_flags = SCTP_DATA_UNSENT;
 		}
+		length += chk->send_size;
+		length -= sizeof(struct sctp_data_chunk);
 		ssfe->ssfe_length = length;
 		ssfe->ssfe_error = error;
 		/* not exactly what the user sent in, but should be close :) */
-		bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
 		ssfe->ssfe_info.snd_sid = chk->rec.data.stream_number;
 		ssfe->ssfe_info.snd_flags = chk->rec.data.rcv_flags;
 		ssfe->ssfe_info.snd_ppid = chk->rec.data.payloadtype;
@@ -2859,6 +2966,7 @@
 		SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
 	} else {
 		ssf = mtod(m_notify, struct sctp_send_failed *);
+		memset(ssf, 0, length);
 		ssf->ssf_type = SCTP_SEND_FAILED;
 		if (sent) {
 			ssf->ssf_flags = SCTP_DATA_SENT;
@@ -2865,6 +2973,8 @@
 		} else {
 			ssf->ssf_flags = SCTP_DATA_UNSENT;
 		}
+		length += chk->send_size;
+		length -= sizeof(struct sctp_data_chunk);
 		ssf->ssf_length = length;
 		ssf->ssf_error = error;
 		/* not exactly what the user sent in, but should be close :) */
@@ -2943,21 +3053,21 @@
 	} else {
 		length = sizeof(struct sctp_send_failed);
 	}
-	m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(length, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL) {
 		/* no space left */
 		return;
 	}
-	length += sp->length;
 	SCTP_BUF_LEN(m_notify) = 0;
 	if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
 		ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+		memset(ssfe, 0, length);
 		ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
 		ssfe->ssfe_flags = SCTP_DATA_UNSENT;
+		length += sp->length;
 		ssfe->ssfe_length = length;
 		ssfe->ssfe_error = error;
 		/* not exactly what the user sent in, but should be close :) */
-		bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
 		ssfe->ssfe_info.snd_sid = sp->stream;
 		if (sp->some_taken) {
 			ssfe->ssfe_info.snd_flags = SCTP_DATA_LAST_FRAG;
@@ -2971,14 +3081,15 @@
 		SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
 	} else {
 		ssf = mtod(m_notify, struct sctp_send_failed *);
+		memset(ssf, 0, length);
 		ssf->ssf_type = SCTP_SEND_FAILED;
 		ssf->ssf_flags = SCTP_DATA_UNSENT;
+		length += sp->length;
 		ssf->ssf_length = length;
 		ssf->ssf_error = error;
 		/* not exactly what the user sent in, but should be close :) */
-		bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
 		ssf->ssf_info.sinfo_stream = sp->stream;
-		ssf->ssf_info.sinfo_ssn = sp->strseq;
+		ssf->ssf_info.sinfo_ssn = 0;
 		if (sp->some_taken) {
 			ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG;
 		} else {
@@ -3032,12 +3143,13 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
 	sai = mtod(m_notify, struct sctp_adaptation_event *);
+	memset(sai, 0, sizeof(struct sctp_adaptation_event));
 	sai->sai_type = SCTP_ADAPTATION_INDICATION;
 	sai->sai_flags = 0;
 	sai->sai_length = sizeof(struct sctp_adaptation_event);
@@ -3087,12 +3199,13 @@
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
 	pdapi = mtod(m_notify, struct sctp_pdapi_event *);
+	memset(pdapi, 0, sizeof(struct sctp_pdapi_event));
 	pdapi->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
 	pdapi->pdapi_flags = 0;
 	pdapi->pdapi_length = sizeof(struct sctp_pdapi_event);
@@ -3135,7 +3248,7 @@
 	}
 	if (stcb->sctp_ep && stcb->sctp_socket) {
 		/* This should always be the case */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 
 		so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3152,7 +3265,7 @@
 		}
 #endif
 		sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		if (!so_locked) {
 			SCTP_SOCKET_UNLOCK(so, 1);
 		}
@@ -3174,7 +3287,7 @@
 	if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		/* mark socket closed for read/write and wakeup! */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 
 		so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3189,7 +3302,7 @@
 		}
 #endif
 		socantsendmore(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
@@ -3197,11 +3310,12 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	sse = mtod(m_notify, struct sctp_shutdown_event *);
+	memset(sse, 0, sizeof(struct sctp_shutdown_event));
 	sse->sse_type = SCTP_SHUTDOWN_EVENT;
 	sse->sse_flags = 0;
 	sse->sse_length = sizeof(struct sctp_shutdown_event);
@@ -3245,7 +3359,7 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL) {
 		/* no space left */
 		return;
@@ -3252,6 +3366,7 @@
 	}
 	SCTP_BUF_LEN(m_notify) = 0;
 	event = mtod(m_notify, struct sctp_sender_dry_event *);
+	memset(event, 0, sizeof(struct sctp_sender_dry_event));
 	event->sender_dry_type = SCTP_SENDER_DRY_EVENT;
 	event->sender_dry_flags = 0;
 	event->sender_dry_length = sizeof(struct sctp_sender_dry_event);
@@ -3284,7 +3399,6 @@
 	struct mbuf *m_notify;
 	struct sctp_queued_to_read *control;
 	struct sctp_stream_change_event *stradd;
-	int len;
 
 	if ((stcb == NULL) ||
 	    (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_CHANGEEVNT))) {
@@ -3297,25 +3411,20 @@
 		return;
 	}
 	stcb->asoc.peer_req_out = 0;
-	m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_stream_change_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
-	len = sizeof(struct sctp_stream_change_event);
-	if (len > M_TRAILINGSPACE(m_notify)) {
-		/* never enough room */
-		sctp_m_freem(m_notify);
-		return;
-	}
 	stradd = mtod(m_notify, struct sctp_stream_change_event *);
+	memset(stradd, 0, sizeof(struct sctp_stream_change_event));
 	stradd->strchange_type = SCTP_STREAM_CHANGE_EVENT;
 	stradd->strchange_flags = flag;
-	stradd->strchange_length = len;
+	stradd->strchange_length = sizeof(struct sctp_stream_change_event);
 	stradd->strchange_assoc_id = sctp_get_associd(stcb);
 	stradd->strchange_instrms = numberin;
 	stradd->strchange_outstrms = numberout;
-	SCTP_BUF_LEN(m_notify) = len;
+	SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_stream_change_event);
 	SCTP_BUF_NEXT(m_notify) = NULL;
 	if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
 		/* no space */
@@ -3346,7 +3455,6 @@
 	struct mbuf *m_notify;
 	struct sctp_queued_to_read *control;
 	struct sctp_assoc_reset_event *strasoc;
-	int len;
 
 	if ((stcb == NULL) ||
 	    (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ASSOC_RESETEVNT))) {
@@ -3353,25 +3461,20 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_reset_event), 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
 	SCTP_BUF_LEN(m_notify) = 0;
-	len = sizeof(struct sctp_assoc_reset_event);
-	if (len > M_TRAILINGSPACE(m_notify)) {
-		/* never enough room */
-		sctp_m_freem(m_notify);
-		return;
-	}
 	strasoc = mtod(m_notify, struct sctp_assoc_reset_event *);
+	memset(strasoc, 0, sizeof(struct sctp_assoc_reset_event));
 	strasoc->assocreset_type = SCTP_ASSOC_RESET_EVENT;
 	strasoc->assocreset_flags = flag;
-	strasoc->assocreset_length = len;
+	strasoc->assocreset_length = sizeof(struct sctp_assoc_reset_event);
 	strasoc->assocreset_assoc_id = sctp_get_associd(stcb);
 	strasoc->assocreset_local_tsn = sending_tsn;
 	strasoc->assocreset_remote_tsn = recv_tsn;
-	SCTP_BUF_LEN(m_notify) = len;
+	SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_reset_event);
 	SCTP_BUF_NEXT(m_notify) = NULL;
 	if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
 		/* no space */
@@ -3412,7 +3515,7 @@
 		/* event not enabled */
 		return;
 	}
-	m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL)
 		/* no space left */
 		return;
@@ -3424,6 +3527,7 @@
 		return;
 	}
 	strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+	memset(strreset, 0, len);
 	strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
 	strreset->strreset_flags = flag;
 	strreset->strreset_length = len;
@@ -3474,16 +3578,16 @@
 		return;
 	}
 	if (chunk != NULL) {
-		chunk_len = htons(chunk->ch.chunk_length);
+		chunk_len = ntohs(chunk->ch.chunk_length);
 	} else {
 		chunk_len = 0;
 	}
 	notif_len = sizeof(struct sctp_remote_error) + chunk_len;
-	m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+	m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 	if (m_notify == NULL) {
 		/* Retry with smaller value. */
 		notif_len = sizeof(struct sctp_remote_error);
-		m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+		m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
 		if (m_notify == NULL) {
 			return;
 		}
@@ -3490,6 +3594,7 @@
 	}
 	SCTP_BUF_NEXT(m_notify) = NULL;
 	sre = mtod(m_notify, struct sctp_remote_error *);
+	memset(sre, 0, notif_len);
 	sre->sre_type = SCTP_REMOTE_ERROR;
 	sre->sre_flags = 0;
 	sre->sre_length = sizeof(struct sctp_remote_error);
@@ -3536,8 +3641,8 @@
 	if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		return;
 	}
-	if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
-	    (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+	if ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+	    (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED)) {
 		if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
 		    (notification == SCTP_NOTIFY_INTERFACE_UP) ||
 		    (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -3554,7 +3659,7 @@
 		if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
 			sctp_notify_adaptation_layer(stcb);
 		}
-		if (stcb->asoc.peer_supports_auth == 0) {
+		if (stcb->asoc.auth_supported == 0) {
 			sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
 			    NULL, so_locked);
 		}
@@ -3568,7 +3673,7 @@
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
-			    (struct sockaddr *)&net->ro._l_addr, error);
+			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_INTERFACE_UP:
@@ -3577,7 +3682,7 @@
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
-			    (struct sockaddr *)&net->ro._l_addr, error);
+			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_INTERFACE_CONFIRMED:
@@ -3586,7 +3691,7 @@
 
 			net = (struct sctp_nets *)data;
 			sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
-			    (struct sockaddr *)&net->ro._l_addr, error);
+			    (struct sockaddr *)&net->ro._l_addr, error, so_locked);
 			break;
 		}
 	case SCTP_NOTIFY_SPECIAL_SP_FAIL:
@@ -3611,8 +3716,8 @@
 			break;
 		}
 	case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
-		if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
-		    ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+		if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+		    ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
 			sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
 		} else {
 			sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
@@ -3619,8 +3724,8 @@
 		}
 		break;
 	case SCTP_NOTIFY_ASSOC_REM_ABORTED:
-		if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
-		    ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+		if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+		    ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
 			sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
 		} else {
 			sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
@@ -3628,7 +3733,7 @@
 		break;
 	case SCTP_NOTIFY_ASSOC_RESTART:
 		sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, 0, so_locked);
-		if (stcb->asoc.peer_supports_auth == 0) {
+		if (stcb->asoc.auth_supported == 0) {
 			sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
 			    NULL, so_locked);
 		}
@@ -3657,15 +3762,15 @@
 		break;
 	case SCTP_NOTIFY_ASCONF_ADD_IP:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
-		    error);
+		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_ASCONF_DELETE_IP:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
-		    error);
+		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
 		sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
-		    error);
+		    error, so_locked);
 		break;
 	case SCTP_NOTIFY_PEER_SHUTDOWN:
 		sctp_notify_shutdown_event(stcb);
@@ -3693,7 +3798,7 @@
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
-		    __FUNCTION__, notification, notification);
+		    __func__, notification, notification);
 		break;
 	}			/* end switch */
 }
@@ -3732,6 +3837,15 @@
 	TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
 		TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
 		asoc->sent_queue_cnt--;
+		if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+			if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+				asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+			} else {
+				panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+			}
+		}
 		if (chk->data != NULL) {
 			sctp_free_bufspace(stcb, asoc, chk, 1);
 			sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb,
@@ -3748,6 +3862,13 @@
 	TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 		TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 		asoc->send_queue_cnt--;
+		if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+			asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+		} else {
+			panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+		}
 		if (chk->data != NULL) {
 			sctp_free_bufspace(stcb, asoc, chk, 1);
 			sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
@@ -3826,14 +3947,15 @@
 
 void
 sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
-    struct mbuf *m, int iphlen, struct sctphdr *sh,
-    struct mbuf *op_err,
-    uint8_t use_mflowid, uint32_t mflowid,
+    struct mbuf *m, int iphlen,
+    struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, struct mbuf *op_err,
+    uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	uint32_t vtag;
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -3847,12 +3969,12 @@
 		vrf_id = stcb->asoc.vrf_id;
 		stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
 	}
-	sctp_send_abort(m, iphlen, sh, vtag, op_err,
-	    use_mflowid, mflowid,
+	sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err,
+	    mflowtype, mflowid, inp->fibnum,
 	    vrf_id, port);
 	if (stcb != NULL) {
 		/* Ok, now lets free it */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(inp);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -3865,8 +3987,9 @@
 		    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 		}
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
@@ -3945,18 +4068,18 @@
 #endif
 )
 {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
 
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	so = SCTP_INP_SO(inp);
 #endif
 	if (stcb == NULL) {
 		/* Got to have a TCB */
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
-			if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+			if (LIST_EMPTY(&inp->sctp_asoc_list)) {
 				sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 				    SCTP_CALLED_DIRECTLY_NOCMPSET);
 			}
@@ -3980,7 +4103,7 @@
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	sctp_print_out_track_log(stcb);
 #endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	if (!so_locked) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -3989,8 +4112,9 @@
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 	}
 #endif
-	(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+	(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+	    SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	if (!so_locked) {
 		SCTP_SOCKET_UNLOCK(so, 1);
 	}
@@ -3998,9 +4122,11 @@
 }
 
 void
-sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
-    struct sctp_inpcb *inp,
-    uint8_t use_mflowid, uint32_t mflowid,
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
+    struct sockaddr *src, struct sockaddr *dst,
+    struct sctphdr *sh, struct sctp_inpcb *inp,
+    struct mbuf *cause,
+    uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_chunkhdr *ch, chunk_buf;
@@ -4010,7 +4136,7 @@
 	SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
 	/* Generate a TO address for future reference */
 	if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
-		if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+		if (LIST_EMPTY(&inp->sctp_asoc_list)) {
 			sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 			    SCTP_CALLED_DIRECTLY_NOCMPSET);
 		}
@@ -4028,9 +4154,6 @@
 		case SCTP_INIT:
 			contains_init_chunk = 1;
 			break;
-		case SCTP_COOKIE_ECHO:
-			/* We hit here only if the assoc is being freed */
-			return;
 		case SCTP_PACKET_DROPPED:
 			/* we don't respond to pkt-dropped */
 			return;
@@ -4044,8 +4167,8 @@
 			 */
 			return;
 		case SCTP_SHUTDOWN_ACK:
-			sctp_send_shutdown_complete2(m, sh,
-			    use_mflowid, mflowid,
+			sctp_send_shutdown_complete2(src, dst, sh,
+			    mflowtype, mflowid, fibnum,
 			    vrf_id, port);
 			return;
 		default:
@@ -4058,8 +4181,8 @@
 	if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
 	    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
 	    (contains_init_chunk == 0))) {
-		sctp_send_abort(m, iphlen, sh, 0, NULL,
-		    use_mflowid, mflowid,
+		sctp_send_abort(m, iphlen, src, dst, sh, 0, cause,
+		    mflowtype, mflowid, fibnum,
 		    vrf_id, port);
 	}
 }
@@ -4214,7 +4337,6 @@
 #ifdef INET6
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	ip6buf[0] = 0;
 #endif
 
 	switch (sa->sa_family) {
@@ -4251,62 +4373,6 @@
 }
 
 void
-sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
-{
-	switch (iph->ip_v) {
-#ifdef INET
-	case IPVERSION:
-		{
-			struct sockaddr_in lsa, fsa;
-
-			bzero(&lsa, sizeof(lsa));
-			lsa.sin_len = sizeof(lsa);
-			lsa.sin_family = AF_INET;
-			lsa.sin_addr = iph->ip_src;
-			lsa.sin_port = sh->src_port;
-			bzero(&fsa, sizeof(fsa));
-			fsa.sin_len = sizeof(fsa);
-			fsa.sin_family = AF_INET;
-			fsa.sin_addr = iph->ip_dst;
-			fsa.sin_port = sh->dest_port;
-			SCTP_PRINTF("src: ");
-			sctp_print_address((struct sockaddr *)&lsa);
-			SCTP_PRINTF("dest: ");
-			sctp_print_address((struct sockaddr *)&fsa);
-			break;
-		}
-#endif
-#ifdef INET6
-	case IPV6_VERSION >> 4:
-		{
-			struct ip6_hdr *ip6;
-			struct sockaddr_in6 lsa6, fsa6;
-
-			ip6 = (struct ip6_hdr *)iph;
-			bzero(&lsa6, sizeof(lsa6));
-			lsa6.sin6_len = sizeof(lsa6);
-			lsa6.sin6_family = AF_INET6;
-			lsa6.sin6_addr = ip6->ip6_src;
-			lsa6.sin6_port = sh->src_port;
-			bzero(&fsa6, sizeof(fsa6));
-			fsa6.sin6_len = sizeof(fsa6);
-			fsa6.sin6_family = AF_INET6;
-			fsa6.sin6_addr = ip6->ip6_dst;
-			fsa6.sin6_port = sh->dest_port;
-			SCTP_PRINTF("src: ");
-			sctp_print_address((struct sockaddr *)&lsa6);
-			SCTP_PRINTF("dest: ");
-			sctp_print_address((struct sockaddr *)&fsa6);
-			break;
-		}
-#endif
-	default:
-		/* TSNH */
-		break;
-	}
-}
-
-void
 sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
     struct sctp_inpcb *new_inp,
     struct sctp_tcb *stcb,
@@ -4478,7 +4544,7 @@
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
 			SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
 		} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			struct socket *so;
 
 			so = SCTP_INP_SO(inp);
@@ -4499,7 +4565,7 @@
 			}
 #endif
 			sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			if (!so_locked) {
 				SCTP_SOCKET_UNLOCK(so, 1);
 			}
@@ -4632,7 +4698,7 @@
 		if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
 			SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
 		} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			struct socket *so;
 
 			so = SCTP_INP_SO(inp);
@@ -4651,7 +4717,7 @@
 			}
 #endif
 			sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		}
@@ -4670,19 +4736,43 @@
  */
 
 struct mbuf *
-sctp_generate_invmanparam(int err)
+sctp_generate_cause(uint16_t code, char *info)
 {
-	/* Return a MBUF with a invalid mandatory parameter */
 	struct mbuf *m;
+	struct sctp_gen_error_cause *cause;
+	size_t info_len, len;
 
-	m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
-	if (m) {
-		struct sctp_paramhdr *ph;
+	if ((code == 0) || (info == NULL)) {
+		return (NULL);
+	}
+	info_len = strlen(info);
+	len = sizeof(struct sctp_paramhdr) + info_len;
+	m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+	if (m != NULL) {
+		SCTP_BUF_LEN(m) = len;
+		cause = mtod(m, struct sctp_gen_error_cause *);
+		cause->code = htons(code);
+		cause->length = htons((uint16_t) len);
+		memcpy(cause->info, info, info_len);
+	}
+	return (m);
+}
 
-		SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
-		ph = mtod(m, struct sctp_paramhdr *);
-		ph->param_length = htons(sizeof(struct sctp_paramhdr));
-		ph->param_type = htons(err);
+struct mbuf *
+sctp_generate_no_user_data_cause(uint32_t tsn)
+{
+	struct mbuf *m;
+	struct sctp_error_no_user_data *no_user_data_cause;
+	size_t len;
+
+	len = sizeof(struct sctp_error_no_user_data);
+	m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+	if (m != NULL) {
+		SCTP_BUF_LEN(m) = len;
+		no_user_data_cause = mtod(m, struct sctp_error_no_user_data *);
+		no_user_data_cause->cause.code = htons(SCTP_CAUSE_NO_USER_DATA);
+		no_user_data_cause->cause.length = htons((uint16_t) len);
+		no_user_data_cause->tsn = tsn;	/* tsn is passed in as NBO */
 	}
 	return (m);
 }
@@ -4741,6 +4831,21 @@
 
 	stream = tp1->rec.data.stream_number;
 	seq = tp1->rec.data.stream_seq;
+	if (sent || !(tp1->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
+		stcb->asoc.abandoned_sent[0]++;
+		stcb->asoc.abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+		stcb->asoc.strmout[stream].abandoned_sent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+		stcb->asoc.strmout[stream].abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+	} else {
+		stcb->asoc.abandoned_unsent[0]++;
+		stcb->asoc.abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+		stcb->asoc.strmout[stream].abandoned_unsent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+		stcb->asoc.strmout[stream].abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+	}
 	do {
 		ret_sz += tp1->book_size;
 		if (tp1->data != NULL) {
@@ -4834,81 +4939,68 @@
 		 * Still no eom found. That means there is stuff left on the
 		 * stream out queue.. yuck.
 		 */
+		SCTP_TCB_SEND_LOCK(stcb);
 		strq = &stcb->asoc.strmout[stream];
-		SCTP_TCB_SEND_LOCK(stcb);
-		TAILQ_FOREACH(sp, &strq->outqueue, next) {
-			/* FIXME: Shouldn't this be a serial number check? */
-			if (sp->strseq > seq) {
-				break;
-			}
-			/* Check if its our SEQ */
-			if (sp->strseq == seq) {
-				sp->discard_rest = 1;
-				/*
-				 * We may need to put a chunk on the queue
-				 * that holds the TSN that would have been
-				 * sent with the LAST bit.
-				 */
+		sp = TAILQ_FIRST(&strq->outqueue);
+		if (sp != NULL) {
+			sp->discard_rest = 1;
+			/*
+			 * We may need to put a chunk on the queue that
+			 * holds the TSN that would have been sent with the
+			 * LAST bit.
+			 */
+			if (chk == NULL) {
+				/* Yep, we have to */
+				sctp_alloc_a_chunk(stcb, chk);
 				if (chk == NULL) {
-					/* Yep, we have to */
-					sctp_alloc_a_chunk(stcb, chk);
-					if (chk == NULL) {
-						/*
-						 * we are hosed. All we can
-						 * do is nothing.. which
-						 * will cause an abort if
-						 * the peer is paying
-						 * attention.
-						 */
-						goto oh_well;
-					}
-					memset(chk, 0, sizeof(*chk));
-					chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
-					chk->sent = SCTP_FORWARD_TSN_SKIP;
-					chk->asoc = &stcb->asoc;
-					chk->rec.data.stream_seq = sp->strseq;
-					chk->rec.data.stream_number = sp->stream;
-					chk->rec.data.payloadtype = sp->ppid;
-					chk->rec.data.context = sp->context;
-					chk->flags = sp->act_flags;
-					if (sp->net)
-						chk->whoTo = sp->net;
-					else
-						chk->whoTo = stcb->asoc.primary_destination;
-					atomic_add_int(&chk->whoTo->ref_count, 1);
-					chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
-					stcb->asoc.pr_sctp_cnt++;
-					chk->pr_sctp_on = 1;
-					TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
-					stcb->asoc.sent_queue_cnt++;
-					stcb->asoc.pr_sctp_cnt++;
-				} else {
-					chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
-				}
-		oh_well:
-				if (sp->data) {
 					/*
-					 * Pull any data to free up the SB
-					 * and allow sender to "add more"
-					 * while we will throw away :-)
+					 * we are hosed. All we can do is
+					 * nothing.. which will cause an
+					 * abort if the peer is paying
+					 * attention.
 					 */
-					sctp_free_spbufspace(stcb, &stcb->asoc,
-					    sp);
-					ret_sz += sp->length;
-					do_wakeup_routine = 1;
-					sp->some_taken = 1;
-					sctp_m_freem(sp->data);
-					sp->data = NULL;
-					sp->tail_mbuf = NULL;
-					sp->length = 0;
+					goto oh_well;
 				}
-				break;
+				memset(chk, 0, sizeof(*chk));
+				chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
+				chk->sent = SCTP_FORWARD_TSN_SKIP;
+				chk->asoc = &stcb->asoc;
+				chk->rec.data.stream_seq = strq->next_sequence_send;
+				chk->rec.data.stream_number = sp->stream;
+				chk->rec.data.payloadtype = sp->ppid;
+				chk->rec.data.context = sp->context;
+				chk->flags = sp->act_flags;
+				chk->whoTo = NULL;
+				chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
+				strq->chunks_on_queues++;
+				TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
+				stcb->asoc.sent_queue_cnt++;
+				stcb->asoc.pr_sctp_cnt++;
+			} else {
+				chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
 			}
-		}		/* End tailq_foreach */
+			strq->next_sequence_send++;
+	oh_well:
+			if (sp->data) {
+				/*
+				 * Pull any data to free up the SB and allow
+				 * sender to "add more" while we will throw
+				 * away :-)
+				 */
+				sctp_free_spbufspace(stcb, &stcb->asoc, sp);
+				ret_sz += sp->length;
+				do_wakeup_routine = 1;
+				sp->some_taken = 1;
+				sctp_m_freem(sp->data);
+				sp->data = NULL;
+				sp->tail_mbuf = NULL;
+				sp->length = 0;
+			}
+		}
 		SCTP_TCB_SEND_UNLOCK(stcb);
 	}
 	if (do_wakeup_routine) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 
 		so = SCTP_INP_SO(stcb->sctp_ep);
@@ -4926,7 +5018,7 @@
 		}
 #endif
 		sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		if (!so_locked) {
 			SCTP_SOCKET_UNLOCK(so, 1);
 		}
@@ -5002,7 +5094,7 @@
 		}
 #endif
 #ifdef INET6
-	case INET6:
+	case AF_INET6:
 		{
 			struct sockaddr_in6 *sin6;
 			uint32_t hash_of_addr;
@@ -5035,7 +5127,6 @@
 
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
-stage_right:
 		if (holds_lock == 0)
 			SCTP_IPI_ADDR_RUNLOCK();
 		return (NULL);
@@ -5055,15 +5146,6 @@
 		return (NULL);
 	}
 	LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
-		if (sctp_ifap == NULL) {
-#ifdef INVARIANTS
-			panic("Huh LIST_FOREACH corrupt");
-			goto stage_right;
-#else
-			SCTP_PRINTF("LIST corrupt of sctp_ifap's?\n");
-			goto stage_right;
-#endif
-		}
 		if (addr->sa_family != sctp_ifap->address.sa.sa_family)
 			continue;
 #ifdef INET
@@ -5162,7 +5244,8 @@
 		sctp_chunk_output(stcb->sctp_ep, stcb,
 		    SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
 		/* make sure no timer is running */
-		sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+		sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL,
+		    SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
 		SCTP_TCB_UNLOCK(stcb);
 	} else {
 		/* Update how much we have pending */
@@ -5592,20 +5675,20 @@
 			s_extra = (struct sctp_extrcvinfo *)sinfo;
 			if ((nxt) &&
 			    (nxt->length)) {
-				s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+				s_extra->serinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
 				if (nxt->sinfo_flags & SCTP_UNORDERED) {
-					s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+					s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
 				}
 				if (nxt->spec_flags & M_NOTIFICATION) {
-					s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+					s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
 				}
-				s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
-				s_extra->sreinfo_next_length = nxt->length;
-				s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
-				s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+				s_extra->serinfo_next_aid = nxt->sinfo_assoc_id;
+				s_extra->serinfo_next_length = nxt->length;
+				s_extra->serinfo_next_ppid = nxt->sinfo_ppid;
+				s_extra->serinfo_next_stream = nxt->sinfo_stream;
 				if (nxt->tail_mbuf != NULL) {
 					if (nxt->end_added) {
-						s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+						s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
 					}
 				}
 			} else {
@@ -5616,11 +5699,11 @@
 				 * :-D
 				 */
 				nxt = NULL;
-				s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
-				s_extra->sreinfo_next_aid = 0;
-				s_extra->sreinfo_next_length = 0;
-				s_extra->sreinfo_next_ppid = 0;
-				s_extra->sreinfo_next_stream = 0;
+				s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
+				s_extra->serinfo_next_aid = 0;
+				s_extra->serinfo_next_length = 0;
+				s_extra->serinfo_next_ppid = 0;
+				s_extra->serinfo_next_stream = 0;
 			}
 		}
 		/*
@@ -5657,43 +5740,43 @@
 		entry->flgs = control->sinfo_flags;
 	}
 #endif
-	if (fromlen && from) {
-		cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sa.sa_len);
+	if ((fromlen > 0) && (from != NULL)) {
+		union sctp_sockstore store;
+		size_t len;
+
 		switch (control->whoFrom->ro._l_addr.sa.sa_family) {
 #ifdef INET6
 		case AF_INET6:
-			((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+			len = sizeof(struct sockaddr_in6);
+			store.sin6 = control->whoFrom->ro._l_addr.sin6;
+			store.sin6.sin6_port = control->port_from;
 			break;
 #endif
 #ifdef INET
 		case AF_INET:
-			((struct sockaddr_in *)from)->sin_port = control->port_from;
+#ifdef INET6
+			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+				len = sizeof(struct sockaddr_in6);
+				in6_sin_2_v4mapsin6(&control->whoFrom->ro._l_addr.sin,
+				    &store.sin6);
+				store.sin6.sin6_port = control->port_from;
+			} else {
+				len = sizeof(struct sockaddr_in);
+				store.sin = control->whoFrom->ro._l_addr.sin;
+				store.sin.sin_port = control->port_from;
+			}
+#else
+			len = sizeof(struct sockaddr_in);
+			store.sin = control->whoFrom->ro._l_addr.sin;
+			store.sin.sin_port = control->port_from;
+#endif
 			break;
 #endif
 		default:
+			len = 0;
 			break;
 		}
-		memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
-
-#if defined(INET) && defined(INET6)
-		if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
-		    (from->sa_family == AF_INET) &&
-		    ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
-			struct sockaddr_in *sin;
-			struct sockaddr_in6 sin6;
-
-			sin = (struct sockaddr_in *)from;
-			bzero(&sin6, sizeof(sin6));
-			sin6.sin6_family = AF_INET6;
-			sin6.sin6_len = sizeof(struct sockaddr_in6);
-			sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-			bcopy(&sin->sin_addr,
-			    &sin6.sin6_addr.s6_addr32[3],
-			    sizeof(sin6.sin6_addr.s6_addr32[3]));
-			sin6.sin6_port = sin->sin_port;
-			memcpy(from, &sin6, sizeof(struct sockaddr_in6));
-		}
-#endif
+		memcpy(from, &store, min((size_t)fromlen, len));
 #ifdef INET6
 		{
 			struct sockaddr_in6 lsa6, *from6;
@@ -5916,8 +5999,8 @@
 			goto release;
 		}
 		if ((uio->uio_resid == 0) ||
-		    ((in_eeor_mode) && (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))
-		    ) {
+		    ((in_eeor_mode) &&
+		    (copied_so_far >= (uint32_t) max(so->so_rcv.sb_lowat, 1)))) {
 			goto release;
 		}
 		/*
@@ -6103,7 +6186,7 @@
 		struct sctp_extrcvinfo *s_extra;
 
 		s_extra = (struct sctp_extrcvinfo *)sinfo;
-		s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+		s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
 	}
 	if (hold_rlock == 1) {
 		SCTP_INP_READ_UNLOCK(inp);
@@ -6161,9 +6244,7 @@
 sctp_m_free(struct mbuf *m)
 {
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
-		if (SCTP_BUF_IS_EXTENDED(m)) {
-			sctp_log_mb(m, SCTP_MBUF_IFREE);
-		}
+		sctp_log_mb(m, SCTP_MBUF_IFREE);
 	}
 	return (m_free(m));
 }
@@ -6262,9 +6343,12 @@
 		fromlen = 0;
 	}
 
+	if (filling_sinfo) {
+		memset(&sinfo, 0, sizeof(struct sctp_extrcvinfo));
+	}
 	error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, flagsp,
 	    (struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
-	if ((controlp) && (filling_sinfo)) {
+	if (controlp != NULL) {
 		/* copy back the sinfo in a CMSG format */
 		if (filling_sinfo)
 			*controlp = sctp_build_ctl_nchunk(inp,
@@ -6319,7 +6403,8 @@
 			    (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
 			    IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTPUTIL + SCTP_LOC_7);
 				*error = EINVAL;
 				goto out_now;
 			}
@@ -6326,7 +6411,8 @@
 			if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 				/* assoc gone no un-lock */
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTPUTIL + SCTP_LOC_8);
 				*error = ENOBUFS;
 				goto out_now;
 			}
@@ -6340,7 +6426,8 @@
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 			    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTPUTIL + SCTP_LOC_9);
 				*error = EINVAL;
 				goto out_now;
 			}
@@ -6347,7 +6434,8 @@
 			if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
 				/* assoc gone no un-lock */
 				SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
-				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+				    SCTP_FROM_SCTPUTIL + SCTP_LOC_10);
 				*error = ENOBUFS;
 				goto out_now;
 			}
@@ -6451,7 +6539,7 @@
 {
 	struct sockaddr *addr_touse;
 
-#ifdef INET6
+#if defined(INET) && defined(INET6)
 	struct sockaddr_in sin;
 
 #endif
@@ -6465,8 +6553,10 @@
 	addr_touse = sa;
 #ifdef INET6
 	if (sa->sa_family == AF_INET6) {
+#ifdef INET
 		struct sockaddr_in6 *sin6;
 
+#endif
 		if (sa->sa_len != sizeof(struct sockaddr_in6)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
@@ -6478,6 +6568,7 @@
 			*error = EINVAL;
 			return;
 		}
+#ifdef INET
 		sin6 = (struct sockaddr_in6 *)addr_touse;
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6490,6 +6581,7 @@
 			in6_sin6_2_sin(&sin, sin6);
 			addr_touse = (struct sockaddr *)&sin;
 		}
+#endif
 	}
 #endif
 #ifdef INET
@@ -6579,7 +6671,7 @@
 {
 	struct sockaddr *addr_touse;
 
-#ifdef INET6
+#if defined(INET) && defined(INET6)
 	struct sockaddr_in sin;
 
 #endif
@@ -6593,8 +6685,11 @@
 	addr_touse = sa;
 #ifdef INET6
 	if (sa->sa_family == AF_INET6) {
+#ifdef INET
 		struct sockaddr_in6 *sin6;
 
+#endif
+
 		if (sa->sa_len != sizeof(struct sockaddr_in6)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 			*error = EINVAL;
@@ -6606,6 +6701,7 @@
 			*error = EINVAL;
 			return;
 		}
+#ifdef INET
 		sin6 = (struct sockaddr_in6 *)addr_touse;
 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6618,6 +6714,7 @@
 			in6_sin6_2_sin(&sin, sin6);
 			addr_touse = (struct sockaddr *)&sin;
 		}
+#endif
 	}
 #endif
 #ifdef INET
@@ -6660,8 +6757,16 @@
 int
 sctp_local_addr_count(struct sctp_tcb *stcb)
 {
-	int loopback_scope, ipv4_local_scope, local_scope, site_scope;
-	int ipv4_addr_legal, ipv6_addr_legal;
+	int loopback_scope;
+
+#if defined(INET)
+	int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined (INET6)
+	int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
@@ -6668,20 +6773,16 @@
 	int count = 0;
 
 	/* Turn on all the appropriate scopes */
-	loopback_scope = stcb->asoc.loopback_scope;
-	ipv4_local_scope = stcb->asoc.ipv4_local_scope;
-	local_scope = stcb->asoc.local_scope;
-	site_scope = stcb->asoc.site_scope;
-	ipv4_addr_legal = ipv6_addr_legal = 0;
-	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
-		ipv6_addr_legal = 1;
-		if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
-			ipv4_addr_legal = 1;
-		}
-	} else {
-		ipv4_addr_legal = 1;
-	}
-
+	loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+	ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+	ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+	local_scope = stcb->asoc.scope.local_scope;
+	site_scope = stcb->asoc.scope.site_scope;
+	ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
 	SCTP_IPI_ADDR_RLOCK();
 	vrf = sctp_find_vrf(stcb->asoc.vrf_id);
 	if (vrf == NULL) {
@@ -6707,7 +6808,7 @@
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin;
 
-						sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+						sin = &sctp_ifa->address.sin;
 						if (sin->sin_addr.s_addr == 0) {
 							/*
 							 * skip unspecified
@@ -6715,6 +6816,10 @@
 							 */
 							continue;
 						}
+						if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+						    &sin->sin_addr) != 0) {
+							continue;
+						}
 						if ((ipv4_local_scope == 0) &&
 						    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 							continue;
@@ -6731,10 +6836,14 @@
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6;
 
-						sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+						sin6 = &sctp_ifa->address.sin6;
 						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 							continue;
 						}
+						if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+						    &sin6->sin6_addr) != 0) {
+							continue;
+						}
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
@@ -6820,24 +6929,16 @@
 }
 
 #endif
-/* XXX: Remove the #ifdef after tunneling over IPv6 works also on FreeBSD. */
-#ifdef INET
-/* We will need to add support
- * to bind the ports and such here
- * so we can do UDP tunneling. In
- * the mean-time, we return error
- */
-#include <netinet/udp.h>
-#include <netinet/udp_var.h>
-#include <sys/proc.h>
-#ifdef INET6
-#include <netinet6/sctp6_var.h>
-#endif
-
 static void
-sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
+sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
+    const struct sockaddr *sa SCTP_UNUSED, void *ctx SCTP_UNUSED)
 {
 	struct ip *iph;
+
+#ifdef INET6
+	struct ip6_hdr *ip6;
+
+#endif
 	struct mbuf *sp, *last;
 	struct udphdr *uhdr;
 	uint16_t port;
@@ -6854,7 +6955,7 @@
 	 * Split out the mbuf chain. Leave the IP header in m, place the
 	 * rest in the sp.
 	 */
-	sp = m_split(m, off, M_DONTWAIT);
+	sp = m_split(m, off, M_NOWAIT);
 	if (sp == NULL) {
 		/* Gak, drop packet, we can't do a split */
 		goto out;
@@ -6877,20 +6978,32 @@
 	for (last = m; last->m_next; last = last->m_next);
 	last->m_next = sp;
 	m->m_pkthdr.len += sp->m_pkthdr.len;
+	/*
+	 * The CSUM_DATA_VALID flags indicates that the HW checked the UDP
+	 * checksum and it was valid. Since CSUM_DATA_VALID ==
+	 * CSUM_SCTP_VALID this would imply that the HW also verified the
+	 * SCTP checksum. Therefore, clear the bit.
+	 */
+	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+	    "sctp_recv_udp_tunneled_packet(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+	    m->m_pkthdr.len,
+	    if_name(m->m_pkthdr.rcvif),
+	    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
 	iph = mtod(m, struct ip *);
 	switch (iph->ip_v) {
 #ifdef INET
 	case IPVERSION:
-		iph->ip_len -= sizeof(struct udphdr);
+		iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
 		sctp_input_with_port(m, off, port);
 		break;
 #endif
 #ifdef INET6
 	case IPV6_VERSION >> 4:
-		/* Not yet supported. */
-		goto out;
+		ip6 = mtod(m, struct ip6_hdr *);
+		ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
+		sctp6_input_with_port(&m, &off, port);
 		break;
-
 #endif
 	default:
 		goto out;
@@ -6904,19 +7017,22 @@
 void
 sctp_over_udp_stop(void)
 {
-	struct socket *sop;
-
 	/*
 	 * This function assumes sysctl caller holds sctp_sysctl_info_lock()
 	 * for writting!
 	 */
-	if (SCTP_BASE_INFO(udp_tun_socket) == NULL) {
-		/* Nothing to do */
-		return;
+#ifdef INET
+	if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) {
+		soclose(SCTP_BASE_INFO(udp4_tun_socket));
+		SCTP_BASE_INFO(udp4_tun_socket) = NULL;
 	}
-	sop = SCTP_BASE_INFO(udp_tun_socket);
-	soclose(sop);
-	SCTP_BASE_INFO(udp_tun_socket) = NULL;
+#endif
+#ifdef INET6
+	if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) {
+		soclose(SCTP_BASE_INFO(udp6_tun_socket));
+		SCTP_BASE_INFO(udp6_tun_socket) = NULL;
+	}
+#endif
 }
 
 int
@@ -6924,53 +7040,83 @@
 {
 	uint16_t port;
 	int ret;
+
+#ifdef INET
 	struct sockaddr_in sin;
-	struct socket *sop = NULL;
-	struct thread *th;
-	struct ucred *cred;
 
+#endif
+#ifdef INET6
+	struct sockaddr_in6 sin6;
+
+#endif
 	/*
 	 * This function assumes sysctl caller holds sctp_sysctl_info_lock()
 	 * for writting!
 	 */
 	port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
-	if (port == 0) {
+	if (ntohs(port) == 0) {
 		/* Must have a port set */
 		return (EINVAL);
 	}
-	if (SCTP_BASE_INFO(udp_tun_socket) != NULL) {
+#ifdef INET
+	if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) {
 		/* Already running -- must stop first */
 		return (EALREADY);
 	}
-	th = curthread;
-	cred = th->td_ucred;
-	if ((ret = socreate(PF_INET, &sop,
-	    SOCK_DGRAM, IPPROTO_UDP, cred, th))) {
+#endif
+#ifdef INET6
+	if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) {
+		/* Already running -- must stop first */
+		return (EALREADY);
+	}
+#endif
+#ifdef INET
+	if ((ret = socreate(PF_INET, &SCTP_BASE_INFO(udp4_tun_socket),
+	    SOCK_DGRAM, IPPROTO_UDP,
+	    curthread->td_ucred, curthread))) {
+		sctp_over_udp_stop();
 		return (ret);
 	}
-	SCTP_BASE_INFO(udp_tun_socket) = sop;
-	/* call the special UDP hook */
-	ret = udp_set_kernel_tunneling(sop, sctp_recv_udp_tunneled_packet);
-	if (ret) {
-		goto exit_stage_left;
+	/* Call the special UDP hook. */
+	if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket),
+	    sctp_recv_udp_tunneled_packet, NULL))) {
+		sctp_over_udp_stop();
+		return (ret);
 	}
-	/* Ok we have a socket, bind it to the port */
-	memset(&sin, 0, sizeof(sin));
-	sin.sin_len = sizeof(sin);
+	/* Ok, we have a socket, bind it to the port. */
+	memset(&sin, 0, sizeof(struct sockaddr_in));
+	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_port = htons(port);
-	ret = sobind(sop, (struct sockaddr *)&sin, th);
-	if (ret) {
-		/* Close up we cant get the port */
-exit_stage_left:
+	if ((ret = sobind(SCTP_BASE_INFO(udp4_tun_socket),
+	    (struct sockaddr *)&sin, curthread))) {
 		sctp_over_udp_stop();
 		return (ret);
 	}
-	/*
-	 * Ok we should now get UDP packets directly to our input routine
-	 * sctp_recv_upd_tunneled_packet().
-	 */
+#endif
+#ifdef INET6
+	if ((ret = socreate(PF_INET6, &SCTP_BASE_INFO(udp6_tun_socket),
+	    SOCK_DGRAM, IPPROTO_UDP,
+	    curthread->td_ucred, curthread))) {
+		sctp_over_udp_stop();
+		return (ret);
+	}
+	/* Call the special UDP hook. */
+	if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket),
+	    sctp_recv_udp_tunneled_packet, NULL))) {
+		sctp_over_udp_stop();
+		return (ret);
+	}
+	/* Ok, we have a socket, bind it to the port. */
+	memset(&sin6, 0, sizeof(struct sockaddr_in6));
+	sin6.sin6_len = sizeof(struct sockaddr_in6);
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = htons(port);
+	if ((ret = sobind(SCTP_BASE_INFO(udp6_tun_socket),
+	    (struct sockaddr *)&sin6, curthread))) {
+		sctp_over_udp_stop();
+		return (ret);
+	}
+#endif
 	return (0);
 }
-
-#endif

Modified: trunk/sys/netinet/sctputil.h
===================================================================
--- trunk/sys/netinet/sctputil.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctputil.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctputil.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctputil.h 294215 2016-01-17 12:15:41Z tuexen $");
 
 #ifndef _NETINET_SCTP_UTIL_H_
 #define _NETINET_SCTP_UTIL_H_
@@ -68,6 +68,9 @@
 /*
  * Function prototypes
  */
+int32_t
+sctp_map_assoc_state(int);
+
 uint32_t
 sctp_get_ifa_hash_val(struct sockaddr *addr);
 
@@ -81,7 +84,7 @@
 
 uint32_t sctp_select_a_tag(struct sctp_inpcb *, uint16_t lport, uint16_t rport, int);
 
-int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t);
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t, uint16_t);
 
 void sctp_fill_random_store(struct sctp_pcb *);
 
@@ -148,9 +151,11 @@
 sctp_get_next_param(struct mbuf *, int,
     struct sctp_paramhdr *, int);
 
-int sctp_add_pad_tombuf(struct mbuf *, int);
+struct mbuf *
+     sctp_add_pad_tombuf(struct mbuf *, int);
 
-int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+struct mbuf *
+     sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
 
 void 
 sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
@@ -187,7 +192,8 @@
 /* We abort responding to an IP packet for some reason */
 void
 sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
-    int, struct sctphdr *, struct mbuf *,
+    int, struct sockaddr *, struct sockaddr *,
+    struct sctphdr *, struct mbuf *,
     uint8_t, uint32_t,
     uint32_t, uint16_t);
 
@@ -202,9 +208,11 @@
 );
 
 void 
-sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
-    struct sctp_inpcb *,
-    uint8_t, uint32_t,
+sctp_handle_ootb(struct mbuf *, int, int,
+    struct sockaddr *, struct sockaddr *,
+    struct sctphdr *, struct sctp_inpcb *,
+    struct mbuf *,
+    uint8_t, uint32_t, uint16_t,
     uint32_t, uint16_t);
 
 int 
@@ -242,7 +250,6 @@
 int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
 
 void sctp_print_address(struct sockaddr *);
-void sctp_print_address_pkt(struct ip *, struct sctphdr *);
 
 int
 sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
@@ -252,7 +259,8 @@
 #endif
 );
 
-struct mbuf *sctp_generate_invmanparam(int);
+struct mbuf *sctp_generate_cause(uint16_t, char *);
+struct mbuf *sctp_generate_no_user_data_cause(uint32_t);
 
 void 
 sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
@@ -274,14 +282,14 @@
 #define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt)  \
 do { \
 	if (tp1->data != NULL) { \
-                atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+		atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
 		if ((asoc)->total_output_queue_size >= tp1->book_size) { \
 			atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
 		} else { \
 			(asoc)->total_output_queue_size = 0; \
 		} \
-   	        if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
-	            (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+		if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+		    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
 			if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
 				atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
 			} else { \
@@ -288,7 +296,7 @@
 				stcb->sctp_socket->so_snd.sb_cc = 0; \
 			} \
 		} \
-        } \
+	} \
 } while (0)
 
 #endif
@@ -295,14 +303,14 @@
 
 #define sctp_free_spbufspace(stcb, asoc, sp)  \
 do { \
- 	if (sp->data != NULL) { \
+	if (sp->data != NULL) { \
 		if ((asoc)->total_output_queue_size >= sp->length) { \
 			atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
 		} else { \
 			(asoc)->total_output_queue_size = 0; \
 		} \
-   	        if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
-	            (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+		if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+		    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
 			if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
 				atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
 			} else { \
@@ -309,7 +317,7 @@
 				stcb->sctp_socket->so_snd.sb_cc = 0; \
 			} \
 		} \
-        } \
+	} \
 } while (0)
 
 #define sctp_snd_sb_alloc(stcb, sz)  \
@@ -323,13 +331,9 @@
 } while (0)
 
 /* functions to start/stop udp tunneling */
-/* XXX: Remove the #ifdef after tunneling over IPv6 works also on FreeBSD. */
-#ifdef INET
 void sctp_over_udp_stop(void);
 int sctp_over_udp_start(void);
 
-#endif
-
 int
 sctp_soreceive(struct socket *so, struct sockaddr **psa,
     struct uio *uio,
@@ -349,10 +353,16 @@
 void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
 
 
+#ifdef SCTP_MBUF_LOGGING
 void
      sctp_log_mb(struct mbuf *m, int from);
 
 void
+     sctp_log_mbc(struct mbuf *m, int from);
+
+#endif
+
+void
 sctp_sblog(struct sockbuf *sb,
     struct sctp_tcb *stcb, int from, int incr);
 
@@ -369,7 +379,6 @@
 void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
 void sctp_log_block(uint8_t, struct sctp_association *, int);
 void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
-void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
 void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
 int sctp_fill_stat_log(void *, size_t *);
 void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);

Modified: trunk/sys/netinet/siftr.c
===================================================================
--- trunk/sys/netinet/siftr.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/siftr.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -60,7 +60,7 @@
  ******************************************************/
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/siftr.c 248144 2013-03-11 06:09:08Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/siftr.c 281174 2015-04-06 22:41:13Z hiren $");
 
 #include <sys/param.h>
 #include <sys/alq.h>
@@ -226,6 +226,10 @@
 	u_int			sent_inflight_bytes;
 	/* Number of segments currently in the reassembly queue. */
 	int			t_segqlen;
+	/* Flowid for the connection. */
+	u_int			flowid;	
+	/* Flow type for the connection. */
+	u_int			flowtype;	
 	/* Link to next pkt_node in the list. */
 	STAILQ_ENTRY(pkt_node)	nodes;
 };
@@ -267,6 +271,7 @@
 static unsigned int siftr_generate_hashes = 0;
 /* static unsigned int siftr_binary_log = 0; */
 static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log";
+static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log";
 static u_long siftr_hashmask;
 STAILQ_HEAD(pkthead, pkt_node) pkt_queue = STAILQ_HEAD_INITIALIZER(pkt_queue);
 LIST_HEAD(listhead, flow_hash_node) *counter_hash;
@@ -298,7 +303,7 @@
     "switch siftr module operations on/off");
 
 SYSCTL_PROC(_net_inet_siftr, OID_AUTO, logfile, CTLTYPE_STRING|CTLFLAG_RW,
-    &siftr_logfile, sizeof(siftr_logfile), &siftr_sysctl_logfile_name_handler,
+    &siftr_logfile_shadow, sizeof(siftr_logfile_shadow), &siftr_sysctl_logfile_name_handler,
     "A", "file to save siftr log messages to");
 
 SYSCTL_UINT(_net_inet_siftr, OID_AUTO, ppl, CTLFLAG_RW,
@@ -442,7 +447,7 @@
 		    MAX_LOG_MSG_LEN,
 		    "%c,0x%08x,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:"
 		    "%x:%x:%x:%x:%x,%u,%ld,%ld,%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,"
-		    "%u,%d,%u,%u,%u,%u,%u,%u\n",
+		    "%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n",
 		    direction[pkt_node->direction],
 		    pkt_node->hash,
 		    pkt_node->tval.tv_sec,
@@ -483,7 +488,9 @@
 		    pkt_node->rcv_buf_hiwater,
 		    pkt_node->rcv_buf_cc,
 		    pkt_node->sent_inflight_bytes,
-		    pkt_node->t_segqlen);
+		    pkt_node->t_segqlen,
+		    pkt_node->flowid,
+		    pkt_node->flowtype);
 	} else { /* IPv4 packet */
 		pkt_node->ip_laddr[0] = FIRST_OCTET(pkt_node->ip_laddr[3]);
 		pkt_node->ip_laddr[1] = SECOND_OCTET(pkt_node->ip_laddr[3]);
@@ -499,7 +506,7 @@
 		log_buf->ae_bytesused = snprintf(log_buf->ae_data,
 		    MAX_LOG_MSG_LEN,
 		    "%c,0x%08x,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%ld,%ld,"
-		    "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u\n",
+		    "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n",
 		    direction[pkt_node->direction],
 		    pkt_node->hash,
 		    (intmax_t)pkt_node->tval.tv_sec,
@@ -532,7 +539,9 @@
 		    pkt_node->rcv_buf_hiwater,
 		    pkt_node->rcv_buf_cc,
 		    pkt_node->sent_inflight_bytes,
-		    pkt_node->t_segqlen);
+		    pkt_node->t_segqlen,
+		    pkt_node->flowid,
+		    pkt_node->flowtype);
 #ifdef SIFTR_IPV6
 	}
 #endif
@@ -785,6 +794,8 @@
 	pn->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
 	pn->sent_inflight_bytes = tp->snd_max - tp->snd_una;
 	pn->t_segqlen = tp->t_segqlen;
+	pn->flowid = inp->inp_flowid;
+	pn->flowtype = inp->inp_flowtype;
 
 	/* We've finished accessing the tcb so release the lock. */
 	if (inp_locally_locked)
@@ -953,7 +964,8 @@
 			 * the mbuf cluster "at" at offset "offset" bytes from
 			 * the beginning of the "at" mbuf's data pointer.
 			 */
-			th->th_sum = in_cksum_skip(*m, ip->ip_len, ip_hl);
+			th->th_sum = in_cksum_skip(*m, ntohs(ip->ip_len),
+			    ip_hl);
 		}
 
 		/*
@@ -1143,38 +1155,38 @@
 	struct alq *new_alq;
 	int error;
 
-	if (req->newptr == NULL)
-		goto skip;
+	error = sysctl_handle_string(oidp, arg1, arg2, req);
 
-	/* If old filename and new filename are different. */
-	if (strncmp(siftr_logfile, (char *)req->newptr, PATH_MAX)) {
+	/* Check for error or same filename */
+	if (error != 0 || req->newptr == NULL ||
+	    strncmp(siftr_logfile, arg1, arg2) == 0)
+		goto done;
 
-		error = alq_open(&new_alq, req->newptr, curthread->td_ucred,
-		    SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0);
+	/* Filname changed */
+	error = alq_open(&new_alq, arg1, curthread->td_ucred,
+	    SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0);
+	if (error != 0)
+		goto done;
 
-		/* Bail if unable to create new alq. */
-		if (error)
-			return (1);
-
-		/*
-		 * If disabled, siftr_alq == NULL so we simply close
-		 * the alq as we've proved it can be opened.
-		 * If enabled, close the existing alq and switch the old
-		 * for the new.
-		 */
-		if (siftr_alq == NULL)
-			alq_close(new_alq);
-		else {
-			alq_close(siftr_alq);
-			siftr_alq = new_alq;
-		}
+	/*
+	 * If disabled, siftr_alq == NULL so we simply close
+	 * the alq as we've proved it can be opened.
+	 * If enabled, close the existing alq and switch the old
+	 * for the new.
+	 */
+	if (siftr_alq == NULL) {
+		alq_close(new_alq);
+	} else {
+		alq_close(siftr_alq);
+		siftr_alq = new_alq;
 	}
 
-skip:
-	return (sysctl_handle_string(oidp, arg1, arg2, req));
+	/* Update filename upon success */
+	strlcpy(siftr_logfile, arg1, arg2);
+done:
+	return (error);
 }
 
-
 static int
 siftr_manage_ops(uint8_t action)
 {

Modified: trunk/sys/netinet/tcp.h
===================================================================
--- trunk/sys/netinet/tcp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp.h 247525 2013-03-01 03:04:57Z jhb $
+ * $FreeBSD: stable/10/sys/netinet/tcp.h 292823 2015-12-28 02:43:12Z pkelsey $
  */
 
 #ifndef _NETINET_TCP_H_
@@ -98,6 +98,10 @@
 #define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
 #define	TCPOPT_SIGNATURE	19		/* Keyed MD5: RFC 2385 */
 #define	   TCPOLEN_SIGNATURE		18
+#define	TCPOPT_FAST_OPEN	34
+#define	   TCPOLEN_FAST_OPEN_EMPTY	2
+#define	   TCPOLEN_FAST_OPEN_MIN	6
+#define	   TCPOLEN_FAST_OPEN_MAX	18
 
 /* Miscellaneous constants */
 #define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
@@ -166,6 +170,7 @@
 #define	TCP_KEEPIDLE	256	/* L,N,X start keeplives after this period */
 #define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
 #define	TCP_KEEPCNT	1024	/* L,N number of keepalives before close */
+#define	TCP_FASTOPEN	1025	/* enable TFO / was created via TFO */
 
 /* Start of reserved space for third-party user-settable options. */
 #define	TCP_VENDOR	SO_VENDOR

Modified: trunk/sys/netinet/tcp_debug.c
===================================================================
--- trunk/sys/netinet/tcp_debug.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_debug.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_debug.c 193895 2009-06-10 10:39:41Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_debug.c 241913 2012-10-22 21:09:03Z glebius $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -176,11 +176,10 @@
 #ifdef INET6
 		    isipv6 ? ntohs(((struct ip6_hdr *)ipgen)->ip6_plen) :
 #endif
-		    ((struct ip *)ipgen)->ip_len;
+		    ntohs(((struct ip *)ipgen)->ip_len);
 		if (act == TA_OUTPUT) {
 			seq = ntohl(seq);
 			ack = ntohl(ack);
-			len = ntohs((u_short)len);
 		}
 		if (act == TA_OUTPUT)
 			len -= sizeof (struct tcphdr);

Modified: trunk/sys/netinet/tcp_debug.h
===================================================================
--- trunk/sys/netinet/tcp_debug.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_debug.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_debug.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_debug.h 188578 2009-02-13 15:14:43Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/tcp_debug.h 188578 2009-02-13 15:14:43Z luigi $
  */
 
 #ifndef _NETINET_TCP_DEBUG_H_

Added: trunk/sys/netinet/tcp_fastopen.c
===================================================================
--- trunk/sys/netinet/tcp_fastopen.c	                        (rev 0)
+++ trunk/sys/netinet/tcp_fastopen.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,443 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default.  To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>).  Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system.  The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ *     When non-zero, all client-supplied TFO cookies will be considered to
+ *     be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ *     When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ *     will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ *     When zero, no new TFO connections can be created.  On the transition
+ *     from enabled to disabled, all installed keys are removed.  On the 
+ *     transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ *     is non-zero and there are no keys installed, a new key will be 
+ *     generated immediately.  The transition from enabled to disabled does
+ *     not affect any TFO connections in progress; it only prevents new ones
+ *     from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ *     The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ *     The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ *     The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ *     sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it.  This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests.  Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood.  This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_fastopen.c 292823 2015-12-28 02:43:12Z pkelsey $");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define	TCP_FASTOPEN_MAX_KEYS	2
+#else
+#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
+#endif
+
+struct tcp_fastopen_keylist {
+	unsigned int newest;
+	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+struct tcp_fastopen_callout {
+	struct callout c;
+	struct vnet *v;
+};
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+    "Accept any non-empty cookie");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+    "Number of seconds between auto-generation of a new key; zero disables");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+    "Enable/disable TCP Fast Open processing");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+    "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+    "Maximum number of keys supported");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+    "Number of keys installed");
+
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_setkey, "",
+    "Install a new key");
+
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)
+
+#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)
+
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)
+
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)
+
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define	V_counter_zone			VNET(counter_zone)
+
+void
+tcp_fastopen_init(void)
+{
+	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+	    &V_tcp_fastopen_keylock, 0);
+	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+void
+tcp_fastopen_destroy(void)
+{
+	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+	rm_destroy(&V_tcp_fastopen_keylock);
+	uma_zdestroy(V_counter_zone);
+}
+
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+	unsigned int *counter;
+	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
+	if (counter)
+		*counter = 1;
+	return (counter);
+}
+
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+	if (*counter == 1)
+		uma_zfree(V_counter_zone, counter);
+	else
+		atomic_subtract_int(counter, 1);
+}
+
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+
+	V_tcp_fastopen_keys.newest++;
+	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
+		V_tcp_fastopen_keys.newest = 0;
+	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
+	    TCP_FASTOPEN_KEY_LEN);
+	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+		V_tcp_fastopen_numkeys++;
+}
+
+static void
+tcp_fastopen_autokey_locked(void)
+{
+	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
+	tcp_fastopen_addkey_locked(newkey);
+}
+
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+	struct tcp_fastopen_callout *ctx = arg;
+
+	CURVNET_SET(ctx->v);
+	tcp_fastopen_autokey_locked();
+	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+		      tcp_fastopen_autokey_callout, ctx);
+	CURVNET_RESTORE();
+}
+
+
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+	SIPHASH_CTX ctx;
+	uint64_t siphash;
+
+	SipHash24_Init(&ctx);
+	SipHash_SetKey(&ctx, key);
+	switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+	case 0:
+		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+		break;
+#endif
+#ifdef INET6
+	case INC_ISIPV6:
+		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+		break;
+#endif
+	}
+	SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+	return (siphash);
+}
+
+
+/*
+ * Return values:
+ *	-1	the cookie is invalid and no valid cookie is available
+ *	 0	the cookie is invalid and the latest cookie has been returned
+ *	 1	the cookie is valid and the latest cookie has been returned
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+    unsigned int len, uint64_t *latest_cookie)
+{
+	struct rm_priotracker tracker;
+	unsigned int i, key_index;
+	uint64_t cur_cookie;
+
+	if (V_tcp_fastopen_acceptany) {
+		*latest_cookie = 0;
+		return (1);
+	}
+
+	if (len != TCP_FASTOPEN_COOKIE_LEN) {
+		if (V_tcp_fastopen_numkeys > 0) {
+			*latest_cookie =
+			    tcp_fastopen_make_cookie(
+				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
+				inc);
+			return (0);
+		}
+ 		return (-1);
+	}
+
+	/*
+	 * Check against each available key, from newest to oldest.
+	 */
+	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+	key_index = V_tcp_fastopen_keys.newest;
+	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+		cur_cookie =
+		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+			inc);
+		if (i == 0)
+			*latest_cookie = cur_cookie;
+		if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+			TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+			return (1);
+		}
+		if (key_index == 0)
+			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+		else
+			key_index--;
+	}
+	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+	return (0);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int new;
+
+	new = V_tcp_fastopen_autokey;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		if (new > (INT_MAX / hz))
+			return (EINVAL);
+
+		TCP_FASTOPEN_KEYS_WLOCK();
+		if (V_tcp_fastopen_enabled) {
+			if (V_tcp_fastopen_autokey && !new)
+				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+			else if (new)
+				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+				    new * hz, tcp_fastopen_autokey_callout,
+				    &V_tcp_fastopen_autokey_ctx);
+		}
+		V_tcp_fastopen_autokey = new;
+		TCP_FASTOPEN_KEYS_WUNLOCK();
+	}
+
+	return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int new;
+
+	new = V_tcp_fastopen_enabled;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		if (V_tcp_fastopen_enabled && !new) {
+			/* enabled -> disabled */
+			TCP_FASTOPEN_KEYS_WLOCK();
+			V_tcp_fastopen_numkeys = 0;
+			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+			if (V_tcp_fastopen_autokey)
+				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+			V_tcp_fastopen_enabled = 0;
+			TCP_FASTOPEN_KEYS_WUNLOCK();
+		} else if (!V_tcp_fastopen_enabled && new) {
+			/* disabled -> enabled */
+			TCP_FASTOPEN_KEYS_WLOCK();
+			if (V_tcp_fastopen_autokey &&
+			    (V_tcp_fastopen_numkeys == 0)) {
+				tcp_fastopen_autokey_locked();
+				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+				    V_tcp_fastopen_autokey * hz,
+				    tcp_fastopen_autokey_callout,
+				    &V_tcp_fastopen_autokey_ctx);
+			}
+			V_tcp_fastopen_enabled = 1;
+			TCP_FASTOPEN_KEYS_WUNLOCK();
+		}
+	}
+	return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+	if (req->oldptr != NULL || req->oldlen != 0)
+		return (EINVAL);
+	if (req->newptr == NULL)
+		return (EPERM);
+	if (req->newlen != sizeof(newkey))
+		return (EINVAL);
+	error = SYSCTL_IN(req, newkey, sizeof(newkey));
+	if (error)
+		return (error);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	tcp_fastopen_addkey_locked(newkey);
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (0);
+}


Property changes on: trunk/sys/netinet/tcp_fastopen.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/tcp_fastopen.h
===================================================================
--- trunk/sys/netinet/tcp_fastopen.h	                        (rev 0)
+++ trunk/sys/netinet/tcp_fastopen.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,48 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/tcp_fastopen.h 292823 2015-12-28 02:43:12Z pkelsey $
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define	TCP_FASTOPEN_COOKIE_LEN	8	/* tied to SipHash24 64-bit output */
+
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define	V_tcp_fastopen_enabled	VNET(tcp_fastopen_enabled)
+
+void	tcp_fastopen_init(void);
+void	tcp_fastopen_destroy(void);
+unsigned int *tcp_fastopen_alloc_counter(void);
+void	tcp_fastopen_decrement_counter(unsigned int *counter);
+int	tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+	    unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */


Property changes on: trunk/sys/netinet/tcp_fastopen.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/tcp_fsm.h
===================================================================
--- trunk/sys/netinet/tcp_fsm.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_fsm.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_fsm.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_fsm.h 171656 2007-07-30 11:06:42Z des $
+ * $FreeBSD: stable/10/sys/netinet/tcp_fsm.h 171656 2007-07-30 11:06:42Z des $
  */
 
 #ifndef _NETINET_TCP_FSM_H_

Modified: trunk/sys/netinet/tcp_hostcache.c
===================================================================
--- trunk/sys/netinet/tcp_hostcache.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_hostcache.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_hostcache.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_hostcache.c 314667 2017-03-04 13:03:31Z avg $");
 
 #include "opt_inet6.h"
 
@@ -74,6 +74,7 @@
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
+#include <sys/sbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
@@ -233,7 +234,7 @@
 	/*
 	 * Set up periodic cache cleanup.
 	 */
-	callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
+	callout_init(&V_tcp_hc_callout, 1);
 	callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
 	    tcp_hc_purge, curvnet);
 }
@@ -593,23 +594,20 @@
 static int
 sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
 {
-	int bufsize;
 	int linesize = 128;
-	char *p, *buf;
-	int len, i, error;
+	struct sbuf sb;
+	int i, error;
 	struct hc_metrics *hc_entry;
 #ifdef INET6
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 
-	bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
+	sbuf_new(&sb, NULL, linesize * (V_tcp_hostcache.cache_count + 1),
+	    SBUF_FIXEDLEN);
 
-	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
-
-	len = snprintf(p, linesize,
-		"\nIP address        MTU  SSTRESH      RTT   RTTVAR BANDWIDTH "
+	sbuf_printf(&sb,
+	        "\nIP address        MTU  SSTRESH      RTT   RTTVAR BANDWIDTH "
 		"    CWND SENDPIPE RECVPIPE HITS  UPD  EXP\n");
-	p += len;
 
 #define msec(u) (((u) + 500) / 1000)
 	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
@@ -616,7 +614,7 @@
 		THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
 		TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
 			      rmx_q) {
-			len = snprintf(p, linesize,
+			sbuf_printf(&sb,
 			    "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
 			    "%4lu %4lu %4i\n",
 			    hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
@@ -638,13 +636,13 @@
 			    hc_entry->rmx_hits,
 			    hc_entry->rmx_updates,
 			    hc_entry->rmx_expire);
-			p += len;
 		}
 		THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
 	}
 #undef msec
-	error = SYSCTL_OUT(req, buf, p - buf);
-	free(buf, M_TEMP);
+	sbuf_finish(&sb);
+	error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
+	sbuf_delete(&sb);
 	return(error);
 }
 

Modified: trunk/sys/netinet/tcp_hostcache.h
===================================================================
--- trunk/sys/netinet/tcp_hostcache.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_hostcache.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/tcp_hostcache.h 185420 2008-11-28 23:39:25Z zec $
+ * $FreeBSD: stable/10/sys/netinet/tcp_hostcache.h 185420 2008-11-28 23:39:25Z zec $
  */
 
 /*

Modified: trunk/sys/netinet/tcp_input.c
===================================================================
--- trunk/sys/netinet/tcp_input.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_input.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -49,12 +49,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_input.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_input.c 327520 2018-01-03 16:16:20Z smh $");
 
 #include "opt_ipfw.h"		/* for ipfw_fwd	*/
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
@@ -64,6 +65,7 @@
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -83,6 +85,7 @@
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
@@ -96,6 +99,9 @@
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -106,6 +112,9 @@
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -118,11 +127,6 @@
 
 const int tcprexmtthresh = 3;
 
-VNET_DEFINE(struct tcpstat, tcpstat);
-SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
-    &VNET_NAME(tcpstat), tcpstat,
-    "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
-
 int tcp_log_in_vain = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
     &tcp_log_in_vain, 0,
@@ -145,6 +149,11 @@
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
+VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_pipe, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_do_rfc6675_pipe), 0,
+    "Use calculated pipe/in-flight bytes per RFC 6675");
+
 VNET_DEFINE(int, tcp_do_rfc3042) = 1;
 #define	V_tcp_do_rfc3042	VNET(tcp_do_rfc3042)
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
@@ -156,6 +165,14 @@
     &VNET_NAME(tcp_do_rfc3390), 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0,
+    "Experimental TCP extensions");
+
+VNET_DEFINE(int, tcp_do_initcwnd10) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_RW,
+    &VNET_NAME(tcp_do_initcwnd10), 0,
+    "Enable RFC 6928 (Increasing initial CWND to 10)");
+
 VNET_DEFINE(int, tcp_do_rfc3465) = 1;
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3465), 0,
@@ -168,7 +185,7 @@
 
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
 
-VNET_DEFINE(int, tcp_do_ecn) = 0;
+VNET_DEFINE(int, tcp_do_ecn) = 2;
 SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW,
     &VNET_NAME(tcp_do_ecn), 0,
     "TCP ECN support");
@@ -184,6 +201,11 @@
     &VNET_NAME(tcp_insecure_rst), 0,
     "Follow the old (insecure) criteria for accepting RST packets");
 
+VNET_DEFINE(int, tcp_recvspace) = 1024*64;
+#define	V_tcp_recvspace	VNET(tcp_recvspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+    &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
+
 VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
 #define	V_tcp_do_autorcvbuf	VNET(tcp_do_autorcvbuf)
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
@@ -216,12 +238,6 @@
 		     struct tcphdr *, struct mbuf *, int);
 static void	 tcp_xmit_timer(struct tcpcb *, int);
 static void	 tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline 	tcp_fields_to_host(struct tcphdr *);
-#ifdef TCP_SIGNATURE
-static void inline 	tcp_fields_to_net(struct tcphdr *);
-static int inline	tcp_signature_verify_input(struct mbuf *, int, int,
-			    int, struct tcpopt *, struct tcphdr *, u_int);
-#endif
 static void inline	cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
 			    uint16_t type);
 static void inline	cc_conn_init(struct tcpcb *tp);
@@ -230,17 +246,25 @@
 			    struct tcphdr *th, struct tcpopt *to);
 
 /*
+ * TCP statistics are stored in an "array" of counter(9)s.
+ */
+VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
+VNET_PCPUSTAT_SYSINIT(tcpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
+    tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(tcpstat);
+#endif /* VIMAGE */
+/*
  * Kernel module interface for updating tcpstat.  The argument is an index
- * into tcpstat treated as an array of u_long.  While this encodes the
- * general layout of tcpstat into the caller, it doesn't encode its location,
- * so that future changes to add, for example, per-CPU stats support won't
- * cause binary compatibility problems for kernel modules.
+ * into tcpstat treated as an array.
  */
 void
 kmod_tcpstat_inc(int statnum)
 {
 
-	(*((u_long *)&V_tcpstat + statnum))++;
+	counter_u64_add(VNET(tcpstat)[statnum], 1);
 }
 
 /*
@@ -302,9 +326,6 @@
 	struct hc_metrics_lite metrics;
 	struct inpcb *inp = tp->t_inpcb;
 	int rtt;
-#ifdef INET6
-	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-#endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
@@ -338,49 +359,33 @@
 	}
 
 	/*
-	 * Set the slow-start flight size depending on whether this
-	 * is a local network or not.
+	 * Set the initial slow-start flight size.
 	 *
-	 * Extend this so we cache the cwnd too and retrieve it here.
-	 * Make cwnd even bigger than RFC3390 suggests but only if we
-	 * have previous experience with the remote host. Be careful
-	 * not make cwnd bigger than remote receive window or our own
-	 * send socket buffer. Maybe put some additional upper bound
-	 * on the retrieved cwnd. Should do incremental updates to
-	 * hostcache when cwnd collapses so next connection doesn't
-	 * overloads the path again.
+	 * RFC5681 Section 3.1 specifies the default conservative values.
+	 * RFC3390 specifies slightly more aggressive values.
+	 * RFC6928 increases it to ten segments.
 	 *
-	 * XXXAO: Initializing the CWND from the hostcache is broken
-	 * and in its current form not RFC conformant.  It is disabled
-	 * until fixed or removed entirely.
-	 *
-	 * RFC3390 says only do this if SYN or SYN/ACK didn't got lost.
-	 * We currently check only in syncache_socket for that.
+	 * If a SYN or SYN/ACK was lost and retransmitted, we have to
+	 * reduce the initial CWND to one segment as congestion is likely
+	 * requiring us to be cautious.
 	 */
-/* #define TCP_METRICS_CWND */
-#ifdef TCP_METRICS_CWND
-	if (metrics.rmx_cwnd)
-		tp->snd_cwnd = max(tp->t_maxseg, min(metrics.rmx_cwnd / 2,
-		    min(tp->snd_wnd, so->so_snd.sb_hiwat)));
-	else
-#endif
-	if (V_tcp_do_rfc3390)
+	if (tp->snd_cwnd == 1)
+		tp->snd_cwnd = tp->t_maxseg;		/* SYN(-ACK) lost */
+	else if (V_tcp_do_initcwnd10)
+		tp->snd_cwnd = min(10 * tp->t_maxseg,
+		    max(2 * tp->t_maxseg, 14600));
+	else if (V_tcp_do_rfc3390)
 		tp->snd_cwnd = min(4 * tp->t_maxseg,
 		    max(2 * tp->t_maxseg, 4380));
-#ifdef INET6
-	else if (isipv6 && in6_localaddr(&inp->in6p_faddr))
-		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
-#if defined(INET) && defined(INET6)
-	else if (!isipv6 && in_localaddr(inp->inp_faddr))
-		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
-#ifdef INET
-	else if (in_localaddr(inp->inp_faddr))
-		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
-	else
-		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz;
+	else {
+		/* Per RFC5681 Section 3.1 */
+		if (tp->t_maxseg > 2190)
+			tp->snd_cwnd = 2 * tp->t_maxseg;
+		else if (tp->t_maxseg > 1095)
+			tp->snd_cwnd = 3 * tp->t_maxseg;
+		else
+			tp->snd_cwnd = 4 * tp->t_maxseg;
+	}
 
 	if (CC_ALGO(tp)->conn_init != NULL)
 		CC_ALGO(tp)->conn_init(tp->ccv);
@@ -453,27 +458,7 @@
 	tp->t_bytes_acked = 0;
 }
 
-static inline void
-tcp_fields_to_host(struct tcphdr *th)
-{
-
-	th->th_seq = ntohl(th->th_seq);
-	th->th_ack = ntohl(th->th_ack);
-	th->th_win = ntohs(th->th_win);
-	th->th_urp = ntohs(th->th_urp);
-}
-
 #ifdef TCP_SIGNATURE
-static inline void
-tcp_fields_to_net(struct tcphdr *th)
-{
-
-	th->th_seq = htonl(th->th_seq);
-	th->th_ack = htonl(th->th_ack);
-	th->th_win = htons(th->th_win);
-	th->th_urp = htons(th->th_urp);
-}
-
 static inline int
 tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
     struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
@@ -506,10 +491,13 @@
  *	  the ack that opens up a 0-sized window and
  *		- delayed acks are enabled or
  *		- this is a half-synchronized T/TCP connection.
+ *	- the segment size is not larger than the MSS and LRO wasn't used
+ *	  for this segment.
  */
-#define DELAY_ACK(tp)							\
+#define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
+	    (tlen <= tp->t_maxopd) &&					\
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 /*
@@ -557,9 +545,6 @@
 {
 	struct tcphdr *th = NULL;
 	struct ip *ip = NULL;
-#ifdef INET
-	struct ipovly *ipov;
-#endif
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
@@ -587,7 +572,7 @@
 	char *s = NULL;			/* address and port logging */
 	int ti_locked;
 #define	TI_UNLOCKED	1
-#define	TI_WLOCKED	2
+#define	TI_RLOCKED	2
 
 #ifdef TCPDEBUG
 	/*
@@ -659,7 +644,7 @@
 		 * Note: IP leaves IP header in first mbuf.
 		 */
 		if (off0 > sizeof (struct ip)) {
-			ip_stripoptions(m, (struct mbuf *)0);
+			ip_stripoptions(m);
 			off0 = sizeof(struct ip);
 		}
 		if (m->m_len < sizeof (struct tcpiphdr)) {
@@ -670,9 +655,8 @@
 			}
 		}
 		ip = mtod(m, struct ip *);
-		ipov = (struct ipovly *)ip;
 		th = (struct tcphdr *)((caddr_t)ip + off0);
-		tlen = ip->ip_len;
+		tlen = ntohs(ip->ip_len) - off0;
 
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
@@ -679,25 +663,24 @@
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in_pseudo(ip->ip_src.s_addr,
-						ip->ip_dst.s_addr,
-						htonl(m->m_pkthdr.csum_data +
-							ip->ip_len +
-							IPPROTO_TCP));
+				    ip->ip_dst.s_addr,
+				    htonl(m->m_pkthdr.csum_data + tlen +
+				    IPPROTO_TCP));
 			th->th_sum ^= 0xffff;
-#ifdef TCPDEBUG
-			ipov->ih_len = (u_short)tlen;
-			ipov->ih_len = htons(ipov->ih_len);
-#endif
 		} else {
+			struct ipovly *ipov = (struct ipovly *)ip;
+
 			/*
 			 * Checksum extended TCP header and data.
 			 */
-			len = sizeof (struct ip) + tlen;
+			len = off0 + tlen;
 			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
-			ipov->ih_len = (u_short)tlen;
-			ipov->ih_len = htons(ipov->ih_len);
+			ipov->ih_len = htons(tlen);
 			th->th_sum = in_cksum(m, len);
+			/* Reset length for SDT probes. */
+			ip->ip_len = htons(tlen + off0);
 		}
+
 		if (th->th_sum) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
@@ -748,7 +731,6 @@
 					return;
 				}
 				ip = mtod(m, struct ip *);
-				ipov = (struct ipovly *)ip;
 				th = (struct tcphdr *)((caddr_t)ip + off0);
 			}
 		}
@@ -770,26 +752,17 @@
 
 	/*
 	 * Locate pcb for segment; if we're likely to add or remove a
-	 * connection then first acquire pcbinfo lock.  There are two cases
+	 * connection then first acquire pcbinfo lock.  There are three cases
 	 * where we might discover later we need a write lock despite the
-	 * flags: ACKs moving a connection out of the syncache, and ACKs for
-	 * a connection in TIMEWAIT.
+	 * flags: ACKs moving a connection out of the syncache, ACKs for a
+	 * connection in TIMEWAIT and SYNs not targeting a listening socket.
 	 */
-	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
-		INP_INFO_WLOCK(&V_tcbinfo);
-		ti_locked = TI_WLOCKED;
+	if ((thflags & (TH_FIN | TH_RST)) != 0) {
+		INP_INFO_RLOCK(&V_tcbinfo);
+		ti_locked = TI_RLOCKED;
 	} else
 		ti_locked = TI_UNLOCKED;
 
-findpcb:
-#ifdef INVARIANTS
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-	} else {
-		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
-	}
-#endif
-
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
@@ -806,6 +779,14 @@
 	    )
 		fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 
+findpcb:
+#ifdef INVARIANTS
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+	} else {
+		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+	}
+#endif
 #ifdef INET6
 	if (isipv6 && fwd_tag != NULL) {
 		struct sockaddr_in6 *next_hop6;
@@ -830,10 +811,6 @@
 			    th->th_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
 		}
-		/* Remove the tag from the packet.  We don't need it anymore. */
-		m_tag_delete(m, fwd_tag);
-		m->m_flags &= ~M_IP6_NEXTHOP;
-		fwd_tag = NULL;
 	} else if (isipv6) {
 		inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
 		    th->th_sport, &ip6->ip6_dst, th->th_dport,
@@ -868,10 +845,6 @@
 			    th->th_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
 		}
-		/* Remove the tag from the packet.  We don't need it anymore. */
-		m_tag_delete(m, fwd_tag);
-		m->m_flags &= ~M_IP_NEXTHOP;
-		fwd_tag = NULL;
 	} else
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
 		    th->th_sport, ip->ip_dst, th->th_dport,
@@ -907,23 +880,32 @@
 		goto dropwithreset;
 	}
 	INP_WLOCK_ASSERT(inp);
-	if (!(inp->inp_flags & INP_HW_FLOWID)
-	    && (m->m_flags & M_FLOWID)
-	    && ((inp->inp_socket == NULL)
-		|| !(inp->inp_socket->so_options & SO_ACCEPTCONN))) {
-		inp->inp_flags |= INP_HW_FLOWID;
-		inp->inp_flags &= ~INP_SW_FLOWID;
+	/*
+	 * While waiting for inp lock during the lookup, another thread
+	 * can have dropped the inpcb, in which case we need to loop back
+	 * and try to find a new inpcb to deliver to.
+	 */
+	if (inp->inp_flags & INP_DROPPED) {
+		INP_WUNLOCK(inp);
+		inp = NULL;
+		goto findpcb;
+	}
+	if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
+	    (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
+	    ((inp->inp_socket == NULL) ||
+	    (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
+		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 #ifdef IPSEC
 #ifdef INET6
 	if (isipv6 && ipsec6_in_reject(m, inp)) {
-		V_ipsec6stat.in_polvio++;
+		IPSEC6STAT_INC(ips_in_polvio);
 		goto dropunlock;
 	} else
 #endif /* INET6 */
 	if (ipsec4_in_reject(m, inp) != 0) {
-		V_ipsec4stat.in_polvio++;
+		IPSECSTAT_INC(ips_in_polvio);
 		goto dropunlock;
 	}
 #endif /* IPSEC */
@@ -944,7 +926,7 @@
 	/*
 	 * A previous connection in TIMEWAIT state is supposed to catch stray
 	 * or duplicate segments arriving late.  If this segment was a
-	 * legitimate new connection attempt the old INPCB gets removed and
+	 * legitimate new connection attempt, the old INPCB gets removed and
 	 * we can try again to find a listening socket.
 	 *
 	 * At this point, due to earlier optimism, we may hold only an inpcb
@@ -960,20 +942,24 @@
 relocked:
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		if (ti_locked == TI_UNLOCKED) {
-			if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
-				INP_INFO_WLOCK(&V_tcbinfo);
-				ti_locked = TI_WLOCKED;
+				INP_INFO_RLOCK(&V_tcbinfo);
+				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
 					goto findpcb;
+				} else if (inp->inp_flags & INP_DROPPED) {
+					INP_WUNLOCK(inp);
+					inp = NULL;
+					goto findpcb;
 				}
 			} else
-				ti_locked = TI_WLOCKED;
+				ti_locked = TI_RLOCKED;
 		}
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if (thflags & TH_SYN)
 			tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -982,7 +968,7 @@
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return;
 	}
 	/*
@@ -996,6 +982,14 @@
 		goto dropwithreset;
 	}
 
+#ifdef TCP_OFFLOAD
+	if (tp->t_flags & TF_TOE) {
+		tcp_offload_input(tp, m);
+		m = NULL;	/* consumed by the TOE driver */
+		goto dropunlock;
+	}
+#endif
+
 	/*
 	 * We've identified a valid inpcb, but it could be that we need an
 	 * inpcbinfo write lock but don't hold it.  In this case, attempt to
@@ -1004,26 +998,32 @@
 	 * now be in TIMEWAIT.
 	 */
 #ifdef INVARIANTS
-	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	if ((thflags & (TH_FIN | TH_RST)) != 0)
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
-	if (tp->t_state != TCPS_ESTABLISHED) {
+	if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+	      (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+	       !(tp->t_flags & TF_FASTOPEN)))) {
 		if (ti_locked == TI_UNLOCKED) {
-			if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
-				INP_INFO_WLOCK(&V_tcbinfo);
-				ti_locked = TI_WLOCKED;
+				INP_INFO_RLOCK(&V_tcbinfo);
+				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
 					goto findpcb;
+				} else if (inp->inp_flags & INP_DROPPED) {
+					INP_WUNLOCK(inp);
+					inp = NULL;
+					goto findpcb;
 				}
 				goto relocked;
 			} else
-				ti_locked = TI_WLOCKED;
+				ti_locked = TI_RLOCKED;
 		}
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	}
 
 #ifdef MAC
@@ -1048,9 +1048,7 @@
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
-	 * attempt or the completion of a previous one.  Because listen
-	 * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
-	 * held in this case.
+	 * attempt or the completion of a previous one.
 	 */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct in_conninfo inc;
@@ -1057,8 +1055,6 @@
 
 		KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
 		    "tp not listening", __func__));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
 		bzero(&inc, sizeof(inc));
 #ifdef INET6
 		if (isipv6) {
@@ -1081,6 +1077,8 @@
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
@@ -1101,6 +1099,9 @@
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
@@ -1132,7 +1133,11 @@
 			 */
 			INP_WUNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
-			INP_WLOCK(inp);		/* new connection */
+			/*
+			 * New connection inpcb is already locked by
+			 * syncache_expand().
+			 */
+			INP_WLOCK_ASSERT(inp);
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
@@ -1358,13 +1363,31 @@
 			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
-		syncache_add(&inc, &to, th, inp, &so, m);
+#ifdef TCP_RFC7413
+		if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+			goto new_tfo_socket;
+#else
+		syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
 		/*
 		 * Entry added to syncache and mbuf consumed.
-		 * Everything already unlocked by syncache_add().
+		 * Only the listen socket is unlocked by syncache_add().
 		 */
+		if (ti_locked == TI_RLOCKED) {
+			INP_INFO_RUNLOCK(&V_tcbinfo);
+			ti_locked = TI_UNLOCKED;
+		}
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		return;
+	} else if (tp->t_state == TCPS_LISTEN) {
+		/*
+		 * When a listen socket is torn down the SO_ACCEPTCONN
+		 * flag is removed first while connections are drained
+		 * from the accept queue in a unlock/lock cycle of the
+		 * ACCEPT_LOCK, opening a race condition allowing a SYN
+		 * attempt go through unhandled.
+		 */
+		goto dropunlock;
 	}
 
 #ifdef TCP_SIGNATURE
@@ -1386,6 +1409,8 @@
 	}
 #endif
 
+	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
 	/*
 	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
 	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
@@ -1396,8 +1421,10 @@
 	return;
 
 dropwithreset:
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
@@ -1417,8 +1444,11 @@
 	goto drop;
 
 dropunlock:
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (m != NULL)
+		TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
@@ -1440,15 +1470,82 @@
 		m_freem(m);
 }
 
+/*
+ * Automatic sizing of receive socket buffer.  Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product).  Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent.  This allows us to be much
+ * more aggressive in scaling the receive socket buffer.  For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later.  Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ *  1. Application has not set receive buffer size with
+ *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ *  2. the number of bytes received during the time it takes
+ *     one timestamp to be reflected back to us (the RTT);
+ *  3. received bytes per RTT is within seven eighth of the
+ *     current socket buffer size;
+ *  4. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+int
+tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
+    struct tcpcb *tp, int tlen)
+{
+	int newsize = 0;
+
+	if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
+	    tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
+	    TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
+	    (tp->t_srtt >> TCP_RTT_SHIFT)) {
+		if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+		    so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
+			newsize = min(so->so_rcv.sb_hiwat +
+			    V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+		}
+		TCP_PROBE6(receive__autoresize, NULL, tp, mtod(m, const char *),
+		    tp, th, newsize);
+
+		/* Start over with next RTT. */
+		tp->rfbuf_ts = 0;
+		tp->rfbuf_cnt = 0;
+	} else {
+		tp->rfbuf_cnt += tlen;	/* add up */
+	}
+
+	return (newsize);
+}
+
 static void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
     int ti_locked)
 {
-	int thflags, acked, ourfinisacked, needoutput = 0;
+	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win;
 	u_long tiwin;
+	char *s;
+	struct in_conninfo *inc;
+	struct mbuf *mfree;
 	struct tcpopt to;
+	int tfo_syn;
 
 #ifdef TCPDEBUG
 	/*
@@ -1460,30 +1557,25 @@
 	short ostate = 0;
 #endif
 	thflags = th->th_flags;
+	inc = &tp->t_inpcb->inp_inc;
 	tp->sackhint.last_sack_ack = 0;
+	sack_changed = 0;
 
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, we require a write lock on tcbinfo.  Otherwise, we
-	 * allow either a read lock or a write lock, as we may have acquired
-	 * a write lock due to a race.
-	 *
-	 * Require a global write lock for SYN/FIN/RST segments or
-	 * non-established connections; otherwise accept either a read or
-	 * write lock, as we may have conservatively acquired a write lock in
-	 * certain cases in tcp_input() (is this still true?).  Currently we
-	 * will never enter with no lock, so we try to drop it quickly in the
-	 * common pure ack/pure data cases.
+	 * allow the tcbinfo to be in either alocked or unlocked, as the
+	 * caller may have unnecessarily acquired a write lock due to a race.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
-		KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 		    "SYN/FIN/RST/!EST", __func__, ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
-		if (ti_locked == TI_WLOCKED)
-			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		if (ti_locked == TI_RLOCKED)
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 			    "ti_locked: %d", __func__, ti_locked));
@@ -1555,7 +1647,6 @@
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
-
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
@@ -1587,6 +1678,25 @@
 	}
 
 	/*
+	 * If timestamps were negotiated during SYN/ACK they should
+	 * appear on every segment during this session and vice versa.
+	 */
+	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
+		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+			    "no action\n", s, __func__);
+			free(s, M_TCPLOG);
+		}
+	}
+	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
+		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+			    "no action\n", s, __func__);
+			free(s, M_TCPLOG);
+		}
+	}
+
+	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
@@ -1634,8 +1744,8 @@
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
-				if (ti_locked == TI_WLOCKED)
-					INP_INFO_WUNLOCK(&V_tcbinfo);
+				if (ti_locked == TI_RLOCKED)
+					INP_INFO_RUNLOCK(&V_tcbinfo);
 				ti_locked = TI_UNLOCKED;
 
 				TCPSTAT_INC(tcps_predack);
@@ -1738,8 +1848,8 @@
 			 * nothing on the reassembly queue and we have enough
 			 * buffer space to take it.
 			 */
-			if (ti_locked == TI_WLOCKED)
-				INP_INFO_WUNLOCK(&V_tcbinfo);
+			if (ti_locked == TI_RLOCKED)
+				INP_INFO_RUNLOCK(&V_tcbinfo);
 			ti_locked = TI_UNLOCKED;
 
 			/* Clean receiver SACK report if present */
@@ -1765,59 +1875,7 @@
 				tcp_trace(TA_INPUT, ostate, tp,
 				    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
-		/*
-		 * Automatic sizing of receive socket buffer.  Often the send
-		 * buffer size is not optimally adjusted to the actual network
-		 * conditions at hand (delay bandwidth product).  Setting the
-		 * buffer size too small limits throughput on links with high
-		 * bandwidth and high delay (eg. trans-continental/oceanic links).
-		 *
-		 * On the receive side the socket buffer memory is only rarely
-		 * used to any significant extent.  This allows us to be much
-		 * more aggressive in scaling the receive socket buffer.  For
-		 * the case that the buffer space is actually used to a large
-		 * extent and we run out of kernel memory we can simply drop
-		 * the new segments; TCP on the sender will just retransmit it
-		 * later.  Setting the buffer size too big may only consume too
-		 * much kernel memory if the application doesn't read() from
-		 * the socket or packet loss or reordering makes use of the
-		 * reassembly queue.
-		 *
-		 * The criteria to step up the receive buffer one notch are:
-		 *  1. the number of bytes received during the time it takes
-		 *     one timestamp to be reflected back to us (the RTT);
-		 *  2. received bytes per RTT is within seven eighth of the
-		 *     current socket buffer size;
-		 *  3. receive buffer size has not hit maximal automatic size;
-		 *
-		 * This algorithm does one step per RTT at most and only if
-		 * we receive a bulk stream w/o packet losses or reorderings.
-		 * Shrinking the buffer during idle times is not necessary as
-		 * it doesn't consume any memory when idle.
-		 *
-		 * TODO: Only step up if the application is actually serving
-		 * the buffer to better manage the socket buffer resources.
-		 */
-			if (V_tcp_do_autorcvbuf &&
-			    to.to_tsecr &&
-			    (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
-				if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
-				    to.to_tsecr - tp->rfbuf_ts < hz) {
-					if (tp->rfbuf_cnt >
-					    (so->so_rcv.sb_hiwat / 8 * 7) &&
-					    so->so_rcv.sb_hiwat <
-					    V_tcp_autorcvbuf_max) {
-						newsize =
-						    min(so->so_rcv.sb_hiwat +
-						    V_tcp_autorcvbuf_inc,
-						    V_tcp_autorcvbuf_max);
-					}
-					/* Start over with next RTT. */
-					tp->rfbuf_ts = 0;
-					tp->rfbuf_cnt = 0;
-				} else
-					tp->rfbuf_cnt += tlen;	/* add up */
-			}
+			newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
 			/* Add data to socket buffer. */
 			SOCKBUF_LOCK(&so->so_rcv);
@@ -1837,7 +1895,7 @@
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
-			if (DELAY_ACK(tp)) {
+			if (DELAY_ACK(tp, tlen)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
@@ -1858,10 +1916,6 @@
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
-	/* Reset receive buffer auto scaling when not in bulk receive mode. */
-	tp->rfbuf_ts = 0;
-	tp->rfbuf_cnt = 0;
-
 	switch (tp->t_state) {
 
 	/*
@@ -1875,6 +1929,28 @@
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
+#ifdef TCP_RFC7413
+		if (tp->t_flags & TF_FASTOPEN) {
+			/*
+			 * When a TFO connection is in SYN_RECEIVED, the
+			 * only valid packets are the initial SYN, a
+			 * retransmit/copy of the initial SYN (possibly with
+			 * a subset of the original data), a valid ACK, a
+			 * FIN, or a RST.
+			 */
+			if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+				rstreason = BANDLIM_RST_OPENPORT;
+				goto dropwithreset;
+			} else if (thflags & TH_SYN) {
+				/* non-initial SYN is ignored */
+				if ((tcp_timer_active(tp, TT_DELACK) || 
+				     tcp_timer_active(tp, TT_REXMT)))
+					goto drop;
+			} else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+				goto drop;
+			}
+		}
+#endif
 		break;
 
 	/*
@@ -1898,8 +1974,11 @@
 			rstreason = BANDLIM_UNLIMITED;
 			goto dropwithreset;
 		}
-		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
+			TCP_PROBE5(connect__refused, NULL, tp,
+			    mtod(m, const char *), tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
+		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
@@ -1925,7 +2004,7 @@
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
-			if (DELAY_ACK(tp) && tlen != 0)
+			if (DELAY_ACK(tp, tlen) && tlen != 0)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
@@ -1944,11 +2023,13 @@
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
-				tp->t_state = TCPS_FIN_WAIT_1;
+				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
-				tp->t_state = TCPS_ESTABLISHED;
+				tcp_state_change(tp, TCPS_ESTABLISHED);
+				TCP_PROBE5(connect__established, NULL, tp,
+				    mtod(m, const char *), tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
@@ -1966,12 +2047,12 @@
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
-			tp->t_state = TCPS_SYN_RECEIVED;
+			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
-		KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+		KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
 		    "ti_locked %d", __func__, ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
@@ -2099,12 +2180,12 @@
 			case TCPS_CLOSE_WAIT:
 				so->so_error = ECONNRESET;
 			close:
-				KASSERT(ti_locked == TI_WLOCKED,
+				KASSERT(ti_locked == TI_RLOCKED,
 				    ("tcp_do_segment: TH_RST 1 ti_locked %d",
 				    ti_locked));
-				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
-				tp->t_state = TCPS_CLOSED;
+				tcp_state_change(tp, TCPS_CLOSED);
 				TCPSTAT_INC(tcps_drops);
 				tp = tcp_close(tp);
 				break;
@@ -2111,10 +2192,10 @@
 
 			case TCPS_CLOSING:
 			case TCPS_LAST_ACK:
-				KASSERT(ti_locked == TI_WLOCKED,
+				KASSERT(ti_locked == TI_RLOCKED,
 				    ("tcp_do_segment: TH_RST 2 ti_locked %d",
 				    ti_locked));
-				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 				tp = tcp_close(tp);
 				break;
@@ -2168,11 +2249,7 @@
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
-		/*
-		 * If this is a duplicate SYN for our current connection,
-		 * advance over it and pretend and it's not a SYN.
-		 */
-		if (thflags & TH_SYN && th->th_seq == tp->irs) {
+		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
@@ -2222,15 +2299,14 @@
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
-		char *s;
-
-		KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+		KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
 		    "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
-		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
-			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
-			    "was closed, sending RST and removing tcpcb\n",
+		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
+			    "after socket was closed, "
+			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
@@ -2299,9 +2375,9 @@
 	 * error and we send an RST and drop the connection.
 	 */
 	if (thflags & TH_SYN) {
-		KASSERT(ti_locked == TI_WLOCKED,
+		KASSERT(ti_locked == TI_RLOCKED,
 		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		tp = tcp_drop(tp, ECONNRESET);
 		rstreason = BANDLIM_UNLIMITED;
@@ -2315,9 +2391,16 @@
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
-		    (tp->t_flags & TF_NEEDSYN))
+		    (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+			if (tp->t_state == TCPS_SYN_RECEIVED &&
+			    tp->t_flags & TF_FASTOPEN) {
+				tp->snd_wnd = tiwin;
+				cc_conn_init(tp);
+			}
+#endif
 			goto step6;
-		else if (tp->t_flags & TF_ACKNOW)
+		} else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
@@ -2350,11 +2433,33 @@
 		 */
 		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
-			tp->t_state = TCPS_FIN_WAIT_1;
+			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
-			tp->t_state = TCPS_ESTABLISHED;
-			cc_conn_init(tp);
+			tcp_state_change(tp, TCPS_ESTABLISHED);
+			TCP_PROBE5(accept__established, NULL, tp,
+			    mtod(m, const char *), tp, th);
+#ifdef TCP_RFC7413
+			if (tp->t_tfo_pending) {
+				tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+				tp->t_tfo_pending = NULL;
+
+				/*
+				 * Account for the ACK of our SYN prior to
+				 * regular ACK processing below.
+				 */ 
+				tp->snd_una++;
+			}
+			/*
+			 * TFO connections call cc_conn_init() during SYN
+			 * processing.  Calling it again here for such
+			 * connections is not harmless as it would undo the
+			 * snd_cwnd reduction that occurs when a TFO SYN|ACK
+			 * is retransmitted.
+			 */
+			if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+				cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
@@ -2388,13 +2493,21 @@
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
-			tcp_sack_doack(tp, &to, th->th_ack);
+			sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
+		else
+			/*
+			 * Reset the value so that previous (valid) value
+			 * from the last ack with SACK doesn't get used.
+			 */
+			tp->sackhint.sacked_bytes = 0;
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, &to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
-			if (tlen == 0 && tiwin == tp->snd_wnd) {
+			if (tlen == 0 &&
+			    (tiwin == tp->snd_wnd ||
+			    (tp->t_flags & TF_SACK_PERMIT))) {
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
@@ -2423,8 +2536,20 @@
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
-				if (!tcp_timer_active(tp, TT_REXMT) ||
-				    th->th_ack != tp->snd_una)
+				/*
+				 * Following 2 kinds of acks should not affect
+				 * dupack counting:
+				 * 1) Old acks
+				 * 2) Acks with SACK but without any new SACK
+				 * information in them. These could result from
+				 * any anomaly in the network like a switch
+				 * duplicating packets or a possible DoS attack.
+				 */
+				if (th->th_ack != tp->snd_una ||
+				    ((tp->t_flags & TF_SACK_PERMIT) &&
+				    !sack_changed))
+					break;
+				else if (!tcp_timer_active(tp, TT_REXMT))
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
@@ -2439,8 +2564,12 @@
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
-						awnd = (tp->snd_nxt - tp->snd_fack) +
-							tp->sackhint.sack_bytes_rexmit;
+						if (V_tcp_do_rfc6675_pipe)
+							awnd = tcp_compute_pipe(tp);
+						else
+							awnd = (tp->snd_nxt - tp->snd_fack) +
+								tp->sackhint.sack_bytes_rexmit;
+
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += tp->t_maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
@@ -2518,10 +2647,21 @@
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
+					/*
+					 * Process first and second duplicate
+					 * ACKs. Each indicates a segment
+					 * leaving the network, creating room
+					 * for more. Make sure we can send a
+					 * packet on reception of each duplicate
+					 * ACK by increasing snd_cwnd by one
+					 * segment. Restore the original
+					 * snd_cwnd after packet transmission.
+					 */
 					cc_ack_received(tp, th, CC_DUPACK);
 					u_long oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
+					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
@@ -2543,7 +2683,17 @@
 						 */
 						break;
 					}
-					(void) tcp_output(tp);
+					/*
+					 * Only call tcp_output when there
+					 * is new data available to be sent.
+					 * Otherwise we would send pure ACKs.
+					 */
+					SOCKBUF_LOCK(&so->so_snd);
+					avail = so->so_snd.sb_cc -
+					    (tp->snd_nxt - tp->snd_una);
+					SOCKBUF_UNLOCK(&so->so_snd);
+					if (avail > 0)
+						(void) tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > tp->t_maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
@@ -2558,9 +2708,20 @@
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
-			} else
-				tp->t_dupacks = 0;
+			}
 			break;
+		} else {
+			/*
+			 * This ack is advancing the left edge, reset the
+			 * counter.
+			 */
+			tp->t_dupacks = 0;
+			/*
+			 * If this ack also has new SACK info, increment the
+			 * counter as per rfc6675.
+			 */
+			if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+				tp->t_dupacks++;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
@@ -2579,7 +2740,6 @@
 			} else
 				cc_post_recovery(tp, th);
 		}
-		tp->t_dupacks = 0;
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
@@ -2606,6 +2766,9 @@
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = BYTES_THIS_ACK(tp, th);
+		KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
+		    "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
+		    tp->snd_una, th->th_ack, tp, m));
 		TCPSTAT_INC(tcps_rcvackpack);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
@@ -2675,16 +2838,24 @@
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > so->so_snd.sb_cc) {
-			tp->snd_wnd -= so->so_snd.sb_cc;
-			sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
+			if (tp->snd_wnd >= so->so_snd.sb_cc)
+				tp->snd_wnd -= so->so_snd.sb_cc;
+			else
+				tp->snd_wnd = 0;
+			mfree = sbcut_locked(&so->so_snd,
+			    (int)so->so_snd.sb_cc);
 			ourfinisacked = 1;
 		} else {
-			sbdrop_locked(&so->so_snd, acked);
-			tp->snd_wnd -= acked;
+			mfree = sbcut_locked(&so->so_snd, acked);
+			if (tp->snd_wnd >= (u_long) acked)
+				tp->snd_wnd -= acked;
+			else
+				tp->snd_wnd = 0;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
+		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
@@ -2730,7 +2901,7 @@
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
-				tp->t_state = TCPS_FIN_WAIT_2;
+				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
@@ -2742,9 +2913,9 @@
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
-				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tcp_twstart(tp);
-				INP_INFO_WUNLOCK(&V_tcbinfo);
+				INP_INFO_RUNLOCK(&V_tcbinfo);
 				m_freem(m);
 				return;
 			}
@@ -2758,7 +2929,7 @@
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
-				INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tp = tcp_close(tp);
 				goto drop;
 			}
@@ -2862,9 +3033,12 @@
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
-	if ((tlen || (thflags & TH_FIN)) &&
+	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		   (tp->t_flags & TF_FASTOPEN));
+	if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
+
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
@@ -2880,8 +3054,9 @@
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
-		    TCPS_HAVEESTABLISHED(tp->t_state)) {
-			if (DELAY_ACK(tp))
+		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
+		     tfo_syn)) {
+			if (DELAY_ACK(tp, tlen) || tfo_syn)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
@@ -2956,7 +3131,7 @@
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
-			tp->t_state = TCPS_CLOSE_WAIT;
+			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
@@ -2964,7 +3139,7 @@
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
-			tp->t_state = TCPS_CLOSING;
+			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
@@ -2973,18 +3148,18 @@
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
-			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-			KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+			KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
 			    "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
 			    ti_locked));
 
 			tcp_twstart(tp);
-			INP_INFO_WUNLOCK(&V_tcbinfo);
+			INP_INFO_RUNLOCK(&V_tcbinfo);
 			return;
 		}
 	}
-	if (ti_locked == TI_WLOCKED)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED)
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 #ifdef TCPDEBUG
@@ -3039,8 +3214,8 @@
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
-	if (ti_locked == TI_WLOCKED)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED)
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 	tp->t_flags |= TF_ACKNOW;
@@ -3050,8 +3225,8 @@
 	return;
 
 dropwithreset:
-	if (ti_locked == TI_WLOCKED)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED)
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 	if (tp != NULL) {
@@ -3062,8 +3237,8 @@
 	return;
 
 drop:
-	if (ti_locked == TI_WLOCKED) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
@@ -3234,6 +3409,21 @@
 			to->to_sacks = cp + 2;
 			TCPSTAT_INC(tcps_sack_rcv_blocks);
 			break;
+#ifdef TCP_RFC7413
+		case TCPOPT_FAST_OPEN:
+			if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+			    (optlen < TCPOLEN_FAST_OPEN_MIN) &&
+			    (optlen > TCPOLEN_FAST_OPEN_MAX))
+				continue;
+			if (!(flags & TO_SYN))
+				continue;
+			if (!V_tcp_fastopen_enabled)
+				continue;
+			to->to_flags |= TOF_FASTOPEN;
+			to->to_tfo_len = optlen - 2;
+			to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+			break;
+#endif
 		default:
 			continue;
 		}
@@ -3359,10 +3549,8 @@
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
- * If none, use an mss that can be handled on the outgoing
- * interface without forcing IP to fragment; if bigger than
- * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
- * to utilize large mbufs.  If no route is found, route has no mtu,
+ * If none, use an mss that can be handled on the outgoing interface
+ * without forcing IP to fragment.  If no route is found, route has no mtu,
  * or the destination isn't local, use a default, hopefully conservative
  * size (usually 512 or the default IP max size, but no more than the mtu
  * of the interface), as we can't discover anything about intervening
@@ -3383,7 +3571,7 @@
  */
 void
 tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
-    struct hc_metrics_lite *metricptr, int *mtuflags)
+    struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
 {
 	int mss = 0;
 	u_long maxmtu = 0;
@@ -3410,7 +3598,7 @@
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
-		maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
 	}
 #endif
@@ -3419,7 +3607,7 @@
 #endif
 #ifdef INET
 	{
-		maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
 	}
 #endif
@@ -3543,13 +3731,6 @@
 	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
 		mss -= TCPOLEN_TSTAMP_APPA;
 
-#if	(MCLBYTES & (MCLBYTES - 1)) == 0
-	if (mss > MCLBYTES)
-		mss &= ~(MCLBYTES-1);
-#else
-	if (mss > MCLBYTES)
-		mss = mss / MCLBYTES * MCLBYTES;
-#endif
 	tp->t_maxseg = mss;
 }
 
@@ -3561,12 +3742,13 @@
 	struct inpcb *inp;
 	struct socket *so;
 	struct hc_metrics_lite metrics;
-	int mtuflags = 0;
+	struct tcp_ifcap cap;
 
 	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
-	
-	tcp_mss_update(tp, offer, -1, &metrics, &mtuflags);
 
+	bzero(&cap, sizeof(cap));
+	tcp_mss_update(tp, offer, -1, &metrics, &cap);
+
 	mss = tp->t_maxseg;
 	inp = tp->t_inpcb;
 
@@ -3579,7 +3761,7 @@
 	 */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
-	if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+	if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
 		bufsize = metrics.rmx_sendpipe;
 	else
 		bufsize = so->so_snd.sb_hiwat;
@@ -3596,7 +3778,7 @@
 	tp->t_maxseg = mss;
 
 	SOCKBUF_LOCK(&so->so_rcv);
-	if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
 		bufsize = so->so_rcv.sb_hiwat;
@@ -3610,8 +3792,12 @@
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* Check the interface for TSO capabilities. */
-	if (mtuflags & CSUM_TSO)
+	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
+		tp->t_tsomax = cap.tsomax;
+		tp->t_tsomaxsegcount = cap.tsomaxsegcount;
+		tp->t_tsomaxsegsize = cap.tsomaxsegsize;
+	}
 }
 
 /*
@@ -3694,3 +3880,11 @@
 		tp->snd_cwnd = 0;
 	tp->snd_cwnd += tp->t_maxseg;
 }
+
+int
+tcp_compute_pipe(struct tcpcb *tp)
+{
+	return (tp->snd_max - tp->snd_una +
+		tp->sackhint.sack_bytes_rexmit -
+		tp->sackhint.sacked_bytes);
+}

Modified: trunk/sys/netinet/tcp_lro.c
===================================================================
--- trunk/sys/netinet/tcp_lro.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_lro.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_lro.c 247470 2013-02-28 16:32:36Z gallatin $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_lro.c 305189 2016-09-01 08:01:13Z sephe $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -41,6 +41,7 @@
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
+#include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -54,6 +55,7 @@
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
+#include <netinet/tcp_var.h>
 
 #include <netinet6/ip6_var.h>
 
@@ -68,6 +70,14 @@
 #define	TCP_LRO_INVALID_CSUM	0x0000
 #endif
 
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro,  CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "TCP LRO");
+
+static unsigned	tcp_lro_entries = LRO_ENTRIES;
+SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
+    CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
+    "default number of LRO entries");
+
 int
 tcp_lro_init(struct lro_ctrl *lc)
 {
@@ -82,7 +92,7 @@
 	SLIST_INIT(&lc->lro_active);
 
 	error = 0;
-	for (i = 0; i < LRO_ENTRIES; i++) {
+	for (i = 0; i < tcp_lro_entries; i++) {
 		le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
                 if (le == NULL) {
@@ -195,6 +205,25 @@
 #endif
 
 void
+tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
+{
+	struct lro_entry *le, *le_tmp;
+	struct timeval tv;
+
+	if (SLIST_EMPTY(&lc->lro_active))
+		return;
+
+	getmicrotime(&tv);
+	timevalsub(&tv, timeout);
+	SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
+		if (timevalcmp(&tv, &le->mtime, >=)) {
+			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+			tcp_lro_flush(lc, le);
+		}
+	}
+}
+
+void
 tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
 {
 
@@ -364,6 +393,7 @@
 	tcp_seq seq;
 	int error, ip_len, l;
 	uint16_t eh_type, tcp_data_len;
+	int force_flush = 0;
 
 	/* We expect a contiguous header [eh, ip, tcp]. */
 
@@ -430,10 +460,17 @@
 	 * Check TCP header constraints.
 	 */
 	/* Ensure no bits set besides ACK or PSH. */
-	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
-		return (TCP_LRO_CANNOT);
+	if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+		if (th->th_flags & TH_SYN)
+			return (TCP_LRO_CANNOT);
+		/*
+		 * Make sure that previously seen segements/ACKs are delivered
+		 * before this segement, e.g. FIN.
+		 */
+		force_flush = 1;
+	}
 
-	/* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
+	/* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
 	/* XXX-BZ Ideally we'd flush on PUSH? */
 
 	/*
@@ -447,8 +484,13 @@
 	ts_ptr = (uint32_t *)(th + 1);
 	if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
 	    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
-	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
-		return (TCP_LRO_CANNOT);
+	    TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+		/*
+		 * Make sure that previously seen segements/ACKs are delivered
+		 * before this segement.
+		 */
+		force_flush = 1;
+	}
 
 	/* If the driver did not pass in the checksum, set it now. */
 	if (csum == 0x0000)
@@ -482,6 +524,13 @@
 #endif
 		}
 
+		if (force_flush) {
+			/* Timestamps mismatch; this is a FIN, etc */
+			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+			tcp_lro_flush(lc, le);
+			return (TCP_LRO_CANNOT);
+		}
+
 		/* Flush now if appending will result in overflow. */
 		if (le->p_len > (65535 - tcp_data_len)) {
 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
@@ -544,19 +593,29 @@
 		if (le->p_len > (65535 - lc->ifp->if_mtu)) {
 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
 			tcp_lro_flush(lc, le);
-		}
+		} else
+			getmicrotime(&le->mtime);
 
 		return (0);
 	}
 
+	if (force_flush) {
+		/*
+		 * Nothing to flush, but this segment can not be further
+		 * aggregated/delayed.
+		 */
+		return (TCP_LRO_CANNOT);
+	}
+
 	/* Try to find an empty slot. */
 	if (SLIST_EMPTY(&lc->lro_free))
-		return (TCP_LRO_CANNOT);
+		return (TCP_LRO_NO_ENTRIES);
 
 	/* Start a new segment chain. */
 	le = SLIST_FIRST(&lc->lro_free);
 	SLIST_REMOVE_HEAD(&lc->lro_free, next);
 	SLIST_INSERT_HEAD(&lc->lro_active, le, next);
+	getmicrotime(&le->mtime);
 
 	/* Start filling in details. */
 	switch (eh_type) {

Modified: trunk/sys/netinet/tcp_lro.h
===================================================================
--- trunk/sys/netinet/tcp_lro.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_lro.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -25,12 +25,14 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet/tcp_lro.h 238230 2012-07-08 11:24:22Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_lro.h 301949 2016-06-16 04:21:27Z sephe $
  */
 
 #ifndef _TCP_LRO_H_
 #define _TCP_LRO_H_
 
+#include <sys/time.h>
+
 struct lro_entry
 {
 	SLIST_ENTRY(lro_entry)	next;
@@ -60,6 +62,7 @@
 	uint32_t		tsecr;
 	uint16_t		window;
 	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
+	struct timeval		mtime;
 };
 SLIST_HEAD(lro_head, lro_entry);
 
@@ -84,9 +87,11 @@
 
 int tcp_lro_init(struct lro_ctrl *);
 void tcp_lro_free(struct lro_ctrl *);
+void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
 void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
 int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
 
+#define	TCP_LRO_NO_ENTRIES	-2
 #define	TCP_LRO_CANNOT		-1
 #define	TCP_LRO_NOT_SUPPORTED	1
 

Modified: trunk/sys/netinet/tcp_offload.c
===================================================================
--- trunk/sys/netinet/tcp_offload.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_offload.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,146 +1,177 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_offload.c 196019 2009-08-01 19:26:27Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_offload.c 237263 2012-06-19 07:34:13Z np $");
 
+#include "opt_inet.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
 #include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
 #include <net/route.h>
-#include <net/vnet.h>
-
 #include <netinet/in.h>
-#include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
 
-uint32_t toedev_registration_count;
+int registered_toedevs;
 
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
 int
 tcp_offload_connect(struct socket *so, struct sockaddr *nam)
 {
 	struct ifnet *ifp;
-	struct toedev *tdev;
+	struct toedev *tod;
 	struct rtentry *rt;
-	int error;
+	int error = EOPNOTSUPP;
 
-	if (toedev_registration_count == 0)
-		return (EINVAL);
-	
-	/*
-	 * Look up the route used for the connection to 
-	 * determine if it uses an interface capable of
-	 * offloading the connection.
-	 */
-	rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
-	if (rt) 
+	INP_WLOCK_ASSERT(sotoinpcb(so));
+	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+	    ("%s: called with sa_family %d", __func__, nam->sa_family));
+
+	if (registered_toedevs == 0)
+		return (error);
+
+	rt = rtalloc1(nam, 0, 0);
+	if (rt)
 		RT_UNLOCK(rt);
-	else 
+	else
 		return (EHOSTUNREACH);
 
 	ifp = rt->rt_ifp;
-	if ((ifp->if_capenable & IFCAP_TOE) == 0) {
-		error = EINVAL;
-		goto fail;
-	}
-	
-	tdev = TOEDEV(ifp);
-	if (tdev == NULL) {
-		error = EPERM;
-		goto fail;
-	}
-	
-	if (tdev->tod_can_offload(tdev, so) == 0) {
-		error = EPERM;
-		goto fail;
-	}
-	
-	return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+	if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+		goto done;
+	if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+		goto done;
+
+	tod = TOEDEV(ifp);
+	if (tod != NULL)
+		error = tod->tod_connect(tod, so, rt, nam);
+done:
 	RTFREE(rt);
 	return (error);
 }
 
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
 
-/*
- * This file contains code as a short-term staging area before it is moved in 
- * to sys/netinet/tcp_offload.c
- */
+	INP_WLOCK_ASSERT(tp->t_inpcb);
 
+	EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
+
 void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
 {
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tcp_twstart(tp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
 }
 
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
 {
+	struct toedev *tod = tp->tod;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tp = tcp_close(tp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
-	if (tp)
-		INP_WUNLOCK(tp->t_inpcb);
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
 
-	return (tp);
+	tod->tod_input(tod, tp, m);
 }
 
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
 {
+	struct toedev *tod = tp->tod;
+	int error, flags;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(tp->t_inpcb);
-	tp = tcp_drop(tp, error);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
-	if (tp)
-		INP_WUNLOCK(tp->t_inpcb);
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
 
-	return (tp);
+	flags = tcp_outflags[tp->t_state];
+
+	if (flags & TH_RST) {
+		/* XXX: avoid repeated calls like we do for FIN */
+		error = tod->tod_send_rst(tod, tp);
+	} else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+	    (tp->t_flags & TF_SENTFIN) == 0) {
+		error = tod->tod_send_fin(tod, tp);
+		if (error == 0)
+			tp->t_flags |= TF_SENTFIN;
+	} else
+		error = tod->tod_output(tod, tp);
+
+	return (error);
 }
 
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_rcvd(tod, tp);
+}
+
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
+}
+
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_pcb_detach(tod, tp);
+}

Modified: trunk/sys/netinet/tcp_offload.h
===================================================================
--- trunk/sys/netinet/tcp_offload.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_offload.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,31 +1,31 @@
 /* $MidnightBSD$ */
 /*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
  *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
+ * $FreeBSD: stable/10/sys/netinet/tcp_offload.h 237263 2012-06-19 07:34:13Z np $
  *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: stable/9/sys/netinet/tcp_offload.h 218909 2011-02-21 09:01:34Z brucec $
  */
 
 #ifndef _NETINET_TCP_OFFLOAD_H_
@@ -35,321 +35,15 @@
 #error "no user-serviceable parts inside"
 #endif
 
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the 
- * the entire connection from set up to teardown, with some provision 
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying 
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD 
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's 
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend 
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- *   - tells the driver that new data may have been added to the 
- *     socket's send buffer - the driver should not fail if the
- *     buffer is in fact unchanged
- *   - the driver is responsible for providing credits (bytes in the send window)
- *     back to the socket by calling sbdrop() as segments are acknowledged.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_rcvd
- *   - returns credits to the driver and triggers window updates
- *     to the peer (a credit as used here is a byte in the peer's receive window)
- *   - the driver is expected to determine how many bytes have been 
- *     consumed and credit that back to the card so that it can grow
- *     the window again by maintaining its own state between invocations.
- *   - In principle this could be used to shrink the window as well as
- *     grow the window, although it is not used for that now.
- *   - this function needs to correctly handle being called any number of
- *     times without any bytes being consumed from the receive buffer.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_disconnect
- *   - tells the driver to send FIN to peer
- *   - driver is expected to send the remaining data and then do a clean half close
- *   - disconnect implies at least half-close so only send, reset, and detach
- *     are legal
- *   - the driver is expected to handle transition through the shutdown
- *     state machine and allow the stack to support SO_LINGER.
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- * + tu_reset
- *   - closes the connection and sends a RST to peer
- *   - driver is expectd to trigger an RST and detach the toepcb
- *   - no further calls are legal after reset
- *   - The driver expects the inpcb lock to be held - the driver is expected
- *     not to drop the lock. Hence the driver is not allowed to acquire the
- *     pcbinfo lock during this call.
- *
- *   The following fields in the tcpcb are expected to be referenced by the driver:
- *	+ iss
- *	+ rcv_nxt
- *	+ rcv_wnd
- *	+ snd_isn
- *	+ snd_max
- *	+ snd_nxt
- *	+ snd_una
- *	+ t_flags
- *	+ t_inpcb
- *	+ t_maxseg
- *	+ t_toe
- *
- *   The following fields in the inpcb are expected to be referenced by the driver:
- *	+ inp_lport
- *	+ inp_fport
- *	+ inp_laddr
- *	+ inp_fport
- *	+ inp_socket
- *	+ inp_ip_tos
- *
- *   The following fields in the socket are expected to be referenced by the
- *   driver:
- *	+ so_comp
- *	+ so_error
- *	+ so_linger
- *	+ so_options
- *	+ so_rcv
- *	+ so_snd
- *	+ so_state
- *	+ so_timeo
- *
- *   These functions all return 0 on success and can return the following errors
- *   as appropriate:
- *	+ EPERM:
- *	+ ENOBUFS: memory allocation failed
- *	+ EMSGSIZE: MTU changed during the call
- *	+ EHOSTDOWN:
- *	+ EHOSTUNREACH:
- *	+ ENETDOWN:
- *	* ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- *   - tells driver that the socket is going away so disconnect
- *     the toepcb and free appropriate resources
- *   - allows the driver to cleanly handle the case of connection state
- *     outliving the socket
- *   - no further calls are legal after detach
- *   - the driver is expected to provide its own synchronization between
- *     detach and receiving new data.
- * 
- * + tu_syncache_event
- *   - even if it is not actually needed, the driver is expected to
- *     call syncache_add for the initial SYN and then syncache_expand
- *     for the SYN,ACK
- *   - tells driver that a connection either has not been added or has 
- *     been dropped from the syncache
- *   - the driver is expected to maintain state that lives outside the 
- *     software stack so the syncache needs to be able to notify the
- *     toe driver that the software stack is not going to create a connection
- *     for a received SYN
- *   - The driver is responsible for any synchronization required between
- *     the syncache dropping an entry and the driver processing the SYN,ACK.
- * 
- */
-struct toe_usrreqs {
-	int (*tu_send)(struct tcpcb *tp);
-	int (*tu_rcvd)(struct tcpcb *tp);
-	int (*tu_disconnect)(struct tcpcb *tp);
-	int (*tu_reset)(struct tcpcb *tp);
-	void (*tu_detach)(struct tcpcb *tp);
-	void (*tu_syncache_event)(int event, void *toep);
-};
+extern int registered_toedevs;
 
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
-	u_int64_t	to_flags;	/* see tcpopt in tcp_var.h */
-	u_int16_t	to_mss;		/* maximum segment size */
-	u_int8_t	to_wscale;	/* window scaling */
+int  tcp_offload_connect(struct socket *, struct sockaddr *);
+void tcp_offload_listen_start(struct tcpcb *);
+void tcp_offload_listen_stop(struct tcpcb *);
+void tcp_offload_input(struct tcpcb *, struct mbuf *);
+int  tcp_offload_output(struct tcpcb *);
+void tcp_offload_rcvd(struct tcpcb *);
+void tcp_offload_ctloutput(struct tcpcb *, int, int);
+void tcp_offload_detach(struct tcpcb *);
 
-	u_int8_t	_pad1;		/* explicit pad for 64bit alignment */
-	u_int32_t	_pad2;		/* explicit pad for 64bit alignment */
-	u_int64_t	_pad3[4];	/* TBD */
-};
-
-#define	TOE_SC_ENTRY_PRESENT		1	/* 4-tuple already present */
-#define	TOE_SC_DROP			2	/* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening 
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef	void	(*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef	void	(*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
-
-EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
-EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
-
-/*
- * Check if the socket can be offloaded by the following steps:
- * - determine the egress interface
- * - check the interface for TOE capability and TOE is enabled
- * - check if the device has resources to offload the connection
- */
-int	tcp_offload_connect(struct socket *so, struct sockaddr *nam);
-
-/*
- * The tcp_output_* routines are wrappers around the toe_usrreqs calls
- * which trigger packet transmission. In the non-offloaded case they
- * translate to tcp_output. The tcp_offload_* routines notify TOE
- * of specific events. I the non-offloaded case they are no-ops.
- *
- * Listen is a special case because it is a 1 to many relationship
- * and there can be more than one offload driver in the system.
- */
-
-/*
- * Connection is offloaded
- */
-#define	tp_offload(tp)		((tp)->t_flags & TF_TOE)
-
-/*
- * hackish way of allowing this file to also be included by TOE
- * which needs to be kept ignorant of socket implementation details
- */
-#ifdef _SYS_SOCKETVAR_H_
-/*
- * The socket has not been marked as "do not offload"
- */
-#define	SO_OFFLOADABLE(so)	((so->so_options & SO_NO_OFFLOAD) == 0)
-
-static __inline int
-tcp_output_connect(struct socket *so, struct sockaddr *nam)
-{
-	struct tcpcb *tp = sototcpcb(so);
-	int error;
-
-	/*
-	 * If offload has been disabled for this socket or the 
-	 * connection cannot be offloaded just call tcp_output
-	 * to start the TCP state machine.
-	 */
-#ifndef TCP_OFFLOAD_DISABLE	
-	if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
-#endif		
-		error = tcp_output(tp);
-	return (error);
-}
-
-static __inline int
-tcp_output_send(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (tp_offload(tp))
-		return (tp->t_tu->tu_send(tp));
 #endif
-	return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_rcvd(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (tp_offload(tp))
-		return (tp->t_tu->tu_rcvd(tp));
-#endif
-	return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_disconnect(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (tp_offload(tp))
-		return (tp->t_tu->tu_disconnect(tp));
-#endif
-	return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_reset(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (tp_offload(tp))
-		return (tp->t_tu->tu_reset(tp));
-#endif
-	return (tcp_output(tp));
-}
-
-static __inline void
-tcp_offload_detach(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (tp_offload(tp))
-		tp->t_tu->tu_detach(tp);
-#endif	
-}
-
-static __inline void
-tcp_offload_listen_open(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
-		EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-#endif	
-}
-
-static __inline void
-tcp_offload_listen_close(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
-	EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
-#endif	
-}
-#undef SO_OFFLOADABLE
-#endif /* _SYS_SOCKETVAR_H_ */
-#undef tp_offload
-
-void tcp_offload_twstart(struct tcpcb *tp);
-struct tcpcb *tcp_offload_close(struct tcpcb *tp);
-struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
-
-#endif /* _NETINET_TCP_OFFLOAD_H_ */

Modified: trunk/sys/netinet/tcp_output.c
===================================================================
--- trunk/sys/netinet/tcp_output.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_output.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,11 +31,12 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_output.c 240307 2012-09-10 11:43:28Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_output.c 317375 2017-04-24 16:31:28Z smh $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
@@ -47,6 +48,7 @@
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
@@ -57,6 +59,7 @@
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
@@ -67,6 +70,9 @@
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #define	TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
@@ -76,6 +82,9 @@
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -90,16 +99,6 @@
 	&VNET_NAME(path_mtu_discovery), 1,
 	"Enable Path MTU Discovery");
 
-VNET_DEFINE(int, ss_fltsz) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
-	&VNET_NAME(ss_fltsz), 1,
-	"Slow start flight size");
-
-VNET_DEFINE(int, ss_fltsz_local) = 4;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize,
-	CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1,
-	"Slow start flight size for local networks");
-
 VNET_DEFINE(int, tcp_do_tso) = 1;
 #define	V_tcp_do_tso		VNET(tcp_do_tso)
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
@@ -106,6 +105,11 @@
 	&VNET_NAME(tcp_do_tso), 0,
 	"Enable TCP Segmentation Offload");
 
+VNET_DEFINE(int, tcp_sendspace) = 1024*32;
+#define	V_tcp_sendspace	VNET(tcp_sendspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+	&VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
+
 VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
 #define	V_tcp_do_autosndbuf	VNET(tcp_do_autosndbuf)
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
@@ -124,6 +128,16 @@
 	&VNET_NAME(tcp_autosndbuf_max), 0,
 	"Max size of automatic send buffer");
 
+/*
+ * Make sure that either retransmit or persist timer is set for SYN, FIN and
+ * non-ACK.
+ */
+#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)			\
+	KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
+	    tcp_timer_active((tp), TT_REXMT) ||				\
+	    tcp_timer_active((tp), TT_PERSIST),				\
+	    ("neither rexmt nor persist timer is set"))
+
 static void inline	hhook_run_tcp_est_out(struct tcpcb *tp,
 			    struct tcphdr *th, struct tcpopt *to,
 			    long len, int tso);
@@ -130,7 +144,7 @@
 static void inline	cc_after_idle(struct tcpcb *tp);
 
 /*
- * Wrapper for the TCP established ouput helper hook.
+ * Wrapper for the TCP established output helper hook.
  */
 static void inline
 hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
@@ -197,7 +211,23 @@
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
+#ifdef TCP_OFFLOAD
+	if (tp->t_flags & TF_TOE)
+		return (tcp_offload_output(tp));
+#endif
+
+#ifdef TCP_RFC7413
 	/*
+	 * For TFO connections in SYN_RECEIVED, only allow the initial
+	 * SYN|ACK and those sent by the retransmit timer.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_state == TCPS_SYN_RECEIVED) &&
+	    SEQ_GT(tp->snd_max, tp->snd_una) &&    /* inital SYN|ACK sent */
+	    (tp->snd_nxt != tp->snd_una))          /* not a retransmit */
+		return (0);
+#endif
+	/*
 	 * Determine length of data that should be transmitted,
 	 * and flags that will be used.
 	 * If there is some data or critical controls (SYN, RST)
@@ -382,6 +412,15 @@
 	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
 		if (tp->t_state != TCPS_SYN_RECEIVED)
 			flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+		/*
+		 * When sending additional segments following a TFO SYN|ACK,
+		 * do not include the SYN bit.
+		 */
+		if ((tp->t_flags & TF_FASTOPEN) &&
+		    (tp->t_state == TCPS_SYN_RECEIVED))
+			flags &= ~TH_SYN;
+#endif
 		off--, len++;
 	}
 
@@ -395,7 +434,18 @@
 		flags &= ~TH_FIN;
 	}
 
-	if (len < 0) {
+#ifdef TCP_RFC7413
+	/*
+	 * When retransmitting SYN|ACK on a passively-created TFO socket,
+	 * don't include data, as the presence of data may have caused the
+	 * original SYN|ACK to have been dropped by a middlebox.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+	     (flags & TH_RST)))
+		len = 0;
+#endif
+	if (len <= 0) {
 		/*
 		 * If FIN has been sent but not acked,
 		 * but we haven't been called to retransmit,
@@ -405,9 +455,16 @@
 		 * to (closed) window, and set the persist timer
 		 * if it isn't already going.  If the window didn't
 		 * close completely, just wait for an ACK.
+		 *
+		 * We also do a general check here to ensure that
+		 * we will set the persist timer when we have data
+		 * to send, but a 0-byte window. This makes sure
+		 * the persist timer is set even if the packet
+		 * hits one of the "goto send" lines below.
 		 */
 		len = 0;
-		if (sendwin == 0) {
+		if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
+			(off < (int) so->so_snd.sb_cc)) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tp->t_rxtshift = 0;
 			tp->snd_nxt = tp->snd_una;
@@ -543,19 +600,39 @@
 	}
 
 	/*
-	 * Compare available window to amount of window
-	 * known to peer (as advertised window less
-	 * next expected input).  If the difference is at least two
-	 * max size segments, or at least 50% of the maximum possible
-	 * window, then want to send a window update to peer.
-	 * Skip this if the connection is in T/TCP half-open state.
-	 * Don't send pure window updates when the peer has closed
-	 * the connection and won't ever send more data.
+	 * Sending of standalone window updates.
+	 *
+	 * Window updates are important when we close our window due to a
+	 * full socket buffer and are opening it again after the application
+	 * reads data from it.  Once the window has opened again and the
+	 * remote end starts to send again the ACK clock takes over and
+	 * provides the most current window information.
+	 *
+	 * We must avoid the silly window syndrome whereas every read
+	 * from the receive buffer, no matter how small, causes a window
+	 * update to be sent.  We also should avoid sending a flurry of
+	 * window updates when the socket buffer had queued a lot of data
+	 * and the application is doing small reads.
+	 *
+	 * Prevent a flurry of pointless window updates by only sending
+	 * an update when we can increase the advertized window by more
+	 * than 1/4th of the socket buffer capacity.  When the buffer is
+	 * getting full or is very small be more aggressive and send an
+	 * update whenever we can increase by two mss sized segments.
+	 * In all other situations the ACK's to new incoming data will
+	 * carry further window increases.
+	 *
+	 * Don't send an independent window update if a delayed
+	 * ACK is pending (it will get piggy-backed on it) or the
+	 * remote side already has done a half-close and won't send
+	 * more data.  Skip this if the connection is in T/TCP
+	 * half-open state.
 	 */
 	if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+	    !(tp->t_flags & TF_DELACK) &&
 	    !TCPS_HAVERCVDFIN(tp->t_state)) {
 		/*
-		 * "adv" is the amount we can increase the window,
+		 * "adv" is the amount we could increase the window,
 		 * taking into account that we are limited by
 		 * TCP_MAXWIN << tp->rcv_scale.
 		 */
@@ -575,10 +652,12 @@
 		 */
 		if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
 			goto dontupdate;
-		if (adv >= (long) (2 * tp->t_maxseg))
+
+		if (adv >= (long)(2 * tp->t_maxseg) &&
+		    (adv >= (long)(so->so_rcv.sb_hiwat / 4) ||
+		     recwin <= (long)(so->so_rcv.sb_hiwat / 8) ||
+		     so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
 			goto send;
-		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
-			goto send;
 	}
 dontupdate:
 
@@ -649,6 +728,12 @@
 
 send:
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+	if (len > 0) {
+		if (len >= tp->t_maxseg)
+			tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+		else
+			tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+	}
 	/*
 	 * Before ESTABLISHED, force sending of initial options
 	 * unless TCP set not to do any options.
@@ -671,13 +756,29 @@
 	 * segments.  Options for SYN-ACK segments are handled in TCP
 	 * syncache.
 	 */
+	to.to_flags = 0;
 	if ((tp->t_flags & TF_NOOPT) == 0) {
-		to.to_flags = 0;
 		/* Maximum segment size. */
 		if (flags & TH_SYN) {
 			tp->snd_nxt = tp->iss;
 			to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
 			to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+			/*
+			 * Only include the TFO option on the first
+			 * transmission of the SYN|ACK on a
+			 * passively-created TFO socket, as the presence of
+			 * the TFO option may have caused the original
+			 * SYN|ACK to have been dropped by a middlebox.
+			 */
+			if ((tp->t_flags & TF_FASTOPEN) &&
+			    (tp->t_state == TCPS_SYN_RECEIVED) &&
+			    (tp->t_rxtshift == 0)) {
+				to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+				to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+				to.to_flags |= TOF_FASTOPEN;
+			}
+#endif
 		}
 		/* Window scaling. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -690,11 +791,13 @@
 			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
-			/* Set receive buffer autosizing timestamp. */
-			if (tp->rfbuf_ts == 0 &&
-			    (so->so_rcv.sb_flags & SB_AUTOSIZE))
-				tp->rfbuf_ts = tcp_ts_getticks();
 		}
+
+		/* Set receive buffer autosizing timestamp. */
+		if (tp->rfbuf_ts == 0 &&
+		    (so->so_rcv.sb_flags & SB_AUTOSIZE))
+			tp->rfbuf_ts = tcp_ts_getticks();
+
 		/* Selective ACK's. */
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (flags & TH_SYN)
@@ -741,27 +844,122 @@
 		flags &= ~TH_FIN;
 
 		if (tso) {
+			u_int if_hw_tsomax;
+			u_int if_hw_tsomaxsegcount;
+			u_int if_hw_tsomaxsegsize;
+			struct mbuf *mb;
+			u_int moff;
+			int max_len;
+
+			/* extract TSO information */
+			if_hw_tsomax = tp->t_tsomax;
+			if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
+			if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
+
+			/*
+			 * Limit a TSO burst to prevent it from
+			 * overflowing or exceeding the maximum length
+			 * allowed by the network interface:
+			 */
 			KASSERT(ipoptlen == 0,
 			    ("%s: TSO can't do IP options", __func__));
 
 			/*
-			 * Limit a burst to IP_MAXPACKET minus IP,
-			 * TCP and options length to keep ip->ip_len
-			 * from overflowing.
+			 * Check if we should limit by maximum payload
+			 * length:
 			 */
-			if (len > IP_MAXPACKET - hdrlen) {
-				len = IP_MAXPACKET - hdrlen;
-				sendalot = 1;
+			if (if_hw_tsomax != 0) {
+				/* compute maximum TSO length */
+				max_len = (if_hw_tsomax - hdrlen -
+				    max_linkhdr);
+				if (max_len <= 0) {
+					len = 0;
+				} else if (len > max_len) {
+					sendalot = 1;
+					len = max_len;
+				}
 			}
 
 			/*
+			 * Check if we should limit by maximum segment
+			 * size and count:
+			 */
+			if (if_hw_tsomaxsegcount != 0 &&
+			    if_hw_tsomaxsegsize != 0) {
+				/*
+				 * Subtract one segment for the LINK
+				 * and TCP/IP headers mbuf that will
+				 * be prepended to this mbuf chain
+				 * after the code in this section
+				 * limits the number of mbufs in the
+				 * chain to if_hw_tsomaxsegcount.
+				 */
+				if_hw_tsomaxsegcount -= 1;
+				max_len = 0;
+				mb = sbsndmbuf(&so->so_snd, off, &moff);
+
+				while (mb != NULL && max_len < len) {
+					u_int mlen;
+					u_int frags;
+
+					/*
+					 * Get length of mbuf fragment
+					 * and how many hardware frags,
+					 * rounded up, it would use:
+					 */
+					mlen = (mb->m_len - moff);
+					frags = howmany(mlen,
+					    if_hw_tsomaxsegsize);
+
+					/* Handle special case: Zero Length Mbuf */
+					if (frags == 0)
+						frags = 1;
+
+					/*
+					 * Check if the fragment limit
+					 * will be reached or exceeded:
+					 */
+					if (frags >= if_hw_tsomaxsegcount) {
+						max_len += min(mlen,
+						    if_hw_tsomaxsegcount *
+						    if_hw_tsomaxsegsize);
+						break;
+					}
+					max_len += mlen;
+					if_hw_tsomaxsegcount -= frags;
+					moff = 0;
+					mb = mb->m_next;
+				}
+				if (max_len <= 0) {
+					len = 0;
+				} else if (len > max_len) {
+					sendalot = 1;
+					len = max_len;
+				}
+			}
+
+			/*
 			 * Prevent the last segment from being
-			 * fractional unless the send sockbuf can
-			 * be emptied.
+			 * fractional unless the send sockbuf can be
+			 * emptied:
 			 */
-			if (sendalot && off + len < so->so_snd.sb_cc) {
-				len -= len % (tp->t_maxopd - optlen);
+			max_len = (tp->t_maxopd - optlen);
+			if ((off + len) < so->so_snd.sb_cc) {
+				moff = len % max_len;
+				if (moff != 0) {
+					len -= moff;
+					sendalot = 1;
+				}
+			}
+
+			/*
+			 * In case there are too many small fragments
+			 * don't use TSO:
+			 */
+			if (len <= max_len) {
+				len = max_len;
 				sendalot = 1;
+				tso = 0;
 			}
 
 			/*
@@ -818,23 +1016,20 @@
 			TCPSTAT_INC(tcps_sndpack);
 			TCPSTAT_ADD(tcps_sndbyte, len);
 		}
-		MGETHDR(m, M_DONTWAIT, MT_DATA);
+#ifdef INET6
+		if (MHLEN < hdrlen + max_linkhdr)
+			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+		else
+#endif
+			m = m_gethdr(M_NOWAIT, MT_DATA);
+
 		if (m == NULL) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOBUFS;
+			sack_rxmit = 0;
 			goto out;
 		}
-#ifdef INET6
-		if (MHLEN < hdrlen + max_linkhdr) {
-			MCLGET(m, M_DONTWAIT);
-			if ((m->m_flags & M_EXT) == 0) {
-				SOCKBUF_UNLOCK(&so->so_snd);
-				m_freem(m);
-				error = ENOBUFS;
-				goto out;
-			}
-		}
-#endif
+
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 
@@ -854,6 +1049,7 @@
 				SOCKBUF_UNLOCK(&so->so_snd);
 				(void) m_free(m);
 				error = ENOBUFS;
+				sack_rxmit = 0;
 				goto out;
 			}
 		}
@@ -864,7 +1060,7 @@
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
-		if (off + len == so->so_snd.sb_cc)
+		if ((off + len == so->so_snd.sb_cc) && !(flags & TH_SYN))
 			flags |= TH_PUSH;
 		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {
@@ -878,9 +1074,10 @@
 		else
 			TCPSTAT_INC(tcps_sndwinup);
 
-		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
+			sack_rxmit = 0;
 			goto out;
 		}
 #ifdef INET6
@@ -925,7 +1122,7 @@
 	 * resend those bits a number of times as per
 	 * RFC 3168.
 	 */
-	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
 		if (tp->t_rxtshift >= 1) {
 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
 				flags |= TH_ECE|TH_CWR;
@@ -1042,7 +1239,7 @@
 		tp->snd_up = tp->snd_una;		/* drag it along */
 
 #ifdef TCP_SIGNATURE
-	if (tp->t_flags & TF_SIGNATURE) {
+	if (to.to_flags & TOF_SIGNATURE) {
 		int sigoff = to.to_signature - opt;
 		tcp_signature_compute(m, 0, len, optlen,
 		    (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
@@ -1058,8 +1255,8 @@
 #ifdef INET6
 	if (isipv6) {
 		/*
-		 * ip6_plen is not need to be filled now, and will be filled
-		 * in ip6_output.
+		 * There is no need to fill in ip6_plen right now.
+		 * It will be filled later by ip6_output.
 		 */
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
@@ -1103,75 +1300,6 @@
 	    __func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
 #endif
 
-	/*
-	 * In transmit state, time the transmission and arrange for
-	 * the retransmit.  In persist state, just set snd_max.
-	 */
-	if ((tp->t_flags & TF_FORCEDATA) == 0 || 
-	    !tcp_timer_active(tp, TT_PERSIST)) {
-		tcp_seq startseq = tp->snd_nxt;
-
-		/*
-		 * Advance snd_nxt over sequence space of this segment.
-		 */
-		if (flags & (TH_SYN|TH_FIN)) {
-			if (flags & TH_SYN)
-				tp->snd_nxt++;
-			if (flags & TH_FIN) {
-				tp->snd_nxt++;
-				tp->t_flags |= TF_SENTFIN;
-			}
-		}
-		if (sack_rxmit)
-			goto timer;
-		tp->snd_nxt += len;
-		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
-			tp->snd_max = tp->snd_nxt;
-			/*
-			 * Time this transmission if not a retransmission and
-			 * not currently timing anything.
-			 */
-			if (tp->t_rtttime == 0) {
-				tp->t_rtttime = ticks;
-				tp->t_rtseq = startseq;
-				TCPSTAT_INC(tcps_segstimed);
-			}
-		}
-
-		/*
-		 * Set retransmit timer if not currently set,
-		 * and not doing a pure ack or a keep-alive probe.
-		 * Initial value for retransmit timer is smoothed
-		 * round-trip time + 2 * round-trip time variance.
-		 * Initialize shift counter which is used for backoff
-		 * of retransmit time.
-		 */
-timer:
-		if (!tcp_timer_active(tp, TT_REXMT) &&
-		    ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
-		     (tp->snd_nxt != tp->snd_una))) {
-			if (tcp_timer_active(tp, TT_PERSIST)) {
-				tcp_timer_activate(tp, TT_PERSIST, 0);
-				tp->t_rxtshift = 0;
-			}
-			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
-		}
-	} else {
-		/*
-		 * Persist case, update snd_max but since we are in
-		 * persist mode (no window) we do not update snd_nxt.
-		 */
-		int xlen = len;
-		if (flags & TH_SYN)
-			++xlen;
-		if (flags & TH_FIN) {
-			++xlen;
-			tp->t_flags |= TF_SENTFIN;
-		}
-		if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
-			tp->snd_max = tp->snd_nxt + len;
-	}
-
 	/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
 	hhook_run_tcp_est_out(tp, th, &to, len, tso);
 
@@ -1219,6 +1347,23 @@
 		 */
 		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
 
+		/*
+		 * Set the packet size here for the benefit of DTrace probes.
+		 * ip6_output() will set it properly; it's supposed to include
+		 * the option header lengths as well.
+		 */
+		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
+
+		if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+		else
+			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+
+		if (tp->t_state == TCPS_SYN_SENT)
+			TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
+
+		TCP_PROBE5(send, NULL, tp, ip6, tp, th);
+
 		/* TODO: IPv6 IP6TOS_ECT bit on */
 		error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
 		    ((so->so_options & SO_DONTROUTE) ?  IP_ROUTETOIF : 0),
@@ -1225,7 +1370,7 @@
 		    NULL, NULL, tp->t_inpcb);
 
 		if (error == EMSGSIZE && ro.ro_rt != NULL)
-			mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+			mtu = ro.ro_rt->rt_mtu;
 		RO_RTFREE(&ro);
 	}
 #endif /* INET6 */
@@ -1237,7 +1382,7 @@
 	struct route ro;
 
 	bzero(&ro, sizeof(ro));
-	ip->ip_len = m->m_pkthdr.len;
+	ip->ip_len = htons(m->m_pkthdr.len);
 #ifdef INET6
 	if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
@@ -1250,18 +1395,122 @@
 	 *
 	 * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 	 */
-	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
-		ip->ip_off |= IP_DF;
+	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
+		ip->ip_off |= htons(IP_DF);
+		tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+	} else {
+		tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+	}
 
+	if (tp->t_state == TCPS_SYN_SENT)
+		TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
+
+	TCP_PROBE5(send, NULL, tp, ip, tp, th);
+
 	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
 
 	if (error == EMSGSIZE && ro.ro_rt != NULL)
-		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+		mtu = ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
     }
 #endif /* INET */
+
+out:
+	/*
+	 * In transmit state, time the transmission and arrange for
+	 * the retransmit.  In persist state, just set snd_max.
+	 */
+	if ((tp->t_flags & TF_FORCEDATA) == 0 || 
+	    !tcp_timer_active(tp, TT_PERSIST)) {
+		tcp_seq startseq = tp->snd_nxt;
+
+		/*
+		 * Advance snd_nxt over sequence space of this segment.
+		 */
+		if (flags & (TH_SYN|TH_FIN)) {
+			if (flags & TH_SYN)
+				tp->snd_nxt++;
+			if (flags & TH_FIN) {
+				tp->snd_nxt++;
+				tp->t_flags |= TF_SENTFIN;
+			}
+		}
+		if (sack_rxmit)
+			goto timer;
+		tp->snd_nxt += len;
+		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+			tp->snd_max = tp->snd_nxt;
+			/*
+			 * Time this transmission if not a retransmission and
+			 * not currently timing anything.
+			 */
+			if (tp->t_rtttime == 0) {
+				tp->t_rtttime = ticks;
+				tp->t_rtseq = startseq;
+				TCPSTAT_INC(tcps_segstimed);
+			}
+		}
+
+		/*
+		 * Set retransmit timer if not currently set,
+		 * and not doing a pure ack or a keep-alive probe.
+		 * Initial value for retransmit timer is smoothed
+		 * round-trip time + 2 * round-trip time variance.
+		 * Initialize shift counter which is used for backoff
+		 * of retransmit time.
+		 */
+timer:
+		if (!tcp_timer_active(tp, TT_REXMT) &&
+		    ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
+		     (tp->snd_nxt != tp->snd_una))) {
+			if (tcp_timer_active(tp, TT_PERSIST)) {
+				tcp_timer_activate(tp, TT_PERSIST, 0);
+				tp->t_rxtshift = 0;
+			}
+			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+		} else if (len == 0 && so->so_snd.sb_cc &&
+		    !tcp_timer_active(tp, TT_REXMT) &&
+		    !tcp_timer_active(tp, TT_PERSIST)) {
+			/*
+			 * Avoid a situation where we do not set persist timer
+			 * after a zero window condition. For example:
+			 * 1) A -> B: packet with enough data to fill the window
+			 * 2) B -> A: ACK for #1 + new data (0 window
+			 *    advertisement)
+			 * 3) A -> B: ACK for #2, 0 len packet
+			 *
+			 * In this case, A will not activate the persist timer,
+			 * because it chose to send a packet. Unless tcp_output
+			 * is called for some other reason (delayed ack timer,
+			 * another input packet from B, socket syscall), A will
+			 * not send zero window probes.
+			 *
+			 * So, if you send a 0-length packet, but there is data
+			 * in the socket buffer, and neither the rexmt or
+			 * persist timer is already set, then activate the
+			 * persist timer.
+			 */
+			tp->t_rxtshift = 0;
+			tcp_setpersist(tp);
+		}
+	} else {
+		/*
+		 * Persist case, update snd_max but since we are in
+		 * persist mode (no window) we do not update snd_nxt.
+		 */
+		int xlen = len;
+		if (flags & TH_SYN)
+			++xlen;
+		if (flags & TH_FIN) {
+			++xlen;
+			tp->t_flags |= TF_SENTFIN;
+		}
+		if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
+			tp->snd_max = tp->snd_nxt + len;
+	}
+
 	if (error) {
 
 		/*
@@ -1289,7 +1538,6 @@
 			} else
 				tp->snd_nxt -= len;
 		}
-out:
 		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);	/* Check gotos. */
 		switch (error) {
 		case EPERM:
@@ -1296,9 +1544,7 @@
 			tp->t_softerror = error;
 			return (error);
 		case ENOBUFS:
-	                if (!tcp_timer_active(tp, TT_REXMT) &&
-			    !tcp_timer_active(tp, TT_PERSIST))
-	                        tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+			TCP_XMIT_TIMER_ASSERT(tp, len, flags);
 			tp->snd_cwnd = tp->t_maxseg;
 			return (0);
 		case EMSGSIZE:
@@ -1373,7 +1619,7 @@
 	 * Start/restart persistance timer.
 	 */
 	TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
-		      TCPTV_PERSMIN, TCPTV_PERSMAX);
+		      tcp_persmin, tcp_persmax);
 	tcp_timer_activate(tp, TT_PERSIST, tt);
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
@@ -1461,6 +1707,7 @@
 			bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
 			optp += sizeof(to->to_tsecr);
 			break;
+#ifdef TCP_SIGNATURE
 		case TOF_SIGNATURE:
 			{
 			int siglen = TCPOLEN_SIGNATURE - 2;
@@ -1479,6 +1726,7 @@
 				 *optp++ = 0;
 			break;
 			}
+#endif
 		case TOF_SACK:
 			{
 			int sackblks = 0;
@@ -1509,6 +1757,25 @@
 			TCPSTAT_INC(tcps_sack_send_blocks);
 			break;
 			}
+#ifdef TCP_RFC7413
+		case TOF_FASTOPEN:
+			{
+			int total_len;
+
+			/* XXX is there any point to aligning this option? */
+			total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+			if (TCP_MAXOLEN - optlen < total_len)
+				continue;
+			*optp++ = TCPOPT_FAST_OPEN;
+			*optp++ = total_len;
+			if (to->to_tfo_len > 0) {
+				bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+				optp += to->to_tfo_len;
+			}
+			optlen += total_len;
+			break;
+			}
+#endif
 		default:
 			panic("%s: unknown TCP option type", __func__);
 			break;

Modified: trunk/sys/netinet/tcp_reass.c
===================================================================
--- trunk/sys/netinet/tcp_reass.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_reass.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_reass.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_reass.c 285976 2015-07-28 19:58:44Z delphij $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -75,35 +75,27 @@
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 
-static int tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS);
 static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
 
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
-static VNET_DEFINE(int, tcp_reass_maxseg) = 0;
-#define	V_tcp_reass_maxseg		VNET(tcp_reass_maxseg)
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments,
-    CTLTYPE_INT | CTLFLAG_RDTUN,
-    &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I",
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+    &tcp_reass_maxseg, 0,
     "Global maximum number of TCP Segments in Reassembly Queue");
 
-static VNET_DEFINE(int, tcp_reass_qsize) = 0;
-#define	V_tcp_reass_qsize		VNET(tcp_reass_qsize)
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
-    CTLTYPE_INT | CTLFLAG_RD,
-    &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I",
+SYSCTL_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
+    (CTLTYPE_INT | CTLFLAG_RD), NULL, 0, &tcp_reass_sysctl_qsize, "I",
     "Global number of TCP Segments currently in Reassembly Queue");
 
-static VNET_DEFINE(int, tcp_reass_overflows) = 0;
-#define	V_tcp_reass_overflows		VNET(tcp_reass_overflows)
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
-    CTLTYPE_INT | CTLFLAG_RD,
-    &VNET_NAME(tcp_reass_overflows), 0,
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
+    CTLFLAG_RD,
+    &tcp_reass_overflows, 0,
     "Global number of TCP Segment Reassembly Queue Overflows");
 
-static VNET_DEFINE(uma_zone_t, tcp_reass_zone);
-#define	V_tcp_reass_zone		VNET(tcp_reass_zone)
+static uma_zone_t tcp_reass_zone;
 
 /* Initialize TCP reassembly queue */
 static void
@@ -110,34 +102,29 @@
 tcp_reass_zone_change(void *tag)
 {
 
-	V_tcp_reass_maxseg = nmbclusters / 16;
-	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+	/* Set the zone limit and read back the effective value. */
+	tcp_reass_maxseg = nmbclusters / 16;
+	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+	    tcp_reass_maxseg);
 }
 
 void
-tcp_reass_init(void)
+tcp_reass_global_init(void)
 {
 
-	V_tcp_reass_maxseg = nmbclusters / 16;
+	tcp_reass_maxseg = nmbclusters / 16;
 	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
-	    &V_tcp_reass_maxseg);
-	V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+	    &tcp_reass_maxseg);
+	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+	/* Set the zone limit and read back the effective value. */
+	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+	    tcp_reass_maxseg);
 	EVENTHANDLER_REGISTER(nmbclusters_change,
 	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
 }
 
-#ifdef VIMAGE
 void
-tcp_reass_destroy(void)
-{
-
-	uma_zdestroy(V_tcp_reass_zone);
-}
-#endif
-
-void
 tcp_reass_flush(struct tcpcb *tp)
 {
 	struct tseg_qent *qe;
@@ -147,7 +134,7 @@
 	while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(qe, tqe_q);
 		m_freem(qe->tqe_m);
-		uma_zfree(V_tcp_reass_zone, qe);
+		uma_zfree(tcp_reass_zone, qe);
 		tp->t_segqlen--;
 	}
 
@@ -157,17 +144,12 @@
 }
 
 static int
-tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS)
+tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
 {
-	V_tcp_reass_maxseg = uma_zone_get_max(V_tcp_reass_zone);
-	return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
+	int qsize;
 
-static int
-tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
-{
-	V_tcp_reass_qsize = uma_zone_get_cur(V_tcp_reass_zone);
-	return (sysctl_handle_int(oidp, arg1, arg2, req));
+	qsize = uma_zone_get_cur(tcp_reass_zone);
+	return (sysctl_handle_int(oidp, &qsize, 0, req));
 }
 
 int
@@ -212,9 +194,9 @@
 	 * Investigate why and re-evaluate the below limit after the behaviour
 	 * is understood.
 	 */
-	if (th->th_seq != tp->rcv_nxt &&
+	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
 	    tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
-		V_tcp_reass_overflows++;
+		tcp_reass_overflows++;
 		TCPSTAT_INC(tcps_rcvmemdrop);
 		m_freem(m);
 		*tlenp = 0;
@@ -233,9 +215,9 @@
 	 * Use a temporary structure on the stack for the missing segment
 	 * when the zone is exhausted. Otherwise we may get stuck.
 	 */
-	te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT);
+	te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
 	if (te == NULL) {
-		if (th->th_seq != tp->rcv_nxt) {
+		if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
 			TCPSTAT_INC(tcps_rcvmemdrop);
 			m_freem(m);
 			*tlenp = 0;
@@ -283,7 +265,8 @@
 				TCPSTAT_INC(tcps_rcvduppack);
 				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
 				m_freem(m);
-				uma_zfree(V_tcp_reass_zone, te);
+				if (te != &tqs)
+					uma_zfree(tcp_reass_zone, te);
 				tp->t_segqlen--;
 				/*
 				 * Try to present any queued data
@@ -320,7 +303,7 @@
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
-		uma_zfree(V_tcp_reass_zone, q);
+		uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
 		q = nq;
 	}
@@ -359,7 +342,7 @@
 		else
 			sbappendstream_locked(&so->so_rcv, q->tqe_m);
 		if (q != &tqs)
-			uma_zfree(V_tcp_reass_zone, q);
+			uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);

Modified: trunk/sys/netinet/tcp_sack.c
===================================================================
--- trunk/sys/netinet/tcp_sack.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_sack.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -72,7 +72,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_sack.c 220105 2011-03-28 19:03:56Z weongyo $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_sack.c 293710 2016-01-11 23:34:29Z hiren $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -345,17 +345,22 @@
  * Process cumulative ACK and the TCP SACK option to update the scoreboard.
  * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
  * the sequence space).
+ * Returns 1 if incoming ACK has previously unknown SACK information,
+ * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
+ * to that (i.e. left edge moving) would also be considered a change in SACK
+ * information which is slightly different than rfc6675.
  */
-void
+int
 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 {
 	struct sackhole *cur, *temp;
 	struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
-	int i, j, num_sack_blks;
+	int i, j, num_sack_blks, sack_changed;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	num_sack_blks = 0;
+	sack_changed = 0;
 	/*
 	 * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
 	 * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
@@ -369,6 +374,7 @@
 	 * received new blocks from the other side.
 	 */
 	if (to->to_flags & TOF_SACK) {
+		tp->sackhint.sacked_bytes = 0;	/* reset */
 		for (i = 0; i < to->to_nsacks; i++) {
 			bcopy((to->to_sacks + i * TCPOLEN_SACK),
 			    &sack, sizeof(sack));
@@ -379,8 +385,11 @@
 			    SEQ_GT(sack.start, th_ack) &&
 			    SEQ_LT(sack.start, tp->snd_max) &&
 			    SEQ_GT(sack.end, tp->snd_una) &&
-			    SEQ_LEQ(sack.end, tp->snd_max))
+			    SEQ_LEQ(sack.end, tp->snd_max)) {
 				sack_blocks[num_sack_blks++] = sack;
+				tp->sackhint.sacked_bytes +=
+				    (sack.end-sack.start);
+			}
 		}
 	}
 	/*
@@ -388,7 +397,7 @@
 	 * received.
 	 */
 	if (num_sack_blks == 0)
-		return;
+		return (sack_changed);
 
 	/*
 	 * Sort the SACK blocks so we can update the scoreboard with just one
@@ -439,6 +448,7 @@
 			tp->snd_fack = sblkp->end;
 			/* Go to the previous sack block. */
 			sblkp--;
+			sack_changed = 1;
 		} else {
 			/* 
 			 * We failed to add a new hole based on the current 
@@ -455,9 +465,11 @@
 			    SEQ_LT(tp->snd_fack, sblkp->end))
 				tp->snd_fack = sblkp->end;
 		}
-	} else if (SEQ_LT(tp->snd_fack, sblkp->end))
+	} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
 		/* fack is advanced. */
 		tp->snd_fack = sblkp->end;
+		sack_changed = 1;
+	}
 	/* We must have at least one SACK hole in scoreboard. */
 	KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
 	    ("SACK scoreboard must not be empty"));
@@ -486,6 +498,7 @@
 		tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
 		KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 		    ("sackhint bytes rtx >= 0"));
+		sack_changed = 1;
 		if (SEQ_LEQ(sblkp->start, cur->start)) {
 			/* Data acks at least the beginning of hole. */
 			if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -541,6 +554,7 @@
 		else
 			sblkp--;
 	}
+	return (sack_changed);
 }
 
 /*

Modified: trunk/sys/netinet/tcp_seq.h
===================================================================
--- trunk/sys/netinet/tcp_seq.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_seq.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_seq.h	8.3 (Berkeley) 6/21/95
- * $FreeBSD: stable/9/sys/netinet/tcp_seq.h 235657 2012-05-19 18:32:31Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_seq.h 231767 2012-02-15 16:09:56Z bz $
  */
 
 #ifndef _NETINET_TCP_SEQ_H_

Modified: trunk/sys/netinet/tcp_subr.c
===================================================================
--- trunk/sys/netinet/tcp_subr.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_subr.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_subr.c 238247 2012-07-08 14:21:36Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_subr.c 314667 2017-03-04 13:03:31Z avg $");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
@@ -54,6 +55,7 @@
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
@@ -67,6 +69,7 @@
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
@@ -81,12 +84,14 @@
 #include <netinet6/nd6.h>
 #endif
 
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -97,6 +102,9 @@
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -173,7 +181,7 @@
 VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
      &VNET_NAME(tcp_minmss), 0,
-    "Minmum TCP Maximum Segment Size");
+    "Minimum TCP Maximum Segment Size");
 
 VNET_DEFINE(int, tcp_do_rfc1323) = 1;
 SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
@@ -226,6 +234,7 @@
 static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
 static char *	tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
 		    void *ip4hdr, const void *ip6hdr);
+static void	tcp_timer_discard(struct tcpcb *, uint32_t);
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
@@ -234,7 +243,7 @@
  * variable net.inet.tcp.tcbhashsize
  */
 #ifndef TCBHASHSIZE
-#define TCBHASHSIZE	512
+#define TCBHASHSIZE	0
 #endif
 
 /*
@@ -281,11 +290,35 @@
 	return (0);
 }
 
+/*
+ * Take a value and get the next power of 2 that doesn't overflow.
+ * Used to size the tcp_inpcb hash buckets.
+ */
+static int
+maketcp_hashsize(int size)
+{
+	int hashsize;
+
+	/*
+	 * auto tune.
+	 * get the next power of 2 higher than maxsockets.
+	 */
+	hashsize = 1 << fls(size);
+	/* catch overflow, and just go one power of 2 smaller */
+	if (hashsize < size) {
+		hashsize = 1 << (fls(size) - 1);
+	}
+	return (hashsize);
+}
+
 void
 tcp_init(void)
 {
+	const char *tcbhash_tuneable;
 	int hashsize;
 
+	tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
+
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
 	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
@@ -294,10 +327,43 @@
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 
 	hashsize = TCBHASHSIZE;
-	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
+	TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
+	if (hashsize == 0) {
+		/*
+		 * Auto tune the hash size based on maxsockets.
+		 * A perfect hash would have a 1:1 mapping
+		 * (hashsize = maxsockets) however it's been
+		 * suggested that O(2) average is better.
+		 */
+		hashsize = maketcp_hashsize(maxsockets / 4);
+		/*
+		 * Our historical default is 512,
+		 * do not autotune lower than this.
+		 */
+		if (hashsize < 512)
+			hashsize = 512;
+		if (bootverbose && IS_DEFAULT_VNET(curvnet))
+			printf("%s: %s auto tuned to %d\n", __func__,
+			    tcbhash_tuneable, hashsize);
+	}
+	/*
+	 * We require a hashsize to be a power of two.
+	 * Previously if it was not a power of two we would just reset it
+	 * back to 512, which could be a nasty surprise if you did not notice
+	 * the error message.
+	 * Instead what we do is clip it to the closest power of two lower
+	 * than the specified hash value.
+	 */
 	if (!powerof2(hashsize)) {
-		printf("WARNING: TCB hash size not a power of 2\n");
-		hashsize = 512; /* safe default */
+		int oldhashsize = hashsize;
+
+		hashsize = maketcp_hashsize(hashsize);
+		/* prevent absurdly low value */
+		if (hashsize < 16)
+			hashsize = 16;
+		printf("%s: WARNING: TCB hash size not a power of 2, "
+		    "clipped from %d to %d.\n", __func__, oldhashsize,
+		    hashsize);
 	}
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
 	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
@@ -309,11 +375,11 @@
 	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
+	uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
 
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
-	tcp_reass_init();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
@@ -323,6 +389,8 @@
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
+	tcp_reass_global_init();
+
 	/* XXX virtualize those bellow? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
@@ -333,6 +401,8 @@
 	tcp_rexmit_min = TCPTV_MIN;
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
+	tcp_persmin = TCPTV_PERSMIN;
+	tcp_persmax = TCPTV_PERSMAX;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 	tcp_tcbhashsize = hashsize;
@@ -363,6 +433,10 @@
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
+
+#ifdef TCP_RFC7413
+	tcp_fastopen_init();
+#endif
 }
 
 #ifdef VIMAGE
@@ -370,7 +444,9 @@
 tcp_destroy(void)
 {
 
-	tcp_reass_destroy();
+#ifdef TCP_RFC7413
+	tcp_fastopen_destroy();
+#endif
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
@@ -481,16 +557,18 @@
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
-	int tlen;
-	int win = 0;
+	struct tcpopt to;
+	struct inpcb *inp;
 	struct ip *ip;
+	struct mbuf *optm;
 	struct tcphdr *nth;
+	u_char *optp;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
-	int ipflags = 0;
-	struct inpcb *inp;
+	int optlen, tlen, win;
+	bool incl_opts;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
@@ -507,6 +585,8 @@
 	} else
 		inp = NULL;
 
+	incl_opts = false;
+	win = 0;
 	if (tp != NULL) {
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
@@ -513,12 +593,13 @@
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
+		if ((tp->t_flags & TF_NOOPT) == 0)
+			incl_opts = true;
 	}
 	if (m == NULL) {
-		m = m_gethdr(M_DONTWAIT, MT_DATA);
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return;
-		tlen = 0;
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
@@ -528,25 +609,61 @@
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
-	      {
-		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
-		ip = mtod(m, struct ip *);
-		nth = (struct tcphdr *)(ip + 1);
-	      }
+		{
+			bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+			ip = mtod(m, struct ip *);
+			nth = (struct tcphdr *)(ip + 1);
+		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		flags = TH_ACK;
+	} else if (!M_WRITABLE(m)) {
+		struct mbuf *n;
+
+		/* Can't reuse 'm', allocate a new mbuf. */
+		n = m_gethdr(M_NOWAIT, MT_DATA);
+		if (n == NULL) {
+			m_freem(m);
+			return;
+		}
+
+		if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
+			m_freem(m);
+			m_freem(n);
+			return;
+		}
+
+		n->m_data += max_linkhdr;
+		/* m_len is set later */
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+		if (isipv6) {
+			bcopy((caddr_t)ip6, mtod(n, caddr_t),
+			      sizeof(struct ip6_hdr));
+			ip6 = mtod(n, struct ip6_hdr *);
+			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+			nth = (struct tcphdr *)(ip6 + 1);
+		} else
+#endif /* INET6 */
+		{
+			bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip));
+			ip = mtod(n, struct ip *);
+			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+			nth = (struct tcphdr *)(ip + 1);
+		}
+		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+		xchg(nth->th_dport, nth->th_sport, uint16_t);
+		th = nth;
+		m_freem(m);
+		m = n;
 	} else {
 		/*
 		 *  reuse the mbuf. 
-		 * XXX MRT We inherrit the FIB, which is lucky.
+		 * XXX MRT We inherit the FIB, which is lucky.
 		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
-		m_addr_changed(m);
 		/* m_len is set later */
-		tlen = 0;
-#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
@@ -553,10 +670,10 @@
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
-	      {
-		xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
-		nth = (struct tcphdr *)(ip + 1);
-	      }
+		{
+			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+			nth = (struct tcphdr *)(ip + 1);
+		}
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
@@ -569,13 +686,65 @@
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 #undef xchg
 	}
+	tlen = 0;
 #ifdef INET6
+	if (isipv6)
+		tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+#endif
+#if defined(INET) && defined(INET6)
+	else
+#endif
+#ifdef INET
+		tlen = sizeof (struct tcpiphdr);
+#endif
+#ifdef INVARIANTS
+	m->m_len = 0;
+	KASSERT(M_TRAILINGSPACE(m) >= tlen,
+	    ("Not enough trailing space for message (m=%p, need=%d, have=%ld)",
+	    m, tlen, (long)M_TRAILINGSPACE(m)));
+#endif
+	m->m_len = tlen;
+	to.to_flags = 0;
+	if (incl_opts) {
+		/* Make sure we have room. */
+		if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
+			m->m_next = m_get(M_NOWAIT, MT_DATA);
+			if (m->m_next) {
+				optp = mtod(m->m_next, u_char *);
+				optm = m->m_next;
+			} else
+				incl_opts = false;
+		} else {
+			optp = (u_char *) (nth + 1);
+			optm = m;
+		}
+	}
+	if (incl_opts) {
+		/* Timestamps. */
+		if (tp->t_flags & TF_RCVD_TSTMP) {
+			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+			to.to_tsecr = tp->ts_recent;
+			to.to_flags |= TOF_TS;
+		}
+#ifdef TCP_SIGNATURE
+		/* TCP-MD5 (RFC2385). */
+		if (tp->t_flags & TF_SIGNATURE)
+			to.to_flags |= TOF_SIGNATURE;
+#endif
+
+		/* Add the options. */
+		tlen += optlen = tcp_addoptions(&to, optp);
+
+		/* Update m_len in the correct mbuf. */
+		optm->m_len += optlen;
+	} else
+		optlen = 0;
+#ifdef INET6
 	if (isipv6) {
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
-		ip6->ip6_plen = 0;		/* Set in ip6_output(). */
-		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+		ip6->ip6_plen = htons(tlen - sizeof(*ip6));
 	}
 #endif
 #if defined(INET) && defined(INET6)
@@ -583,14 +752,12 @@
 #endif
 #ifdef INET
 	{
-		tlen += sizeof (struct tcpiphdr);
-		ip->ip_len = tlen;
+		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		if (V_path_mtu_discovery)
-			ip->ip_off |= IP_DF;
+			ip->ip_off |= htons(IP_DF);
 	}
 #endif
-	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
@@ -612,7 +779,7 @@
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
-	nth->th_off = sizeof (struct tcphdr) >> 2;
+	nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	nth->th_flags = flags;
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
@@ -620,6 +787,13 @@
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 
+#ifdef TCP_SIGNATURE
+	if (to.to_flags & TOF_SIGNATURE) {
+		tcp_signature_compute(m, 0, 0, optlen, to.to_signature,
+		    IPSEC_DIR_OUTBOUND);
+	}
+#endif
+
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
@@ -644,15 +818,20 @@
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
+	if (flags & TH_RST)
+		TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *),
+		    tp, nth);
+
+	TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth);
 #ifdef INET6
 	if (isipv6)
-		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+		(void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
-		(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+		(void) ip_output(m, NULL, NULL, 0, NULL, inp);
 #endif
 }
 
@@ -713,11 +892,11 @@
 		V_tcp_mssdflt;
 
 	/* Set up our timeouts. */
-	callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
-	callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
-	callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
-	callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
-	callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
+	callout_init(&tp->t_timers->tt_rexmt, 1);
+	callout_init(&tp->t_timers->tt_persist, 1);
+	callout_init(&tp->t_timers->tt_keep, 1);
+	callout_init(&tp->t_timers->tt_2msl, 1);
+	callout_init(&tp->t_timers->tt_delack, 1);
 
 	if (V_tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
@@ -724,8 +903,14 @@
 	if (V_tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
-	tp->t_inpcb = inp;	/* XXX */
 	/*
+	 * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
+	 * is called.
+	 */
+	in_pcbref(inp);	/* Reference for tcpcb */
+	tp->t_inpcb = inp;
+
+	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
@@ -772,7 +957,7 @@
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
-		INP_INFO_RLOCK(&V_tcbinfo);
+		INP_INFO_WLOCK(&V_tcbinfo);
 		/*
 		 * New connections already part way through being initialised
 		 * with the CC algo we're removing will not race with this code
@@ -802,7 +987,7 @@
 			}
 			INP_WUNLOCK(inp);
 		}
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
@@ -820,12 +1005,12 @@
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
-		tp->t_state = TCPS_CLOSED;
-		(void) tcp_output_reset(tp);
+		tcp_state_change(tp, TCPS_CLOSED);
+		(void) tcp_output(tp);
 		TCPSTAT_INC(tcps_drops);
 	} else
 		TCPSTAT_INC(tcps_conndrops);
@@ -843,6 +1028,7 @@
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
+	int released;
 
 	INP_WLOCK_ASSERT(inp);
 
@@ -850,22 +1036,15 @@
 	 * Make sure that all of our timers are stopped before we delete the
 	 * PCB.
 	 *
-	 * XXXRW: Really, we would like to use callout_drain() here in order
-	 * to avoid races experienced in tcp_timer.c where a timer is already
-	 * executing at this point.  However, we can't, both because we're
-	 * running in a context where we can't sleep, and also because we
-	 * hold locks required by the timers.  What we instead need to do is
-	 * test to see if callout_drain() is required, and if so, defer some
-	 * portion of the remainder of tcp_discardcb() to an asynchronous
-	 * context that can callout_drain() and then continue.  Some care
-	 * will be required to ensure that no further processing takes place
-	 * on the tcpcb, even though it hasn't been freed (a flag?).
+	 * If stopping a timer fails, we schedule a discard function in same
+	 * callout, and the last discard function called will take care of
+	 * deleting the tcpcb.
 	 */
-	callout_stop(&tp->t_timers->tt_rexmt);
-	callout_stop(&tp->t_timers->tt_persist);
-	callout_stop(&tp->t_timers->tt_keep);
-	callout_stop(&tp->t_timers->tt_2msl);
-	callout_stop(&tp->t_timers->tt_delack);
+	tcp_timer_stop(tp, TT_REXMT);
+	tcp_timer_stop(tp, TT_PERSIST);
+	tcp_timer_stop(tp, TT_KEEP);
+	tcp_timer_stop(tp, TT_2MSL);
+	tcp_timer_stop(tp, TT_DELACK);
 
 	/*
 	 * If we got enough samples through the srtt filter,
@@ -902,14 +1081,14 @@
 				ssthresh = 2;
 			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
-				      (isipv6 ? sizeof (struct ip6_hdr) +
-					       sizeof (struct tcphdr) :
+			    (isipv6 ? sizeof (struct ip6_hdr) +
+				sizeof (struct tcphdr) :
 #endif
-				       sizeof (struct tcpiphdr)
+				sizeof (struct tcpiphdr)
 #ifdef INET6
-				       )
+			    )
 #endif
-				      );
+			    );
 		} else
 			ssthresh = 0;
 		metrics.rmx_ssthresh = ssthresh;
@@ -925,8 +1104,12 @@
 
 	/* free the reassembly queue, if any */
 	tcp_reass_flush(tp);
+
+#ifdef TCP_OFFLOAD
 	/* Disconnect offload device, if any. */
-	tcp_offload_detach(tp);
+	if (tp->t_flags & TF_TOE)
+		tcp_offload_detach(tp);
+#endif
 		
 	tcp_free_sackholes(tp);
 
@@ -938,10 +1121,82 @@
 
 	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
-	tp->t_inpcb = NULL;
-	uma_zfree(V_tcpcb_zone, tp);
+	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+		/* We own the last reference on tcpcb, let's free it. */
+		tp->t_inpcb = NULL;
+		uma_zfree(V_tcpcb_zone, tp);
+		released = in_pcbrele_wlocked(inp);
+		KASSERT(!released, ("%s: inp %p should not have been released "
+			"here", __func__, inp));
+	}
 }
 
+void
+tcp_timer_2msl_discard(void *xtp)
+{
+
+	tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL);
+}
+
+void
+tcp_timer_keep_discard(void *xtp)
+{
+
+	tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP);
+}
+
+void
+tcp_timer_persist_discard(void *xtp)
+{
+
+	tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST);
+}
+
+void
+tcp_timer_rexmt_discard(void *xtp)
+{
+
+	tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT);
+}
+
+void
+tcp_timer_delack_discard(void *xtp)
+{
+
+	tcp_timer_discard((struct tcpcb *)xtp, TT_DELACK);
+}
+
+void
+tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
+{
+	struct inpcb *inp;
+
+	CURVNET_SET(tp->t_vnet);
+	INP_INFO_RLOCK(&V_tcbinfo);
+	inp = tp->t_inpcb;
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
+		__func__, tp));
+	INP_WLOCK(inp);
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
+		("%s: tcpcb has to be stopped here", __func__));
+	KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
+		("%s: discard callout should be running", __func__));
+	tp->t_timers->tt_flags &= ~timer_type;
+	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+		/* We own the last reference on this tcpcb, let's free it. */
+		tp->t_inpcb = NULL;
+		uma_zfree(V_tcpcb_zone, tp);
+		if (in_pcbrele_wlocked(inp)) {
+			INP_INFO_RUNLOCK(&V_tcbinfo);
+			CURVNET_RESTORE();
+			return;
+		}
+	}
+	INP_WUNLOCK(inp);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
+}
+
 /*
  * Attempt to close a TCP control block, marking it as dropped, and freeing
  * the socket if we hold the only reference.
@@ -952,12 +1207,24 @@
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
-	/* Notify any offload devices of listener close */
+#ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
-		tcp_offload_listen_close(tp);
+		tcp_offload_listen_stop(tp);
+#endif
+#ifdef TCP_RFC7413
+	/*
+	 * This releases the TFO pending counter resource for TFO listen
+	 * sockets as well as passively-created TFO sockets that transition
+	 * from SYN_RECEIVED to CLOSED.
+	 */
+	if (tp->t_tfo_pending) {
+		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+		tp->t_tfo_pending = NULL;
+	}
+#endif
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@@ -997,9 +1264,9 @@
 	 * XXX: The "Net/3" implementation doesn't imply that the TCP
 	 *      reassembly queue should be flushed, but in a situation
 	 *	where we're really low on mbufs, this is potentially
-	 *	usefull.
+	 *	useful.
 	 */
-		INP_INFO_RLOCK(&V_tcbinfo);
+		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
 			if (inpb->inp_flags & INP_TIMEWAIT)
 				continue;
@@ -1010,7 +1277,7 @@
 			}
 			INP_WUNLOCK(inpb);
 		}
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
@@ -1029,7 +1296,7 @@
 {
 	struct tcpcb *tp;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1093,10 +1360,10 @@
 	/*
 	 * OK, now we're committed to doing something.
 	 */
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_LIST_RLOCK(&V_tcbinfo);
 	gencnt = V_tcbinfo.ipi_gencnt;
 	n = V_tcbinfo.ipi_count;
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_LIST_RUNLOCK(&V_tcbinfo);
 
 	m = syncache_pcbcount();
 
@@ -1118,10 +1385,8 @@
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-	if (inp_list == NULL)
-		return (ENOMEM);
 
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_WLOCK(&V_tcbinfo);
 	for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
 	    inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
@@ -1146,7 +1411,7 @@
 		}
 		INP_WUNLOCK(inp);
 	}
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 	n = i;
 
 	error = 0;
@@ -1184,7 +1449,7 @@
 		} else
 			INP_RUNLOCK(inp);
 	}
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
@@ -1191,7 +1456,7 @@
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	if (!error) {
 		/*
@@ -1201,11 +1466,11 @@
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
-		INP_INFO_RLOCK(&V_tcbinfo);
+		INP_LIST_RLOCK(&V_tcbinfo);
 		xig.xig_gen = V_tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_LIST_RUNLOCK(&V_tcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
@@ -1366,7 +1631,7 @@
 				      - offsetof(struct icmp, icmp_ip));
 		th = (struct tcphdr *)((caddr_t)ip
 				       + (ip->ip_hl << 2));
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 		inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
 		    ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 		if (inp != NULL)  {
@@ -1393,12 +1658,11 @@
 					    /*
 					     * If no alternative MTU was
 					     * proposed, try the next smaller
-					     * one.  ip->ip_len has already
-					     * been swapped in icmp_input().
+					     * one.
 					     */
 					    if (!mtu)
-						mtu = ip_next_mtu(ip->ip_len,
-						 1);
+						mtu = ip_next_mtu(
+						 ntohs(ip->ip_len), 1);
 					    if (mtu < V_tcp_minmss
 						 + sizeof(struct tcpiphdr))
 						mtu = V_tcp_minmss
@@ -1427,7 +1691,7 @@
 			inc.inc_laddr = ip->ip_src;
 			syncache_unreach(&inc, th);
 		}
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	} else
 		in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
 }
@@ -1500,9 +1764,9 @@
 		inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
 		inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
 		inc.inc_flags |= INC_ISIPV6;
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 		syncache_unreach(&inc, &th);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	} else
 		in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
@@ -1635,7 +1899,7 @@
 {
 	struct tcpcb *tp;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1696,7 +1960,7 @@
 	tp->snd_recover = tp->snd_max;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		EXIT_FASTRECOVERY(tp->t_flags);
-	tcp_output_send(tp);
+	tcp_output(tp);
 	return (inp);
 }
 
@@ -1708,7 +1972,7 @@
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct route sro;
 	struct sockaddr_in *dst;
@@ -1727,16 +1991,20 @@
 	}
 	if (sro.ro_rt != NULL) {
 		ifp = sro.ro_rt->rt_ifp;
-		if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
+		if (sro.ro_rt->rt_mtu == 0)
 			maxmtu = ifp->if_mtu;
 		else
-			maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+			maxmtu = min(sro.ro_rt->rt_mtu, ifp->if_mtu);
 
 		/* Report additional interface capabilities. */
-		if (flags != NULL) {
+		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO4 &&
-			    ifp->if_hwassist & CSUM_TSO)
-				*flags |= CSUM_TSO;
+			    ifp->if_hwassist & CSUM_TSO) {
+				cap->ifcap |= CSUM_TSO;
+				cap->tsomax = ifp->if_hw_tsomax;
+				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+			}
 		}
 		RTFREE(sro.ro_rt);
 	}
@@ -1746,7 +2014,7 @@
 
 #ifdef INET6
 u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
@@ -1763,17 +2031,21 @@
 	}
 	if (sro6.ro_rt != NULL) {
 		ifp = sro6.ro_rt->rt_ifp;
-		if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
+		if (sro6.ro_rt->rt_mtu == 0)
 			maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
 		else
-			maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
+			maxmtu = min(sro6.ro_rt->rt_mtu,
 				     IN6_LINKMTU(sro6.ro_rt->rt_ifp));
 
 		/* Report additional interface capabilities. */
-		if (flags != NULL) {
+		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO6 &&
-			    ifp->if_hwassist & CSUM_TSO)
-				*flags |= CSUM_TSO;
+			    ifp->if_hwassist & CSUM_TSO) {
+				cap->ifcap |= CSUM_TSO;
+				cap->tsomax = ifp->if_hw_tsomax;
+				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+			}
 		}
 		RTFREE(sro6.ro_rt);
 	}
@@ -1796,9 +2068,10 @@
 #endif
 	struct tcphdr *th;
 
-	if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
+	if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) ||
+		(!key_havesp(IPSEC_DIR_OUTBOUND)))
 		return (0);
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (!m)
 		return (0);
 
@@ -2150,7 +2423,7 @@
 	default:
 		return (EINVAL);
 	}
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
@@ -2189,7 +2462,7 @@
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
@@ -2312,3 +2585,19 @@
 		panic("%s: string too long", __func__);
 	return (s);
 }
+
+/*
+ * A subroutine which makes it easy to track TCP state changes with DTrace.
+ * This function shouldn't be called for t_state initializations that don't
+ * correspond to actual TCP state transitions.
+ */
+void
+tcp_state_change(struct tcpcb *tp, int newstate)
+{
+#if defined(KDTRACE_HOOKS)
+	int pstate = tp->t_state;
+#endif
+
+	tp->t_state = newstate;
+	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
+}

Modified: trunk/sys/netinet/tcp_syncache.c
===================================================================
--- trunk/sys/netinet/tcp_syncache.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_syncache.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,13 +1,13 @@
 /* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001 McAfee, Inc.
- * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
+ * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jonathan Lemon
  * and McAfee Research, the Security Research Division of McAfee, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
- * DARPA CHATS research program.
+ * DARPA CHATS research program. [2001 McAfee, Inc.]
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_syncache.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_syncache.c 324520 2017-10-11 06:28:46Z sephe $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -48,7 +48,6 @@
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
-#include <sys/md5.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/random.h>
 #include <sys/socket.h>
@@ -56,6 +55,9 @@
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 
+#include <sys/md5.h>
+#include <crypto/siphash/siphash.h>
+
 #include <vm/uma.h>
 
 #include <net/if.h>
@@ -77,15 +79,20 @@
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
+#ifdef TCP_OFFLOAD
+#include <netinet/toecore.h>
+#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -111,28 +118,34 @@
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
-#ifdef TCP_OFFLOAD_DISABLE
-#define TOEPCB_ISSET(sc) (0)
-#else
-#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#ifdef TCP_OFFLOAD
+#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
 #endif
 
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
-struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
-static int	 syncache_respond(struct syncache *);
+static int	 syncache_respond(struct syncache *, const struct mbuf *);
 static struct	 socket *syncache_socket(struct syncache *, struct socket *,
 		    struct mbuf *m);
+static int	 syncache_sysctl_count(SYSCTL_HANDLER_ARGS);
 static void	 syncache_timeout(struct syncache *sc, struct syncache_head *sch,
 		    int docallout);
 static void	 syncache_timer(void *);
-static void	 syncookie_generate(struct syncache_head *, struct syncache *,
-		    u_int32_t *);
+
+static uint32_t	 syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
+		    uint8_t *, uintptr_t);
+static tcp_seq	 syncookie_generate(struct syncache_head *, struct syncache *);
 static struct syncache
 		*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
-		    struct syncache *, struct tcpopt *, struct tcphdr *,
+		    struct syncache *, struct tcphdr *, struct tcpopt *,
 		    struct socket *);
+static void	 syncookie_reseed(void *);
+#ifdef INVARIANTS
+static int	 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+		    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+		    struct socket *lso);
+#endif
 
 /*
  * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
@@ -159,8 +172,8 @@
     &VNET_NAME(tcp_syncache.cache_limit), 0,
     "Overall entry limit for syncache");
 
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
-    &VNET_NAME(tcp_syncache.cache_count), 0,
+SYSCTL_VNET_PROC(_net_inet_tcp_syncache, OID_AUTO, count, (CTLTYPE_UINT|CTLFLAG_RD),
+    NULL, 0, &syncache_sysctl_count, "IU",
     "Current number of entries in syncache");
 
 SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
@@ -226,7 +239,6 @@
 {
 	int i;
 
-	V_tcp_syncache.cache_count = 0;
 	V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
 	V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
@@ -253,11 +265,12 @@
 	V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
 	    sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
 
+#ifdef VIMAGE
+	V_tcp_syncache.vnet = curvnet;
+#endif
+
 	/* Initialize the hash buckets. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
-#ifdef VIMAGE
-		V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
-#endif
 		TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
 		mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
@@ -264,12 +277,23 @@
 		callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
 			 &V_tcp_syncache.hashbase[i].sch_mtx, 0);
 		V_tcp_syncache.hashbase[i].sch_length = 0;
+		V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
+		V_tcp_syncache.hashbase[i].sch_last_overflow =
+		    -(SYNCOOKIE_LIFETIME + 1);
 	}
 
 	/* Create the syncache entry zone. */
 	V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
+	V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
+	    V_tcp_syncache.cache_limit);
+
+	/* Start the SYN cookie reseeder callout. */
+	callout_init(&V_tcp_syncache.secret.reseed, 1);
+	arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
+	arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
+	callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
+	    syncookie_reseed, &V_tcp_syncache);
 }
 
 #ifdef VIMAGE
@@ -297,15 +321,26 @@
 		mtx_destroy(&sch->sch_mtx);
 	}
 
-	KASSERT(V_tcp_syncache.cache_count == 0, ("%s: cache_count %d not 0",
-	    __func__, V_tcp_syncache.cache_count));
+	KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
+	    ("%s: cache_count not 0", __func__));
 
 	/* Free the allocated global resources. */
 	uma_zdestroy(V_tcp_syncache.zone);
 	free(V_tcp_syncache.hashbase, M_SYNCACHE);
+
+	callout_drain(&V_tcp_syncache.secret.reseed);
 }
 #endif
 
+static int
+syncache_sysctl_count(SYSCTL_HANDLER_ARGS)
+{
+	int count;
+
+	count = uma_zone_get_cur(V_tcp_syncache.zone);
+	return (sysctl_handle_int(oidp, &count, 0, req));
+}
+
 /*
  * Inserts a syncache entry into the specified bucket row.
  * Locks and unlocks the syncache_head autonomously.
@@ -325,6 +360,7 @@
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 			("sch->sch_length incorrect"));
 		sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
+		sch->sch_last_overflow = time_uptime;
 		syncache_drop(sc2, sch);
 		TCPSTAT_INC(tcps_sc_bucketoverflow);
 	}
@@ -333,6 +369,14 @@
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length++;
 
+#ifdef TCP_OFFLOAD
+	if (ADDED_BY_TOE(sc)) {
+		struct toedev *tod = sc->sc_tod;
+
+		tod->tod_syncache_added(tod, sc->sc_todctx);
+	}
+#endif
+
 	/* Reinitialize the bucket row's timer. */
 	if (sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
@@ -340,7 +384,6 @@
 
 	SCH_UNLOCK(sch);
 
-	V_tcp_syncache.cache_count++;
 	TCPSTAT_INC(tcps_sc_added);
 }
 
@@ -357,12 +400,15 @@
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 
-#ifndef TCP_OFFLOAD_DISABLE
-	if (sc->sc_tu)
-		sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
-#endif		    
+#ifdef TCP_OFFLOAD
+	if (ADDED_BY_TOE(sc)) {
+		struct toedev *tod = sc->sc_tod;
+
+		tod->tod_syncache_removed(tod, sc->sc_todctx);
+	}
+#endif
+
 	syncache_free(sc);
-	V_tcp_syncache.cache_count--;
 }
 
 /*
@@ -372,7 +418,7 @@
 syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
 {
 	sc->sc_rxttime = ticks +
-		TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+		TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
 	sc->sc_rxmits++;
 	if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
 		sch->sch_nextc = sc->sc_rxttime;
@@ -395,7 +441,7 @@
 	int tick = ticks;
 	char *s;
 
-	CURVNET_SET(sch->sch_vnet);
+	CURVNET_SET(sch->sch_sc->vnet);
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
@@ -438,7 +484,7 @@
 			free(s, M_TCPLOG);
 		}
 
-		(void) syncache_respond(sc);
+		(void) syncache_respond(sc, NULL);
 		TCPSTAT_INC(tcps_sc_retransmitted);
 		syncache_timeout(sc, sch, 0);
 	}
@@ -452,7 +498,7 @@
  * Find an entry in the syncache.
  * Returns always with locked syncache_head plus a matching entry or NULL.
  */
-struct syncache *
+static struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
 	struct syncache *sc;
@@ -623,6 +669,8 @@
 
 /*
  * Build a new TCP socket structure from a syncache entry.
+ *
+ * On success return the newly created socket with its underlying inp locked.
  */
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
@@ -633,7 +681,7 @@
 	int error;
 	char *s;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/*
 	 * Ok, create the full blown connection, and set things up
@@ -641,7 +689,7 @@
 	 * connection when the SYN arrived.  If we can't create
 	 * the connection, abort it.
 	 */
-	so = sonewconn(lso, SS_ISCONNECTED);
+	so = sonewconn(lso, 0);
 	if (so == NULL) {
 		/*
 		 * Drop the connection; we will either send a RST or
@@ -664,6 +712,15 @@
 	inp = sotoinpcb(so);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 	INP_WLOCK(inp);
+	/*
+	 * Exclusive pcbinfo lock is not required in syncache socket case even
+	 * if two inpcb locks can be acquired simultaneously:
+	 *  - the inpcb in LISTEN state,
+	 *  - the newly created inp.
+	 *
+	 * In this case, an inp cannot be at same time in LISTEN state and
+	 * just created by an accept() call.
+	 */
 	INP_HASH_WLOCK(&V_tcbinfo);
 
 	/* Insert new socket into PCB hash list. */
@@ -681,6 +738,15 @@
 #endif
 
 	/*
+	 * If there's an mbuf and it has a flowid, then let's initialise the
+	 * inp with that particular flowid.
+	 */
+	if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+		inp->inp_flowid = m->m_pkthdr.flowid;
+		inp->inp_flowtype = M_HASHTYPE_GET(m);
+	}
+
+	/*
 	 * Install in the reservation hash table for now, but don't yet
 	 * install a connection group since the full 4-tuple isn't yet
 	 * configured.
@@ -795,7 +861,7 @@
 #endif /* INET */
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	tp = intotcpcb(inp);
-	tp->t_state = TCPS_SYN_RECEIVED;
+	tcp_state_change(tp, TCPS_SYN_RECEIVED);
 	tp->iss = sc->sc_iss;
 	tp->irs = sc->sc_irs;
 	tcp_rcvseqinit(tp);
@@ -841,13 +907,26 @@
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
-	 * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
+	 * If the SYN,ACK was retransmitted, indicate that CWND to be
+	 * limited to one segment in cc_conn_init().
 	 * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
 	 */
 	if (sc->sc_rxmits > 1)
-		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_cwnd = 1;
 
+#ifdef TCP_OFFLOAD
 	/*
+	 * Allow a TOE driver to install its hooks.  Note that we hold the
+	 * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
+	 * new connection before the TOE driver has done its thing.
+	 */
+	if (ADDED_BY_TOE(sc)) {
+		struct toedev *tod = sc->sc_tod;
+
+		tod->tod_offload_socket(tod, sc->sc_todctx, so);
+	}
+#endif
+	/*
 	 * Copy and activate timers.
 	 */
 	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
@@ -856,8 +935,6 @@
 	tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
-	INP_WUNLOCK(inp);
-
 	TCPSTAT_INC(tcps_accepts);
 	return (so);
 
@@ -875,6 +952,9 @@
  * in the syncache, and if its there, we pull it out of
  * the cache and turn it into a full-blown connection in
  * the SYN-RECEIVED state.
+ *
+ * On syncache_socket() success the newly created socket
+ * has its underlying inp locked.
  */
 int
 syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -889,20 +969,33 @@
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
 	    ("%s: can handle only ACK", __func__));
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
+
+#ifdef INVARIANTS
+	/*
+	 * Test code for syncookies comparing the syncache stored
+	 * values with the reconstructed values from the cookie.
+	 */
+	if (sc != NULL)
+		syncookie_cmp(inc, sch, sc, th, to, *lsop);
+#endif
+
 	if (sc == NULL) {
 		/*
 		 * There is no syncache entry, so see if this ACK is
 		 * a returning syncookie.  To do this, first:
-		 *  A. See if this socket has had a syncache entry dropped in
-		 *     the past.  We don't want to accept a bogus syncookie
-		 *     if we've never received a SYN.
-		 *  B. check that the syncookie is valid.  If it is, then
+		 *  A. Check if syncookies are used in case of syncache
+		 *     overflows
+		 *  B. See if this socket has had a syncache entry dropped in
+		 *     the recent past. We don't want to accept a bogus
+		 *     syncookie if we've never received a SYN or accept it
+		 *     twice.
+		 *  C. check that the syncookie is valid.  If it is, then
 		 *     cobble up a fake syncache entry, and return.
 		 */
 		if (!V_tcp_syncookies) {
@@ -913,8 +1006,17 @@
 				    s, __func__);
 			goto failed;
 		}
+		if (!V_tcp_syncookiesonly &&
+		    sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+			SCH_UNLOCK(sch);
+			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+				    "segment rejected (no syncache entry)\n",
+				    s, __func__);
+			goto failed;
+		}
 		bzero(&scs, sizeof(scs));
-		sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+		sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
 		SCH_UNLOCK(sch);
 		if (sc == NULL) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
@@ -927,7 +1029,13 @@
 		/* Pull out the entry to unlock the bucket row. */
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
-		V_tcp_syncache.cache_count--;
+#ifdef TCP_OFFLOAD
+		if (ADDED_BY_TOE(sc)) {
+			struct toedev *tod = sc->sc_tod;
+
+			tod->tod_syncache_removed(tod, sc->sc_todctx);
+		}
+#endif
 		SCH_UNLOCK(sch);
 	}
 
@@ -935,7 +1043,7 @@
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 */
-	if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+	if (th->th_ack != sc->sc_iss + 1) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -946,9 +1054,8 @@
 	 * The SEQ must fall in the window starting at the received
 	 * initial receive sequence number + 1 (the SYN).
 	 */
-	if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
-	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
-	    !TOEPCB_ISSET(sc)) {
+	if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -955,6 +1062,10 @@
 		goto failed;
 	}
 
+	/*
+	 * If timestamps were not negotiated during SYN/ACK they
+	 * must not appear on any segment during this session.
+	 */
 	if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
@@ -961,12 +1072,27 @@
 			    "segment rejected\n", s, __func__);
 		goto failed;
 	}
+
 	/*
+	 * If timestamps were negotiated during SYN/ACK they should
+	 * appear on every segment during this session.
+	 * XXXAO: This is only informal as there have been unverified
+	 * reports of non-compliants stacks.
+	 */
+	if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
+		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+			    "no action\n", s, __func__);
+			free(s, M_TCPLOG);
+			s = NULL;
+		}
+	}
+
+	/*
 	 * If timestamps were negotiated the reflected timestamp
 	 * must be equal to what we actually sent in the SYN|ACK.
 	 */
-	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
-	    !TOEPCB_ISSET(sc)) {
+	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
 			    "segment rejected\n",
@@ -994,24 +1120,38 @@
 	return (0);
 }
 
-int
-tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
-    struct tcphdr *th, struct socket **lsop, struct mbuf *m)
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+    uint64_t response_cookie)
 {
-	struct tcpopt to;
-	int rc;
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	unsigned int *pending_counter;
 
-	bzero(&to, sizeof(struct tcpopt));
-	to.to_mss = toeo->to_mss;
-	to.to_wscale = toeo->to_wscale;
-	to.to_flags = toeo->to_flags;
-	
-	INP_INFO_WLOCK(&V_tcbinfo);
-	rc = syncache_expand(inc, &to, th, lsop, m);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	/*
+	 * Global TCP locks are held because we manipulate the PCB lists
+	 * and create a new socket.
+	 */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
-	return (rc);
+	pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+	*lsop = syncache_socket(sc, *lsop, m);
+	if (*lsop == NULL) {
+		TCPSTAT_INC(tcps_sc_aborted);
+		atomic_subtract_int(pending_counter, 1);
+	} else {
+		inp = sotoinpcb(*lsop);
+		tp = intotcpcb(inp);
+		tp->t_flags |= TF_FASTOPEN;
+		tp->t_tfo_cookie = response_cookie;
+		tp->snd_max = tp->iss;
+		tp->snd_nxt = tp->iss;
+		tp->t_tfo_pending = pending_counter;
+		TCPSTAT_INC(tcps_sc_completed);
+	}
 }
+#endif /* TCP_RFC7413 */
 
 /*
  * Given a LISTEN socket and an inbound SYN request, add
@@ -1025,11 +1165,18 @@
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set.  In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
  */
-static void
-_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
-    struct inpcb *inp, struct socket **lsop, struct mbuf *m,
-    struct toe_usrreqs *tu, void *toepcb)
+int
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+    struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+    void *todctx)
 {
 	struct tcpcb *tp;
 	struct socket *so;
@@ -1036,10 +1183,10 @@
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
-	u_int32_t flowtmp;
 	u_int ltflags;
 	int win, sb_hiwat, ip_ttl, ip_tos;
 	char *s;
+	int rv = 0;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
@@ -1048,8 +1195,12 @@
 #endif
 	struct syncache scs;
 	struct ucred *cred;
+#ifdef TCP_RFC7413
+	uint64_t tfo_response_cookie;
+	int tfo_cookie_valid = 0;
+	int tfo_response_cookie_valid = 0;
+#endif
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
@@ -1073,6 +1224,29 @@
 	sb_hiwat = so->so_rcv.sb_hiwat;
 	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
+#ifdef TCP_RFC7413
+	if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+		/*
+		 * Limit the number of pending TFO connections to
+		 * approximately half of the queue limit.  This prevents TFO
+		 * SYN floods from starving the service by filling the
+		 * listen queue with bogus TFO connections.
+		 */
+		if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+		    (so->so_qlimit / 2)) {
+			int result;
+
+			result = tcp_fastopen_check_cookie(inc,
+			    to->to_tfo_cookie, to->to_tfo_len,
+			    &tfo_response_cookie);
+			tfo_cookie_valid = (result > 0);
+			tfo_response_cookie_valid = (result >= 0);
+		} else
+			atomic_subtract_int(tp->t_tfo_pending, 1);
+	}
+#endif
+
 	/* By the time we drop the lock these should no longer be used. */
 	so = NULL;
 	tp = NULL;
@@ -1080,13 +1254,14 @@
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
-	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+#ifdef TCP_RFC7413
+	if (!tfo_cookie_valid)
+#endif
+		INP_WUNLOCK(inp);
 
 	/*
 	 * Remember the IP options, if any.
@@ -1115,11 +1290,10 @@
 	sc = syncache_lookup(inc, &sch);	/* returns locked entry */
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
-#ifndef TCP_OFFLOAD_DISABLE
-		if (sc->sc_tu)
-			sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
-			    sc->sc_toepcb);
-#endif		    
+#ifdef TCP_RFC7413
+		if (tfo_cookie_valid)
+			INP_WUNLOCK(inp);
+#endif
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {
 			/*
@@ -1152,7 +1326,7 @@
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
-		if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+		if (syncache_respond(sc, m) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			TCPSTAT_INC(tcps_sndacks);
@@ -1162,6 +1336,14 @@
 		goto done;
 	}
 
+#ifdef TCP_RFC7413
+	if (tfo_cookie_valid) {
+		bzero(&scs, sizeof(scs));
+		sc = &scs;
+		goto skip_alloc;
+	}
+#endif
+
 	sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
@@ -1170,8 +1352,10 @@
 		 * entry and insert the new one.
 		 */
 		TCPSTAT_INC(tcps_sc_zonefail);
-		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
+			sch->sch_last_overflow = time_uptime;
 			syncache_drop(sc, sch);
+		}
 		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			if (V_tcp_syncookies) {
@@ -1185,7 +1369,13 @@
 			}
 		}
 	}
-	
+
+#ifdef TCP_RFC7413
+skip_alloc:
+	if (!tfo_cookie_valid && tfo_response_cookie_valid)
+		sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
 	/*
 	 * Fill in the syncache values.
 	 */
@@ -1203,9 +1393,9 @@
 		sc->sc_ip_tos = ip_tos;
 		sc->sc_ip_ttl = ip_ttl;
 	}
-#ifndef TCP_OFFLOAD_DISABLE	
-	sc->sc_tu = tu;
-	sc->sc_toepcb = toepcb;
+#ifdef TCP_OFFLOAD
+	sc->sc_tod = tod;
+	sc->sc_todctx = todctx;
 #endif
 	sc->sc_irs = th->th_seq;
 	sc->sc_iss = arc4random();
@@ -1282,25 +1472,32 @@
 	if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
 		sc->sc_flags |= SCF_ECN;
 
-	if (V_tcp_syncookies) {
-		syncookie_generate(sch, sc, &flowtmp);
+	if (V_tcp_syncookies)
+		sc->sc_iss = syncookie_generate(sch, sc);
 #ifdef INET6
-		if (autoflowlabel)
-			sc->sc_flowlabel = flowtmp;
+	if (autoflowlabel) {
+		if (V_tcp_syncookies)
+			sc->sc_flowlabel = sc->sc_iss;
+		else
+			sc->sc_flowlabel = ip6_randomflowlabel();
+		sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
+	}
 #endif
-	} else {
-#ifdef INET6
-		if (autoflowlabel)
-			sc->sc_flowlabel =
-			    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
-#endif
-	}
 	SCH_UNLOCK(sch);
 
+#ifdef TCP_RFC7413
+	if (tfo_cookie_valid) {
+		syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+		/* INP_WUNLOCK(inp) will be performed by the called */
+		rv = 1;
+		goto tfo_done;
+	}
+#endif
+
 	/*
 	 * Do a standard 3-way handshake.
 	 */
-	if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+	if (syncache_respond(sc, m) == 0) {
 		if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
@@ -1314,6 +1511,13 @@
 	}
 
 done:
+	if (m) {
+		*lsop = NULL;
+		m_freem(m);
+	}
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
 	if (cred != NULL)
 		crfree(cred);
 #ifdef MAC
@@ -1320,15 +1524,15 @@
 	if (sc == &scs)
 		mac_syncache_destroy(&maclabel);
 #endif
-	if (m) {
-		
-		*lsop = NULL;
-		m_freem(m);
-	}
+	return (rv);
 }
 
+/*
+ * Send SYN|ACK to the peer.  Either in response to the peer's SYN,
+ * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
+ */
 static int
-syncache_respond(struct syncache *sc)
+syncache_respond(struct syncache *sc, const struct mbuf *m0)
 {
 	struct ip *ip = NULL;
 	struct mbuf *m;
@@ -1348,9 +1552,7 @@
 	tlen = hlen + sizeof(struct tcphdr);
 
 	/* Determine MSS we advertize to other end of connection. */
-	mssopt = tcp_mssopt(&sc->sc_inc);
-	if (sc->sc_peer_mss)
-		mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss);
+	mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss);
 
 	/* XXX: Assume that the entire packet will fit in a header mbuf. */
 	KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
@@ -1357,7 +1559,7 @@
 	    ("syncache: mbuf too small"));
 
 	/* Create the IP+TCP header from scratch. */
-	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifdef MAC
@@ -1391,7 +1593,7 @@
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(struct ip) >> 2;
-		ip->ip_len = tlen;
+		ip->ip_len = htons(tlen);
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_sum = 0;
@@ -1409,7 +1611,7 @@
 		 *	2) the SCF_UNREACH flag has been set
 		 */
 		if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
-		       ip->ip_off |= IP_DF;
+		       ip->ip_off |= htons(IP_DF);
 
 		th = (struct tcphdr *)(ip + 1);
 	}
@@ -1452,6 +1654,16 @@
 		if (sc->sc_flags & SCF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif
+
+#ifdef TCP_RFC7413
+		if (sc->sc_tfo_cookie) {
+			to.to_flags |= TOF_FASTOPEN;
+			to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+			to.to_tfo_cookie = sc->sc_tfo_cookie;
+			/* don't send cookie again when retransmitting response */
+			sc->sc_tfo_cookie = NULL;
+		}
+#endif
 		optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 		/* Adjust headers by option size. */
@@ -1469,12 +1681,21 @@
 			ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
 		else
 #endif
-			ip->ip_len += optlen;
+			ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 	} else
 		optlen = 0;
 
 	M_SETFIB(m, sc->sc_inc.inc_fibnum);
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+	/*
+	 * If we have peer's SYN and it has a flowid, then let's assign it to
+	 * our SYN|ACK.  ip6_output() and ip_output() will not assign flowid
+	 * to SYN|ACK due to lack of inp here.
+	 */
+	if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) {
+		m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
+		M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
+	}
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
@@ -1481,6 +1702,15 @@
 		th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
 		    IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
+#ifdef TCP_OFFLOAD
+		if (ADDED_BY_TOE(sc)) {
+			struct toedev *tod = sc->sc_tod;
+
+			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+			return (error);
+		}
+#endif
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
@@ -1492,6 +1722,15 @@
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(tlen + optlen - hlen + IPPROTO_TCP));
+#ifdef TCP_OFFLOAD
+		if (ADDED_BY_TOE(sc)) {
+			struct toedev *tod = sc->sc_tod;
+
+			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+			return (error);
+		}
+#endif
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 #endif
@@ -1498,239 +1737,261 @@
 	return (error);
 }
 
-void
-syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
-    struct inpcb *inp, struct socket **lsop, struct mbuf *m)
-{
-	_syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
-}
-
-void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
-    struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
-    struct toe_usrreqs *tu, void *toepcb)
-{
-	struct tcpopt to;
-
-	bzero(&to, sizeof(struct tcpopt));
-	to.to_mss = toeo->to_mss;
-	to.to_wscale = toeo->to_wscale;
-	to.to_flags = toeo->to_flags;
-
-	INP_INFO_WLOCK(&V_tcbinfo);
-	INP_WLOCK(inp);
-
-	_syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
-}
-
 /*
- * The purpose of SYN cookies is to avoid keeping track of all SYN's we
- * receive and to be able to handle SYN floods from bogus source addresses
- * (where we will never receive any reply).  SYN floods try to exhaust all
- * our memory and available slots in the SYN cache table to cause a denial
- * of service to legitimate users of the local host.
+ * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
+ * that exceed the capacity of the syncache by avoiding the storage of any
+ * of the SYNs we receive.  Syncookies defend against blind SYN flooding
+ * attacks where the attacker does not have access to our responses.
  *
- * The idea of SYN cookies is to encode and include all necessary information
- * about the connection setup state within the SYN-ACK we send back and thus
- * to get along without keeping any local state until the ACK to the SYN-ACK
- * arrives (if ever).  Everything we need to know should be available from
- * the information we encoded in the SYN-ACK.
+ * Syncookies encode and include all necessary information about the
+ * connection setup within the SYN|ACK that we send back.  That way we
+ * can avoid keeping any local state until the ACK to our SYN|ACK returns
+ * (if ever).  Normally the syncache and syncookies are running in parallel
+ * with the latter taking over when the former is exhausted.  When matching
+ * syncache entry is found the syncookie is ignored.
  *
- * More information about the theory behind SYN cookies and its first
- * discussion and specification can be found at:
- *  http://cr.yp.to/syncookies.html    (overview)
- *  http://cr.yp.to/syncookies/archive (gory details)
+ * The only reliable information persisting the 3WHS is our inital sequence
+ * number ISS of 32 bits.  Syncookies embed a cryptographically sufficient
+ * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
+ * of our SYN|ACK.  The MAC can be recomputed when the ACK to our SYN|ACK
+ * returns and signifies a legitimate connection if it matches the ACK.
  *
- * This implementation extends the orginal idea and first implementation
- * of FreeBSD by using not only the initial sequence number field to store
- * information but also the timestamp field if present.  This way we can
- * keep track of the entire state we need to know to recreate the session in
- * its original form.  Almost all TCP speakers implement RFC1323 timestamps
- * these days.  For those that do not we still have to live with the known
- * shortcomings of the ISN only SYN cookies.
+ * The available space of 32 bits to store the hash and to encode the SYN
+ * option information is very tight and we should have at least 24 bits for
+ * the MAC to keep the number of guesses by blind spoofing reasonably high.
  *
- * Cookie layers:
+ * SYN option information we have to encode to fully restore a connection:
+ * MSS: is imporant to chose an optimal segment size to avoid IP level
+ *   fragmentation along the path.  The common MSS values can be encoded
+ *   in a 3-bit table.  Uncommon values are captured by the next lower value
+ *   in the table leading to a slight increase in packetization overhead.
+ * WSCALE: is necessary to allow large windows to be used for high delay-
+ *   bandwidth product links.  Not scaling the window when it was initially
+ *   negotiated is bad for performance as lack of scaling further decreases
+ *   the apparent available send window.  We only need to encode the WSCALE
+ *   we received from the remote end.  Our end can be recalculated at any
+ *   time.  The common WSCALE values can be encoded in a 3-bit table.
+ *   Uncommon values are captured by the next lower value in the table
+ *   making us under-estimate the available window size halving our
+ *   theoretically possible maximum throughput for that connection.
+ * SACK: Greatly assists in packet loss recovery and requires 1 bit.
+ * TIMESTAMP and SIGNATURE is not encoded because they are permanent options
+ *   that are included in all segments on a connection.  We enable them when
+ *   the ACK has them.
  *
- * Initial sequence number we send:
- * 31|................................|0
- *    DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
- *    D = MD5 Digest (first dword)
- *    M = MSS index
- *    R = Rotation of secret
- *    P = Odd or Even secret
+ * Security of syncookies and attack vectors:
  *
- * The MD5 Digest is computed with over following parameters:
- *  a) randomly rotated secret
- *  b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
- *  c) the received initial sequence number from remote host
- *  d) the rotation offset and odd/even bit
+ * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
+ * together with the gloabl secret to make it unique per connection attempt.
+ * Thus any change of any of those parameters results in a different MAC output
+ * in an unpredictable way unless a collision is encountered.  24 bits of the
+ * MAC are embedded into the ISS.
  *
- * Timestamp we send:
- * 31|................................|0
- *    DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
- *    D = MD5 Digest (third dword) (only as filler)
- *    S = Requested send window scale
- *    R = Requested receive window scale
- *    A = SACK allowed
- *    5 = TCP-MD5 enabled (not implemented yet)
- *    XORed with MD5 Digest (forth dword)
+ * To prevent replay attacks two rotating global secrets are updated with a
+ * new random value every 15 seconds.  The life-time of a syncookie is thus
+ * 15-30 seconds.
  *
- * The timestamp isn't cryptographically secure and doesn't need to be.
- * The double use of the MD5 digest dwords ties it to a specific remote/
- * local host/port, remote initial sequence number and our local time
- * limited secret.  A received timestamp is reverted (XORed) and then
- * the contained MD5 dword is compared to the computed one to ensure the
- * timestamp belongs to the SYN-ACK we sent.  The other parameters may
- * have been tampered with but this isn't different from supplying bogus
- * values in the SYN in the first place.
+ * Vector 1: Attacking the secret.  This requires finding a weakness in the
+ * MAC itself or the way it is used here.  The attacker can do a chosen plain
+ * text attack by varying and testing the all parameters under his control.
+ * The strength depends on the size and randomness of the secret, and the
+ * cryptographic security of the MAC function.  Due to the constant updating
+ * of the secret the attacker has at most 29.999 seconds to find the secret
+ * and launch spoofed connections.  After that he has to start all over again.
  *
- * Some problems with SYN cookies remain however:
- * Consider the problem of a recreated (and retransmitted) cookie.  If the
- * original SYN was accepted, the connection is established.  The second
- * SYN is inflight, and if it arrives with an ISN that falls within the
- * receive window, the connection is killed.
+ * Vector 2: Collision attack on the MAC of a single ACK.  With a 24 bit MAC
+ * size an average of 4,823 attempts are required for a 50% chance of success
+ * to spoof a single syncookie (birthday collision paradox).  However the
+ * attacker is blind and doesn't know if one of his attempts succeeded unless
+ * he has a side channel to interfere success from.  A single connection setup
+ * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
+ * This many attempts are required for each one blind spoofed connection.  For
+ * every additional spoofed connection he has to launch another N attempts.
+ * Thus for a sustained rate 100 spoofed connections per second approximately
+ * 1,800,000 packets per second would have to be sent.
  *
- * Notes:
- * A heuristic to determine when to accept syn cookies is not necessary.
- * An ACK flood would cause the syncookie verification to be attempted,
- * but a SYN flood causes syncookies to be generated.  Both are of equal
- * cost, so there's no point in trying to optimize the ACK flood case.
- * Also, if you don't process certain ACKs for some reason, then all someone
- * would have to do is launch a SYN and ACK flood at the same time, which
- * would stop cookie verification and defeat the entire purpose of syncookies.
+ * NB: The MAC function should be fast so that it doesn't become a CPU
+ * exhaustion attack vector itself.
+ *
+ * References:
+ *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
+ *  SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
+ *   http://cr.yp.to/syncookies.html    (overview)
+ *   http://cr.yp.to/syncookies/archive (details)
+ *
+ *
+ * Schematic construction of a syncookie enabled Initial Sequence Number:
+ *  0        1         2         3
+ *  12345678901234567890123456789012
+ * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
+ *
+ *  x 24 MAC (truncated)
+ *  W  3 Send Window Scale index
+ *  M  3 MSS index
+ *  S  1 SACK permitted
+ *  P  1 Odd/even secret
  */
-static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
 
-static void
-syncookie_generate(struct syncache_head *sch, struct syncache *sc,
-    u_int32_t *flowlabel)
+/*
+ * Distribution and probability of certain MSS values.  Those in between are
+ * rounded down to the next lower one.
+ * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
+ *                            .2%  .3%   5%    7%    7%    20%   15%   45%
+ */
+static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
+
+/*
+ * Distribution and probability of certain WSCALE values.  We have to map the
+ * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
+ * bits based on prevalence of certain values.  Where we don't have an exact
+ * match for are rounded down to the next lower one letting us under-estimate
+ * the true available window.  At the moment this would happen only for the
+ * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer
+ * and window size).  The absence of the WSCALE option (no scaling in either
+ * direction) is encoded with index zero.
+ * [WSCALE values histograms, Allman, 2012]
+ *                            X 10 10 35  5  6 14 10%   by host
+ *                            X 11  4  5  5 18 49  3%   by connections
+ */
+static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
+
+/*
+ * Compute the MAC for the SYN cookie.  SIPHASH-2-4 is chosen for its speed
+ * and good cryptographic properties.
+ */
+static uint32_t
+syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
+    uint8_t *secbits, uintptr_t secmod)
 {
-	MD5_CTX ctx;
-	u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
-	u_int32_t data;
-	u_int32_t *secbits;
-	u_int off, pmss, mss;
-	int i;
+	SIPHASH_CTX ctx;
+	uint32_t siphash[2];
 
+	SipHash24_Init(&ctx);
+	SipHash_SetKey(&ctx, secbits);
+	switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+	case 0:
+		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+		SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr));
+		break;
+#endif
+#ifdef INET6
+	case INC_ISIPV6:
+		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+		SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
+		break;
+#endif
+	}
+	SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport));
+	SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport));
+	SipHash_Update(&ctx, &irs, sizeof(irs));
+	SipHash_Update(&ctx, &flags, sizeof(flags));
+	SipHash_Update(&ctx, &secmod, sizeof(secmod));
+	SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+	return (siphash[0] ^ siphash[1]);
+}
+
+static tcp_seq
+syncookie_generate(struct syncache_head *sch, struct syncache *sc)
+{
+	u_int i, secbit, wscale;
+	uint32_t iss, hash;
+	uint8_t *secbits;
+	union syncookie cookie;
+
 	SCH_LOCK_ASSERT(sch);
 
-	/* Which of the two secrets to use. */
-	secbits = sch->sch_oddeven ?
-			sch->sch_secbits_odd : sch->sch_secbits_even;
+	cookie.cookie = 0;
 
-	/* Reseed secret if too old. */
-	if (sch->sch_reseed < time_uptime) {
-		sch->sch_oddeven = sch->sch_oddeven ? 0 : 1;	/* toggle */
-		secbits = sch->sch_oddeven ?
-				sch->sch_secbits_odd : sch->sch_secbits_even;
-		for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
-			secbits[i] = arc4random();
-		sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
+	/* Map our computed MSS into the 3-bit index. */
+	for (i = sizeof(tcp_sc_msstab) / sizeof(*tcp_sc_msstab) - 1;
+	     tcp_sc_msstab[i] > sc->sc_peer_mss && i > 0;
+	     i--)
+		;
+	cookie.flags.mss_idx = i;
+
+	/*
+	 * Map the send window scale into the 3-bit index but only if
+	 * the wscale option was received.
+	 */
+	if (sc->sc_flags & SCF_WINSCALE) {
+		wscale = sc->sc_requested_s_scale;
+		for (i = sizeof(tcp_sc_wstab) / sizeof(*tcp_sc_wstab) - 1;
+		     tcp_sc_wstab[i] > wscale && i > 0;
+		     i--)
+			;
+		cookie.flags.wscale_idx = i;
 	}
 
-	/* Secret rotation offset. */
-	off = sc->sc_iss & 0x7;			/* iss was randomized before */
+	/* Can we do SACK? */
+	if (sc->sc_flags & SCF_SACK)
+		cookie.flags.sack_ok = 1;
 
-	/* Maximum segment size calculation. */
-	pmss =
-	    max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)),	V_tcp_minmss);
-	for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
-		if (tcp_sc_msstab[mss] <= pmss)
-			break;
+	/* Which of the two secrets to use. */
+	secbit = sch->sch_sc->secret.oddeven & 0x1;
+	cookie.flags.odd_even = secbit;
 
-	/* Fold parameters and MD5 digest into the ISN we will send. */
-	data = sch->sch_oddeven;/* odd or even secret, 1 bit */
-	data |= off << 1;	/* secret offset, derived from iss, 3 bits */
-	data |= mss << 4;	/* mss, 3 bits */
+	secbits = sch->sch_sc->secret.key[secbit];
+	hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
+	    (uintptr_t)sch);
 
-	MD5Init(&ctx);
-	MD5Update(&ctx, ((u_int8_t *)secbits) + off,
-	    SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
-	MD5Update(&ctx, secbits, off);
-	MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
-	MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
-	MD5Update(&ctx, &data, sizeof(data));
-	MD5Final((u_int8_t *)&md5_buffer, &ctx);
+	/*
+	 * Put the flags into the hash and XOR them to get better ISS number
+	 * variance.  This doesn't enhance the cryptographic strength and is
+	 * done to prevent the 8 cookie bits from showing up directly on the
+	 * wire.
+	 */
+	iss = hash & ~0xff;
+	iss |= cookie.cookie ^ (hash >> 24);
 
-	data |= (md5_buffer[0] << 7);
-	sc->sc_iss = data;
-
-#ifdef INET6
-	*flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
-#endif
-
-	/* Additional parameters are stored in the timestamp if present. */
+	/* Randomize the timestamp. */
 	if (sc->sc_flags & SCF_TIMESTAMP) {
-		data =  ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
-		data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
-		data |= sc->sc_requested_s_scale << 2;  /* SWIN scale, 4 bits */
-		data |= sc->sc_requested_r_scale << 6;  /* RWIN scale, 4 bits */
-		data |= md5_buffer[2] << 10;		/* more digest bits */
-		data ^= md5_buffer[3];
-		sc->sc_ts = data;
-		sc->sc_tsoff = data - tcp_ts_getticks();	/* after XOR */
+		sc->sc_ts = arc4random();
+		sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
 	}
 
 	TCPSTAT_INC(tcps_sc_sendcookie);
+	return (iss);
 }
 
 static struct syncache *
 syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, 
-    struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
-    struct socket *so)
+    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+    struct socket *lso)
 {
-	MD5_CTX ctx;
-	u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
-	u_int32_t data = 0;
-	u_int32_t *secbits;
+	uint32_t hash;
+	uint8_t *secbits;
 	tcp_seq ack, seq;
-	int off, mss, wnd, flags;
+	int wnd, wscale = 0;
+	union syncookie cookie;
 
 	SCH_LOCK_ASSERT(sch);
 
 	/*
-	 * Pull information out of SYN-ACK/ACK and
-	 * revert sequence number advances.
+	 * Pull information out of SYN-ACK/ACK and revert sequence number
+	 * advances.
 	 */
 	ack = th->th_ack - 1;
 	seq = th->th_seq - 1;
-	off = (ack >> 1) & 0x7;
-	mss = (ack >> 4) & 0x7;
-	flags = ack & 0x7f;
 
-	/* Which of the two secrets to use. */
-	secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
-
 	/*
-	 * The secret wasn't updated for the lifetime of a syncookie,
-	 * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+	 * Unpack the flags containing enough information to restore the
+	 * connection.
 	 */
-	if (sch->sch_reseed + SYNCOOKIE_LIFETIME < time_uptime) {
-		return (NULL);
-	}
+	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
 
-	/* Recompute the digest so we can compare it. */
-	MD5Init(&ctx);
-	MD5Update(&ctx, ((u_int8_t *)secbits) + off,
-	    SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
-	MD5Update(&ctx, secbits, off);
-	MD5Update(&ctx, inc, sizeof(*inc));
-	MD5Update(&ctx, &seq, sizeof(seq));
-	MD5Update(&ctx, &flags, sizeof(flags));
-	MD5Final((u_int8_t *)&md5_buffer, &ctx);
+	/* Which of the two secrets to use. */
+	secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
 
-	/* Does the digest part of or ACK'ed ISS match? */
-	if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
+	hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
+
+	/* The recomputed hash matches the ACK if this was a genuine cookie. */
+	if ((ack & ~0xff) != (hash & ~0xff))
 		return (NULL);
 
-	/* Does the digest part of our reflected timestamp match? */
-	if (to->to_flags & TOF_TS) {
-		data = md5_buffer[3] ^ to->to_tsecr;
-		if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
-			return (NULL);
-	}
-
 	/* Fill in the syncache values. */
+	sc->sc_flags = 0;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ipopts = NULL;
 	
@@ -1737,52 +1998,125 @@
 	sc->sc_irs = seq;
 	sc->sc_iss = ack;
 
+	switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+	case 0:
+		sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
+		sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
+		break;
+#endif
 #ifdef INET6
-	if (inc->inc_flags & INC_ISIPV6) {
-		if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL)
-			sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
-	} else
+	case INC_ISIPV6:
+		if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
+			sc->sc_flowlabel = sc->sc_iss & IPV6_FLOWLABEL_MASK;
+		break;
 #endif
-	{
-		sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
-		sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
 	}
 
-	/* Additional parameters that were encoded in the timestamp. */
-	if (data) {
+	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
+
+	/* We can simply recompute receive window scale we sent earlier. */
+	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
+		wscale++;
+
+	/* Only use wscale if it was enabled in the orignal SYN. */
+	if (cookie.flags.wscale_idx > 0) {
+		sc->sc_requested_r_scale = wscale;
+		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
+		sc->sc_flags |= SCF_WINSCALE;
+	}
+
+	wnd = sbspace(&lso->so_rcv);
+	wnd = imax(wnd, 0);
+	wnd = imin(wnd, TCP_MAXWIN);
+	sc->sc_wnd = wnd;
+
+	if (cookie.flags.sack_ok)
+		sc->sc_flags |= SCF_SACK;
+
+	if (to->to_flags & TOF_TS) {
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_ts = to->to_tsecr;
 		sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
-		sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
-		sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
-		sc->sc_requested_s_scale = min((data >> 2) & 0xf,
-		    TCP_MAX_WINSHIFT);
-		sc->sc_requested_r_scale = min((data >> 6) & 0xf,
-		    TCP_MAX_WINSHIFT);
-		if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
-			sc->sc_flags |= SCF_WINSCALE;
-	} else
-		sc->sc_flags |= SCF_NOOPT;
+	}
 
-	wnd = sbspace(&so->so_rcv);
-	wnd = imax(wnd, 0);
-	wnd = imin(wnd, TCP_MAXWIN);
-	sc->sc_wnd = wnd;
+	if (to->to_flags & TOF_SIGNATURE)
+		sc->sc_flags |= SCF_SIGNATURE;
 
 	sc->sc_rxmits = 0;
-	sc->sc_peer_mss = tcp_sc_msstab[mss];
 
 	TCPSTAT_INC(tcps_sc_recvcookie);
 	return (sc);
 }
 
+#ifdef INVARIANTS
+static int
+syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+    struct socket *lso)
+{
+	struct syncache scs, *scx;
+	char *s;
+
+	bzero(&scs, sizeof(scs));
+	scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
+
+	if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
+		return (0);
+
+	if (scx != NULL) {
+		if (sc->sc_peer_mss != scx->sc_peer_mss)
+			log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
+			    s, __func__, sc->sc_peer_mss, scx->sc_peer_mss);
+
+		if (sc->sc_requested_r_scale != scx->sc_requested_r_scale)
+			log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
+			    s, __func__, sc->sc_requested_r_scale,
+			    scx->sc_requested_r_scale);
+
+		if (sc->sc_requested_s_scale != scx->sc_requested_s_scale)
+			log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
+			    s, __func__, sc->sc_requested_s_scale,
+			    scx->sc_requested_s_scale);
+
+		if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK))
+			log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__);
+	}
+
+	if (s != NULL)
+		free(s, M_TCPLOG);
+	return (0);
+}
+#endif /* INVARIANTS */
+
+static void
+syncookie_reseed(void *arg)
+{
+	struct tcp_syncache *sc = arg;
+	uint8_t *secbits;
+	int secbit;
+
+	/*
+	 * Reseeding the secret doesn't have to be protected by a lock.
+	 * It only must be ensured that the new random values are visible
+	 * to all CPUs in a SMP environment.  The atomic with release
+	 * semantics ensures that.
+	 */
+	secbit = (sc->secret.oddeven & 0x1) ? 0 : 1;
+	secbits = sc->secret.key[secbit];
+	arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0);
+	atomic_add_rel_int(&sc->secret.oddeven, 1);
+
+	/* Reschedule ourself. */
+	callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
+}
+
 /*
  * Returns the current number of syncache entries.  This number
  * will probably change before you get around to calling 
  * syncache_pcblist.
  */
-
 int
 syncache_pcbcount(void)
 {

Modified: trunk/sys/netinet/tcp_syncache.h
===================================================================
--- trunk/sys/netinet/tcp_syncache.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_syncache.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_syncache.h 322315 2017-08-09 13:26:12Z tuexen $
  */
 
 #ifndef _NETINET_TCP_SYNCACHE_H_
@@ -35,8 +35,6 @@
 #define _NETINET_TCP_SYNCACHE_H_
 #ifdef _KERNEL
 
-struct toeopt;
-
 void	 syncache_init(void);
 #ifdef VIMAGE
 void	syncache_destroy(void);
@@ -44,14 +42,9 @@
 void	 syncache_unreach(struct in_conninfo *, struct tcphdr *);
 int	 syncache_expand(struct in_conninfo *, struct tcpopt *,
 	     struct tcphdr *, struct socket **, struct mbuf *);
-int	 tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
-             struct tcphdr *th, struct socket **lsop, struct mbuf *m);
-void	 syncache_add(struct in_conninfo *, struct tcpopt *,
-	     struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void	 tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
-             struct tcphdr *, struct inpcb *, struct socket **,
-             struct toe_usrreqs *tu, void *toepcb);
-
+int	 syncache_add(struct in_conninfo *, struct tcpopt *,
+	     struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+	     void *, void *);
 void	 syncache_chkrst(struct in_conninfo *, struct tcphdr *);
 void	 syncache_badack(struct in_conninfo *);
 int	 syncache_pcbcount(void);
@@ -76,13 +69,16 @@
 	u_int8_t	sc_requested_s_scale:4,
 			sc_requested_r_scale:4;
 	u_int16_t	sc_flags;
-#ifndef TCP_OFFLOAD_DISABLE
-	struct toe_usrreqs *sc_tu;		/* TOE operations */
-	void		*sc_toepcb;		/* TOE protocol block */
-#endif			
+#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
+	struct toedev	*sc_tod;		/* entry added by this TOE */
+	void		*sc_todctx;		/* TOE driver context */
+#endif
 	struct label	*sc_label;		/* MAC label reference */
 	struct ucred	*sc_cred;		/* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+	void		*sc_tfo_cookie;		/* for TCP Fast Open response */
+#endif
+	void		*sc_pspare;		/* TCP_SIGNATURE */
 	u_int32_t	sc_spare[2];		/* UTO */
 };
 
@@ -98,22 +94,26 @@
 #define SCF_SACK	0x80			/* send SACK option */
 #define SCF_ECN		0x100			/* send ECN setup packet */
 
-#define	SYNCOOKIE_SECRET_SIZE	8	/* dwords */
-#define	SYNCOOKIE_LIFETIME	16	/* seconds */
-
 struct syncache_head {
-	struct vnet	*sch_vnet;
 	struct mtx	sch_mtx;
 	TAILQ_HEAD(sch_head, syncache)	sch_bucket;
 	struct callout	sch_timer;
 	int		sch_nextc;
 	u_int		sch_length;
-	u_int		sch_oddeven;
-	u_int32_t	sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
-	u_int32_t	sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
-	u_int		sch_reseed;		/* time_uptime, seconds */
+	struct tcp_syncache *sch_sc;
+	time_t		sch_last_overflow;
 };
 
+#define	SYNCOOKIE_SECRET_SIZE	16
+#define	SYNCOOKIE_LIFETIME	15		/* seconds */
+
+struct syncookie_secret {
+	volatile u_int oddeven;
+	uint8_t key[2][SYNCOOKIE_SECRET_SIZE];
+	struct callout reseed;
+	u_int lifetime;
+};
+
 struct tcp_syncache {
 	struct	syncache_head *hashbase;
 	uma_zone_t zone;
@@ -120,11 +120,23 @@
 	u_int	hashsize;
 	u_int	hashmask;
 	u_int	bucket_limit;
-	u_int	cache_count;		/* XXX: unprotected */
 	u_int	cache_limit;
 	u_int	rexmt_limit;
 	u_int	hash_secret;
+	struct vnet *vnet;
+	struct syncookie_secret secret;
 };
 
+/* Internal use for the syncookie functions. */
+union syncookie {
+	uint8_t cookie;
+	struct {
+		uint8_t odd_even:1,
+			sack_ok:1,
+			wscale_idx:3,
+			mss_idx:3;
+	} flags;
+};
+
 #endif /* _KERNEL */
 #endif /* !_NETINET_TCP_SYNCACHE_H_ */

Modified: trunk/sys/netinet/tcp_timer.c
===================================================================
--- trunk/sys/netinet/tcp_timer.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timer.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,8 +31,9 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_timer.c 247498 2013-02-28 21:24:10Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_timer.c 330303 2018-03-03 00:54:12Z jhb $");
 
+#include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 
@@ -63,11 +64,22 @@
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 
+int    tcp_persmin;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
+
+int    tcp_persmax;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
+
 int	tcp_keepinit;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
@@ -99,9 +111,10 @@
     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
     "Retransmission Timer Slop");
 
-static int	always_keepalive = 1;
+int	tcp_always_keepalive = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
-    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+    &tcp_always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+__strong_reference(tcp_always_keepalive, always_keepalive);
 
 int    tcp_fast_finwait2_recycle = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
@@ -119,11 +132,59 @@
 	/* max idle probes */
 int	tcp_maxpersistidle;
 
-static int	tcp_rexmit_drop_options = 1;
+static int	tcp_rexmit_drop_options = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
+static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
+#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
+    "Path MTU Discovery Black Hole Detection Enabled");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
+#define	V_tcp_pmtud_blackhole_activated \
+    VNET(tcp_pmtud_blackhole_activated)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
+    "Path MTU Discovery Black Hole Detection, Activation Count");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
+#define	V_tcp_pmtud_blackhole_activated_min_mss \
+    VNET(tcp_pmtud_blackhole_activated_min_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
+    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
+#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
+    "Path MTU Discovery Black Hole Detection, Failure Count");
+
+#ifdef INET
+static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
+#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
+    "Path MTU Discovery Black Hole Detection lowered MSS");
+#endif
+
+#ifdef INET6
+static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
+#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
+    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+#endif
+
 static int	per_cpu_timers = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
@@ -144,9 +205,7 @@
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
-		INP_INFO_WLOCK(&V_tcbinfo);
 		(void) tcp_tw_2msl_scan(0);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
@@ -160,10 +219,6 @@
 
 static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
 
-static int tcp_timer_race;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
-    0, "Count of t_inpcb races on tcp_discardcb");
-
 /*
  * TCP timer processing.
  */
@@ -176,18 +231,7 @@
 	CURVNET_SET(tp->t_vnet);
 
 	inp = tp->t_inpcb;
-	/*
-	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
-	 * tear-down mean we need it as a work-around for races between
-	 * timers and tcp_discardcb().
-	 *
-	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
-	 */
-	if (inp == NULL) {
-		tcp_timer_race++;
-		CURVNET_RESTORE();
-		return;
-	}
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_delack) ||
 	    !callout_active(&tp->t_timers->tt_delack)) {
@@ -201,6 +245,10 @@
 		CURVNET_RESTORE();
 		return;
 	}
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+	KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
+		("%s: tp %p delack callout should be running", __func__, tp));
 
 	tp->t_flags |= TF_ACKNOW;
 	TCPSTAT_INC(tcps_delack);
@@ -220,30 +268,15 @@
 
 	ostate = tp->t_state;
 #endif
-	/*
-	 * XXXRW: Does this actually happen?
-	 */
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
-	/*
-	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
-	 * tear-down mean we need it as a work-around for races between
-	 * timers and tcp_discardcb().
-	 *
-	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
-	 */
-	if (inp == NULL) {
-		tcp_timer_race++;
-		INP_INFO_WUNLOCK(&V_tcbinfo);
-		CURVNET_RESTORE();
-		return;
-	}
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	tcp_free_sackholes(tp);
 	if (callout_pending(&tp->t_timers->tt_2msl) ||
 	    !callout_active(&tp->t_timers->tt_2msl)) {
 		INP_WUNLOCK(tp->t_inpcb);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -250,20 +283,33 @@
 	callout_deactivate(&tp->t_timers->tt_2msl);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+	KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
+		("%s: tp %p 2msl callout should be running", __func__, tp));
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
-	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
-	 * control block.  Otherwise, check again in a bit.
+	 * too long delete connection control block.  Otherwise, check
+	 * again in a bit.
 	 *
+	 * If in TIME_WAIT state just ignore as this timeout is handled in
+	 * tcp_tw_2msl_scan().
+	 *
 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 
 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 
 	 * Ignore fact that there were recent incoming segments.
 	 */
+	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
+		INP_WUNLOCK(inp);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		CURVNET_RESTORE();
+		return;
+	}
 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
 	    tp->t_inpcb && tp->t_inpcb->inp_socket && 
 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
@@ -270,11 +316,12 @@
 		TCPSTAT_INC(tcps_finwait2_drops);
 		tp = tcp_close(tp);             
 	} else {
-		if (tp->t_state != TCPS_TIME_WAIT &&
-		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
-		       callout_reset_on(&tp->t_timers->tt_2msl,
-			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
-	       else
+		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+			if (!callout_reset(&tp->t_timers->tt_2msl,
+			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
+				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
+			}
+		} else
 		       tp = tcp_close(tp);
        }
 
@@ -285,7 +332,7 @@
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -301,26 +348,14 @@
 
 	ostate = tp->t_state;
 #endif
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
-	/*
-	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
-	 * tear-down mean we need it as a work-around for races between
-	 * timers and tcp_discardcb().
-	 *
-	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
-	 */
-	if (inp == NULL) {
-		tcp_timer_race++;
-		INP_INFO_WUNLOCK(&V_tcbinfo);
-		CURVNET_RESTORE();
-		return;
-	}
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_keep) ||
 	    !callout_active(&tp->t_timers->tt_keep)) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -327,10 +362,14 @@
 	callout_deactivate(&tp->t_timers->tt_keep);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
+		("%s: tp %p keep callout should be running", __func__, tp));
 	/*
 	 * Keep-alive timer went off; send something
 	 * or drop connection if idle for too long.
@@ -338,7 +377,8 @@
 	TCPSTAT_INC(tcps_keeptimeo);
 	if (tp->t_state < TCPS_ESTABLISHED)
 		goto dropit;
-	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
+	if ((tcp_always_keepalive ||
+	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
@@ -362,11 +402,14 @@
 				    tp->rcv_nxt, tp->snd_una - 1, 0);
 			free(t_template, M_TEMP);
 		}
-		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
-		    tcp_timer_keep, tp, INP_CPU(inp));
-	} else
-		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
-		    tcp_timer_keep, tp, INP_CPU(inp));
+		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+		    tcp_timer_keep, tp)) {
+			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
+		}
+	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+		    tcp_timer_keep, tp)) {
+			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
+		}
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)
@@ -374,7 +417,7 @@
 			  PRU_SLOWTIMO);
 #endif
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 	return;
 
@@ -389,7 +432,7 @@
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -404,26 +447,14 @@
 
 	ostate = tp->t_state;
 #endif
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
-	/*
-	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
-	 * tear-down mean we need it as a work-around for races between
-	 * timers and tcp_discardcb().
-	 *
-	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
-	 */
-	if (inp == NULL) {
-		tcp_timer_race++;
-		INP_INFO_WUNLOCK(&V_tcbinfo);
-		CURVNET_RESTORE();
-		return;
-	}
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_persist) ||
 	    !callout_active(&tp->t_timers->tt_persist)) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -430,10 +461,14 @@
 	callout_deactivate(&tp->t_timers->tt_persist);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+	KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
+		("%s: tp %p persist callout should be running", __func__, tp));
 	/*
 	 * Persistance timer into zero window.
 	 * Force a byte to be output, if possible.
@@ -453,6 +488,16 @@
 		tp = tcp_drop(tp, ETIMEDOUT);
 		goto out;
 	}
+	/*
+	 * If the user has closed the socket then drop a persisting
+	 * connection after a much reduced timeout.
+	 */
+	if (tp->t_state > TCPS_CLOSE_WAIT &&
+	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+		TCPSTAT_INC(tcps_persistdrop);
+		tp = tcp_drop(tp, ETIMEDOUT);
+		goto out;
+	}
 	tcp_setpersist(tp);
 	tp->t_flags |= TF_FORCEDATA;
 	(void) tcp_output(tp);
@@ -465,7 +510,7 @@
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -482,21 +527,10 @@
 
 	ostate = tp->t_state;
 #endif
+
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
-	/*
-	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
-	 * tear-down mean we need it as a work-around for races between
-	 * timers and tcp_discardcb().
-	 *
-	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
-	 */
-	if (inp == NULL) {
-		tcp_timer_race++;
-		INP_INFO_RUNLOCK(&V_tcbinfo);
-		CURVNET_RESTORE();
-		return;
-	}
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
 	    !callout_active(&tp->t_timers->tt_rexmt)) {
@@ -512,6 +546,10 @@
 		CURVNET_RESTORE();
 		return;
 	}
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+	KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
+		("%s: tp %p rexmt callout should be running", __func__, tp));
 	tcp_free_sackholes(tp);
 	/*
 	 * Retransmission timer went off.  Message has not
@@ -521,22 +559,6 @@
 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
 		TCPSTAT_INC(tcps_timeoutdrop);
-		in_pcbref(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
-		INP_WUNLOCK(inp);
-		INP_INFO_WLOCK(&V_tcbinfo);
-		INP_WLOCK(inp);
-		if (in_pcbrele_wlocked(inp)) {
-			INP_INFO_WUNLOCK(&V_tcbinfo);
-			CURVNET_RESTORE();
-			return;
-		}
-		if (inp->inp_flags & INP_DROPPED) {
-			INP_WUNLOCK(inp);
-			INP_INFO_WUNLOCK(&V_tcbinfo);
-			CURVNET_RESTORE();
-			return;
-		}
 
 		tp = tcp_drop(tp, tp->t_softerror ?
 			      tp->t_softerror : ETIMEDOUT);
@@ -545,8 +567,14 @@
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	headlocked = 0;
-	if (tp->t_rxtshift == 1) {
+	if (tp->t_state == TCPS_SYN_SENT) {
 		/*
+		 * If the SYN was retransmitted, indicate CWND to be
+		 * limited to 1 segment in cc_conn_init().
+		 */
+		tp->snd_cwnd = 1;
+	} else if (tp->t_rxtshift == 1) {
+		/*
 		 * first retransmit; record ssthresh and cwnd so they can
 		 * be recovered if this turns out to be a "bad" retransmit.
 		 * A retransmit is considered "bad" if an ACK for this
@@ -571,14 +599,127 @@
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
 	TCPSTAT_INC(tcps_rexmttimeo);
-	if (tp->t_state == TCPS_SYN_SENT)
-		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
+	if ((tp->t_state == TCPS_SYN_SENT) ||
+	    (tp->t_state == TCPS_SYN_RECEIVED))
+		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
+
 	/*
-	 * Disable rfc1323 if we haven't got any response to
+	 * We enter the path for PLMTUD if connection is established or, if
+	 * connection is FIN_WAIT_1 status, reason for the last is that if
+	 * amount of data we send is very small, we could send it in couple of
+	 * packets and process straight to FIN. In that case we won't catch
+	 * ESTABLISHED state.
+	 */
+	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
+	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
+		int optlen;
+#ifdef INET6
+		int isipv6;
+#endif
+
+		/*
+		 * Idea here is that at each stage of mtu probe (usually, 1448
+		 * -> 1188 -> 524) should be given 2 chances to recover before
+		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
+		 *  take care of that.
+		 */
+		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
+		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
+		    (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
+			/*
+			 * Enter Path MTU Black-hole Detection mechanism:
+			 * - Disable Path MTU Discovery (IP "DF" bit).
+			 * - Reduce MTU to lower value than what we
+			 *   negotiated with peer.
+			 */
+			/* Record that we may have found a black hole. */
+			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
+
+			/* Keep track of previous MSS. */
+			optlen = tp->t_maxopd - tp->t_maxseg;
+			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+
+			/* 
+			 * Reduce the MSS to blackhole value or to the default
+			 * in an attempt to retransmit.
+			 */
+#ifdef INET6
+			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
+			if (isipv6 &&
+			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+				/* Use the sysctl tuneable blackhole MSS. */
+				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+				V_tcp_pmtud_blackhole_activated++;
+			} else if (isipv6) {
+				/* Use the default MSS. */
+				tp->t_maxopd = V_tcp_v6mssdflt;
+				/*
+				 * Disable Path MTU Discovery when we switch to
+				 * minmss.
+				 */
+				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+				V_tcp_pmtud_blackhole_activated_min_mss++;
+			}
+#endif
+#if defined(INET6) && defined(INET)
+			else
+#endif
+#ifdef INET
+			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+				/* Use the sysctl tuneable blackhole MSS. */
+				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+				V_tcp_pmtud_blackhole_activated++;
+			} else {
+				/* Use the default MSS. */
+				tp->t_maxopd = V_tcp_mssdflt;
+				/*
+				 * Disable Path MTU Discovery when we switch to
+				 * minmss.
+				 */
+				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+				V_tcp_pmtud_blackhole_activated_min_mss++;
+			}
+#endif
+			tp->t_maxseg = tp->t_maxopd - optlen;
+			/*
+			 * Reset the slow-start flight size
+			 * as it may depend on the new MSS.
+			 */
+			if (CC_ALGO(tp)->conn_init != NULL)
+				CC_ALGO(tp)->conn_init(tp->ccv);
+		} else {
+			/*
+			 * If further retransmissions are still unsuccessful
+			 * with a lowered MTU, maybe this isn't a blackhole and
+			 * we restore the previous MSS and blackhole detection
+			 * flags.
+			 * The limit '6' is determined by giving each probe
+			 * stage (1448, 1188, 524) 2 chances to recover.
+			 */
+			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
+			    (tp->t_rxtshift > 6)) {
+				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
+				optlen = tp->t_maxopd - tp->t_maxseg;
+				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
+				tp->t_maxseg = tp->t_maxopd - optlen;
+				V_tcp_pmtud_blackhole_failed++;
+				/*
+				 * Reset the slow-start flight size as it
+				 * may depend on the new MSS.
+				 */
+				if (CC_ALGO(tp)->conn_init != NULL)
+					CC_ALGO(tp)->conn_init(tp->ccv);
+			}
+		}
+	}
+
+	/*
+	 * Disable RFC1323 and SACK if we haven't got any response to
 	 * our third SYN to work-around some broken terminal servers
 	 * (most of which have hopefully been retired) that have bad VJ
 	 * header compression code which trashes TCP segments containing
@@ -586,7 +727,7 @@
 	 */
 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
 	    (tp->t_rxtshift == 3))
-		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
+		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
 	/*
 	 * If we backed off this far, our srtt estimate is probably bogus.
 	 * Clobber it so we'll take the next rtt measurement as our srtt;
@@ -597,7 +738,6 @@
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
-		else
 #endif
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
@@ -626,51 +766,80 @@
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	if (headlocked)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
-tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
 {
 	struct callout *t_callout;
-	void *f_callout;
+	timeout_t *f_callout;
 	struct inpcb *inp = tp->t_inpcb;
 	int cpu = INP_CPU(inp);
+	uint32_t f_reset;
 
+#ifdef TCP_OFFLOAD
+	if (tp->t_flags & TF_TOE)
+		return;
+#endif
+
+	if (tp->t_timers->tt_flags & TT_STOPPED)
+		return;
+
 	switch (timer_type) {
 		case TT_DELACK:
 			t_callout = &tp->t_timers->tt_delack;
 			f_callout = tcp_timer_delack;
+			f_reset = TT_DELACK_RST;
 			break;
 		case TT_REXMT:
 			t_callout = &tp->t_timers->tt_rexmt;
 			f_callout = tcp_timer_rexmt;
+			f_reset = TT_REXMT_RST;
 			break;
 		case TT_PERSIST:
 			t_callout = &tp->t_timers->tt_persist;
 			f_callout = tcp_timer_persist;
+			f_reset = TT_PERSIST_RST;
 			break;
 		case TT_KEEP:
 			t_callout = &tp->t_timers->tt_keep;
 			f_callout = tcp_timer_keep;
+			f_reset = TT_KEEP_RST;
 			break;
 		case TT_2MSL:
 			t_callout = &tp->t_timers->tt_2msl;
 			f_callout = tcp_timer_2msl;
+			f_reset = TT_2MSL_RST;
 			break;
 		default:
-			panic("bad timer_type");
+			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 	if (delta == 0) {
-		callout_stop(t_callout);
+		if ((tp->t_timers->tt_flags & timer_type) &&
+		    callout_stop(t_callout) &&
+		    (tp->t_timers->tt_flags & f_reset)) {
+			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
+		}
 	} else {
-		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+		if ((tp->t_timers->tt_flags & timer_type) == 0) {
+			tp->t_timers->tt_flags |= (timer_type | f_reset);
+			callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+		} else {
+			/* Reset already running callout on the same CPU. */
+			if (!callout_reset(t_callout, delta, f_callout, tp)) {
+				/*
+				 * Callout not cancelled, consider it as not
+				 * properly restarted. */
+				tp->t_timers->tt_flags &= ~f_reset;
+			}
+		}
 	}
 }
 
 int
-tcp_timer_active(struct tcpcb *tp, int timer_type)
+tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct callout *t_callout;
 
@@ -691,28 +860,91 @@
 			t_callout = &tp->t_timers->tt_2msl;
 			break;
 		default:
-			panic("bad timer_type");
+			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 	return callout_active(t_callout);
 }
 
+void
+tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+{
+	struct callout *t_callout;
+	timeout_t *f_callout;
+	uint32_t f_reset;
+
+	tp->t_timers->tt_flags |= TT_STOPPED;
+
+	switch (timer_type) {
+		case TT_DELACK:
+			t_callout = &tp->t_timers->tt_delack;
+			f_callout = tcp_timer_delack_discard;
+			f_reset = TT_DELACK_RST;
+			break;
+		case TT_REXMT:
+			t_callout = &tp->t_timers->tt_rexmt;
+			f_callout = tcp_timer_rexmt_discard;
+			f_reset = TT_REXMT_RST;
+			break;
+		case TT_PERSIST:
+			t_callout = &tp->t_timers->tt_persist;
+			f_callout = tcp_timer_persist_discard;
+			f_reset = TT_PERSIST_RST;
+			break;
+		case TT_KEEP:
+			t_callout = &tp->t_timers->tt_keep;
+			f_callout = tcp_timer_keep_discard;
+			f_reset = TT_KEEP_RST;
+			break;
+		case TT_2MSL:
+			t_callout = &tp->t_timers->tt_2msl;
+			f_callout = tcp_timer_2msl_discard;
+			f_reset = TT_2MSL_RST;
+			break;
+		default:
+			panic("tp %p bad timer_type %#x", tp, timer_type);
+		}
+
+	if (tp->t_timers->tt_flags & timer_type) {
+		if (callout_stop(t_callout) &&
+		    (tp->t_timers->tt_flags & f_reset)) {
+			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
+		} else {
+			/*
+			 * Can't stop the callout, defer tcpcb actual deletion
+			 * to the last tcp timer discard callout.
+			 * The TT_STOPPED flag will ensure that no tcp timer
+			 * callouts can be restarted on our behalf, and
+			 * past this point currently running callouts waiting
+			 * on inp lock will return right away after the
+			 * classical check for callout reset/stop events:
+			 * callout_pending() || !callout_active()
+			 */
+			callout_reset(t_callout, 1, f_callout, tp);
+		}
+	}
+}
+
 #define	ticks_to_msecs(t)	(1000*(t) / hz)
 
 void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+    struct xtcp_timer *xtimer)
 {
-	bzero(xtimer, sizeof(struct xtcp_timer));
+	sbintime_t now;
+
+	bzero(xtimer, sizeof(*xtimer));
 	if (timer == NULL)
 		return;
+	now = getsbinuptime();
 	if (callout_active(&timer->tt_delack))
-		xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
+		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_rexmt))
-		xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
+		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_persist))
-		xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
+		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_keep))
-		xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_2msl))
-		xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
 	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
 }

Modified: trunk/sys/netinet/tcp_timer.h
===================================================================
--- trunk/sys/netinet/tcp_timer.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timer.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_timer.h 232945 2012-03-13 20:37:57Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/tcp_timer.h 330303 2018-03-03 00:54:12Z jhb $
  */
 
 #ifndef _NETINET_TCP_TIMER_H_
@@ -79,7 +79,7 @@
 #define	TCPTV_RTOBASE	(  3*hz)		/* assumed RTO if no info */
 #define	TCPTV_SRTTDFLT	(  3*hz)		/* assumed RTT if no info */
 
-#define	TCPTV_PERSMIN	(  5*hz)		/* retransmit persistence */
+#define	TCPTV_PERSMIN	(  5*hz)		/* minimum persist interval */
 #define	TCPTV_PERSMAX	( 60*hz)		/* maximum persist interval */
 
 #define	TCPTV_KEEP_INIT	( 75*hz)		/* initial connect keepalive */
@@ -119,11 +119,11 @@
 
 #define	TCP_MAXRXTSHIFT	12			/* maximum retransmits */
 
-#define	TCPTV_DELACK	(hz / PR_FASTHZ / 2)	/* 100ms timeout */
+#define	TCPTV_DELACK	( hz/10 )		/* 100ms timeout */
 
 #ifdef	TCPTIMERS
 static const char *tcptimers[] =
-    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+    { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
 #endif
 
 /*
@@ -147,13 +147,28 @@
 	struct	callout tt_keep;	/* keepalive */
 	struct	callout tt_2msl;	/* 2*msl TIME_WAIT timer */
 	struct	callout tt_delack;	/* delayed ACK timer */
+	uint32_t	tt_flags;	/* Timers flags */
+	uint32_t	tt_spare;	/* TDB */
 };
-#define TT_DELACK	0x01
-#define TT_REXMT	0x02
-#define TT_PERSIST	0x04
-#define TT_KEEP		0x08
-#define TT_2MSL		0x10
 
+/*
+ * Flags for the tt_flags field.
+ */
+#define TT_DELACK	0x0001
+#define TT_REXMT	0x0002
+#define TT_PERSIST	0x0004
+#define TT_KEEP		0x0008
+#define TT_2MSL		0x0010
+#define TT_MASK		(TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
+
+#define TT_DELACK_RST	0x0100
+#define TT_REXMT_RST	0x0200
+#define TT_PERSIST_RST	0x0400
+#define TT_KEEP_RST	0x0800
+#define TT_2MSL_RST	0x1000
+
+#define TT_STOPPED	0x00010000
+
 #define	TP_KEEPINIT(tp)	((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
 #define	TP_KEEPIDLE(tp)	((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
 #define	TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
@@ -160,6 +175,8 @@
 #define	TP_KEEPCNT(tp)	((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
 #define	TP_MAXIDLE(tp)	(TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
 
+extern int tcp_persmin;			/* minimum persist interval */
+extern int tcp_persmax;			/* maximum persist interval */
 extern int tcp_keepinit;		/* time to establish connection */
 extern int tcp_keepidle;		/* time before keepalive probes begin */
 extern int tcp_keepintvl;		/* time between keepalive probes */
@@ -171,7 +188,9 @@
 extern int tcp_msl;
 extern int tcp_ttl;			/* time to live for TCP segs */
 extern int tcp_backoff[];
+extern int tcp_syn_backoff[];
 
+extern int tcp_always_keepalive;
 extern int tcp_finwait2_timeout;
 extern int tcp_fast_finwait2_recycle;
 
@@ -178,11 +197,16 @@
 void	tcp_timer_init(void);
 void	tcp_timer_2msl(void *xtp);
 struct tcptw *
-	tcp_tw_2msl_scan(int _reuse);		/* XXX temporary */
+	tcp_tw_2msl_scan(int reuse);	/* XXX temporary? */
 void	tcp_timer_keep(void *xtp);
 void	tcp_timer_persist(void *xtp);
 void	tcp_timer_rexmt(void *xtp);
 void	tcp_timer_delack(void *xtp);
+void	tcp_timer_2msl_discard(void *xtp);
+void	tcp_timer_keep_discard(void *xtp);
+void	tcp_timer_persist_discard(void *xtp);
+void	tcp_timer_rexmt_discard(void *xtp);
+void	tcp_timer_delack_discard(void *xtp);
 void	tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
 	struct xtcp_timer *xtimer);
 

Modified: trunk/sys/netinet/tcp_timewait.c
===================================================================
--- trunk/sys/netinet/tcp_timewait.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timewait.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_timewait.c 247658 2013-03-02 17:51:22Z flo $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_timewait.c 309108 2016-11-24 14:48:46Z jch $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -92,7 +92,7 @@
 #include <security/mac/mac_framework.h>
 
 static VNET_DEFINE(uma_zone_t, tcptw_zone);
-#define	V_tcptw_zone			VNET(tcptw_zone)
+#define	V_tcptw_zone		VNET(tcptw_zone)
 static int	maxtcptw;
 
 /*
@@ -99,13 +99,34 @@
  * The timed wait queue contains references to each of the TCP sessions
  * currently in the TIME_WAIT state.  The queue pointers, including the
  * queue pointers in each tcptw structure, are protected using the global
- * tcbinfo lock, which must be held over queue iteration and modification.
+ * timewait lock, which must be held over queue iteration and modification.
+ *
+ * Rules on tcptw usage:
+ *  - a inpcb is always freed _after_ its tcptw
+ *  - a tcptw relies on its inpcb reference counting for memory stability
+ *  - a tcptw is dereferenceable only while its inpcb is locked
  */
 static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
-#define	V_twq_2msl			VNET(twq_2msl)
+#define	V_twq_2msl		VNET(twq_2msl)
 
+/* Global timewait lock */
+static VNET_DEFINE(struct rwlock, tw_lock);
+#define	V_tw_lock		VNET(tw_lock)
+
+#define	TW_LOCK_INIT(tw, d)	rw_init_flags(&(tw), (d), 0)
+#define	TW_LOCK_DESTROY(tw)	rw_destroy(&(tw))
+#define	TW_RLOCK(tw)		rw_rlock(&(tw))
+#define	TW_WLOCK(tw)		rw_wlock(&(tw))
+#define	TW_RUNLOCK(tw)		rw_runlock(&(tw))
+#define	TW_WUNLOCK(tw)		rw_wunlock(&(tw))
+#define	TW_LOCK_ASSERT(tw)	rw_assert(&(tw), RA_LOCKED)
+#define	TW_RLOCK_ASSERT(tw)	rw_assert(&(tw), RA_RLOCKED)
+#define	TW_WLOCK_ASSERT(tw)	rw_assert(&(tw), RA_WLOCKED)
+#define	TW_UNLOCK_ASSERT(tw)	rw_assert(&(tw), RA_UNLOCKED)
+
 static void	tcp_tw_2msl_reset(struct tcptw *, int);
-static void	tcp_tw_2msl_stop(struct tcptw *);
+static void	tcp_tw_2msl_stop(struct tcptw *, int);
+static int	tcp_twrespond(struct tcptw *, int);
 
 static int
 tcptw_auto_size(void)
@@ -172,6 +193,7 @@
 	else
 		uma_zone_set_max(V_tcptw_zone, maxtcptw);
 	TAILQ_INIT(&V_twq_2msl);
+	TW_LOCK_INIT(V_tw_lock, "tcptw");
 }
 
 #ifdef VIMAGE
@@ -180,11 +202,12 @@
 {
 	struct tcptw *tw;
 
-	INP_INFO_WLOCK(&V_tcbinfo);
-	while((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
+	INP_INFO_RLOCK(&V_tcbinfo);
+	while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
 		tcp_twclose(tw, 0);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
+	TW_LOCK_DESTROY(V_tw_lock);
 	uma_zdestroy(V_tcptw_zone);
 }
 #endif
@@ -205,9 +228,13 @@
 	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
 #endif
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_reset(). */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
+	/* A dropped inp should never transition to TIME_WAIT state. */
+	KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: "
+	    "(inp->inp_flags & INP_DROPPED) != 0"));
+
 	if (V_nolocaltimewait) {
 		int error = 0;
 #ifdef INET6
@@ -228,8 +255,23 @@
 		}
 	}
 
+
+	/*
+	 * For use only by DTrace.  We do not reference the state
+	 * after this point so modifying it in place is not a problem.
+	 */
+	tcp_state_change(tp, TCPS_TIME_WAIT);
+
 	tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
 	if (tw == NULL) {
+		/*
+		 * Reached limit on total number of TIMEWAIT connections
+		 * allowed. Remove a connection from TIMEWAIT queue in LRU
+		 * fashion to make room for this connection.
+		 *
+		 * XXX:  Check if it possible to always have enough room
+		 * in advance based on guarantees provided by uma_zalloc().
+		 */
 		tw = tcp_tw_2msl_scan(1);
 		if (tw == NULL) {
 			tp = tcp_close(tp);
@@ -238,7 +280,12 @@
 			return;
 		}
 	}
+	/*
+	 * The tcptw will hold a reference on its inpcb until tcp_twclose
+	 * is called
+	 */
 	tw->tw_inpcb = inp;
+	in_pcbref(inp);	/* Reference from tw */
 
 	/*
 	 * Recover last window size sent.
@@ -321,7 +368,7 @@
  * Most other new OSes use semi-randomized ISN values, so we
  * do not need to worry about them.
  */
-#define MS_ISN_BYTES_PER_SECOND		250000
+#define	MS_ISN_BYTES_PER_SECOND		250000
 
 /*
  * Determine if the ISN we will generate has advanced beyond the last
@@ -334,7 +381,7 @@
 	tcp_seq new_iss = tw->iss;
 	tcp_seq new_irs = tw->irs;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
 	new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
 
@@ -350,7 +397,7 @@
  * looking for a pcb in the listen state.  Returns 0 otherwise.
  */
 int
-tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
     struct mbuf *m, int tlen)
 {
 	struct tcptw *tw;
@@ -357,8 +404,7 @@
 	int thflags;
 	tcp_seq seq;
 
-	/* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/*
@@ -459,11 +505,10 @@
 	inp = tw->tw_inpcb;
 	KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
 	KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_stop(). */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);	/* in_pcbfree() */
 	INP_WLOCK_ASSERT(inp);
 
-	tw->tw_inpcb = NULL;
-	tcp_tw_2msl_stop(tw);
+	tcp_tw_2msl_stop(tw, reuse);
 	inp->inp_ppcb = NULL;
 	in_pcbdrop(inp);
 
@@ -492,17 +537,17 @@
 			 */
 			INP_WUNLOCK(inp);
 		}
-	} else
+	} else {
+		/*
+		 * The socket has been already cleaned-up for us, only free the
+		 * inpcb.
+		 */
 		in_pcbfree(inp);
+	}
 	TCPSTAT_INC(tcps_closed);
-	crfree(tw->tw_cred);
-	tw->tw_cred = NULL;
-	if (reuse)
-		return;
-	uma_zfree(V_tcptw_zone, tw);
 }
 
-int
+static int
 tcp_twrespond(struct tcptw *tw, int flags)
 {
 	struct inpcb *inp = tw->tw_inpcb;
@@ -524,7 +569,7 @@
 
 	INP_WLOCK_ASSERT(inp);
 
-	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 	m->m_data += max_linkhdr;
@@ -595,9 +640,9 @@
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
-		ip->ip_len = m->m_pkthdr.len;
+		ip->ip_len = htons(m->m_pkthdr.len);
 		if (V_path_mtu_discovery)
-			ip->ip_off |= IP_DF;
+			ip->ip_off |= htons(IP_DF);
 		error = ip_output(m, inp->inp_options, NULL,
 		    ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
 		    NULL, inp);
@@ -615,20 +660,43 @@
 tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
 {
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tw->tw_inpcb);
+
+	TW_WLOCK(V_tw_lock);
 	if (rearm)
 		TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
 	tw->tw_time = ticks + 2 * tcp_msl;
 	TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
+	TW_WUNLOCK(V_tw_lock);
 }
 
 static void
-tcp_tw_2msl_stop(struct tcptw *tw)
+tcp_tw_2msl_stop(struct tcptw *tw, int reuse)
 {
+	struct ucred *cred;
+	struct inpcb *inp;
+	int released;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+	TW_WLOCK(V_tw_lock);
+	inp = tw->tw_inpcb;
+	tw->tw_inpcb = NULL;
+
 	TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+	cred = tw->tw_cred;
+	tw->tw_cred = NULL;
+	TW_WUNLOCK(V_tw_lock);
+
+	if (cred != NULL)
+		crfree(cred);
+
+	released = in_pcbrele_wlocked(inp);
+	KASSERT(!released, ("%s: inp should not be released here", __func__));
+
+	if (!reuse)
+		uma_zfree(V_tcptw_zone, tw);
 }
 
 struct tcptw *
@@ -635,16 +703,70 @@
 tcp_tw_2msl_scan(int reuse)
 {
 	struct tcptw *tw;
+	struct inpcb *inp;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#ifdef INVARIANTS
+	if (reuse) {
+		/*
+		 * Exclusive pcbinfo lock is not required in reuse case even if
+		 * two inpcb locks can be acquired simultaneously:
+		 *  - the inpcb transitioning to TIME_WAIT state in
+		 *    tcp_tw_start(),
+		 *  - the inpcb closed by tcp_twclose().
+		 *
+		 * It is because only inpcbs in FIN_WAIT2 or CLOSING states can
+		 * transition in TIME_WAIT state.  Then a pcbcb cannot be in
+		 * TIME_WAIT list and transitioning to TIME_WAIT state at same
+		 * time.
+		 */
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+	}
+#endif
+
 	for (;;) {
+		TW_RLOCK(V_tw_lock);
 		tw = TAILQ_FIRST(&V_twq_2msl);
-		if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
+		if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
+			TW_RUNLOCK(V_tw_lock);
 			break;
-		INP_WLOCK(tw->tw_inpcb);
-		tcp_twclose(tw, reuse);
-		if (reuse)
-			return (tw);
+		}
+		KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
+		    __func__));
+
+		inp = tw->tw_inpcb;
+		in_pcbref(inp);
+		TW_RUNLOCK(V_tw_lock);
+
+		if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
+
+			INP_WLOCK(inp);
+			tw = intotw(inp);
+			if (in_pcbrele_wlocked(inp)) {
+				KASSERT(tw == NULL, ("%s: held last inp "
+				    "reference but tw not NULL", __func__));
+				INP_INFO_RUNLOCK(&V_tcbinfo);
+				continue;
+			}
+
+			if (tw == NULL) {
+				/* tcp_twclose() has already been called */
+				INP_WUNLOCK(inp);
+				INP_INFO_RUNLOCK(&V_tcbinfo);
+				continue;
+			}
+
+			tcp_twclose(tw, reuse);
+			INP_INFO_RUNLOCK(&V_tcbinfo);
+			if (reuse)
+			    return tw;
+		} else {
+			/* INP_INFO lock is busy, continue later. */
+			INP_WLOCK(inp);
+			if (!in_pcbrele_wlocked(inp))
+				INP_WUNLOCK(inp);
+			break;
+		}
 	}
-	return (NULL);
+
+	return NULL;
 }

Modified: trunk/sys/netinet/tcp_usrreq.c
===================================================================
--- trunk/sys/netinet/tcp_usrreq.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_usrreq.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_usrreq.c 241132 2012-10-02 12:57:47Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_usrreq.c 309108 2016-11-24 14:48:46Z jch $");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
@@ -59,6 +59,7 @@
 #include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
+#include <sys/syslog.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -80,6 +81,9 @@
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -88,7 +92,9 @@
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
+#ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
+#endif
 
 /*
  * TCP protocol interface to socket abstraction.
@@ -161,7 +167,7 @@
 {
 	struct tcpcb *tp;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
@@ -181,12 +187,43 @@
 		 * present until timewait ends.
 		 *
 		 * XXXRW: Would it be cleaner to free the tcptw here?
+		 *
+		 * Astute question indeed, from twtcp perspective there are
+		 * three cases to consider:
+		 *
+		 * #1 tcp_detach is called at tcptw creation time by
+		 *  tcp_twstart, then do not discard the newly created tcptw
+		 *  and leave inpcb present until timewait ends
+		 * #2 tcp_detach is called at timewait end (or reuse) by
+		 *  tcp_twclose, then the tcptw has already been discarded
+		 *  (or reused) and inpcb is freed here
+		 * #3 tcp_detach is called() after timewait ends (or reuse)
+		 *  (e.g. by soclose), then tcptw has already been discarded
+		 *  (or reused) and inpcb is freed here
+		 *
+		 *  In all three cases the tcptw should not be freed here.
 		 */
 		if (inp->inp_flags & INP_DROPPED) {
-			KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
-			    "INP_DROPPED && tp != NULL"));
 			in_pcbdetach(inp);
-			in_pcbfree(inp);
+			if (__predict_true(tp == NULL)) {
+				in_pcbfree(inp);
+			} else {
+				/*
+				 * This case should not happen as in TIMEWAIT
+				 * state the inp should not be destroyed before
+				 * its tcptw.  If INVARIANTS is defined, panic.
+				 */
+#ifdef INVARIANTS
+				panic("%s: Panic before an inp double-free: "
+				    "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
+				    , __func__);
+#else
+				log(LOG_ERR, "%s: Avoid an inp double-free: "
+				    "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
+				    , __func__);
+#endif
+				INP_WUNLOCK(inp);
+			}
 		} else {
 			in_pcbdetach(inp);
 			INP_WUNLOCK(inp);
@@ -224,15 +261,20 @@
 tcp_usr_detach(struct socket *so)
 {
 	struct inpcb *inp;
+	int rlock = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
-	INP_INFO_WLOCK(&V_tcbinfo);
+	if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
+		INP_INFO_RLOCK(&V_tcbinfo);
+		rlock = 1;
+	}
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_detach: inp_socket == NULL"));
 	tcp_detach(so, inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (rlock)
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 #ifdef INET
@@ -366,12 +408,19 @@
 		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
-		tp->t_state = TCPS_LISTEN;
+		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
-		tcp_offload_listen_open(tp);
+#ifdef TCP_OFFLOAD
+		if ((so->so_options & SO_NO_OFFLOAD) == 0)
+			tcp_offload_listen_start(tp);
+#endif
 	}
 	SOCK_UNLOCK(so);
 
+#ifdef TCP_RFC7413
+	if (tp->t_flags & TF_FASTOPEN)
+		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
 out:
 	TCPDEBUG2(PRU_LISTEN);
 	INP_WUNLOCK(inp);
@@ -408,11 +457,19 @@
 	}
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
-		tp->t_state = TCPS_LISTEN;
+		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
+#ifdef TCP_OFFLOAD
+		if ((so->so_options & SO_NO_OFFLOAD) == 0)
+			tcp_offload_listen_start(tp);
+#endif
 	}
 	SOCK_UNLOCK(so);
 
+#ifdef TCP_RFC7413
+	if (tp->t_flags & TF_FASTOPEN)
+		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
 out:
 	TCPDEBUG2(PRU_LISTEN);
 	INP_WUNLOCK(inp);
@@ -452,15 +509,26 @@
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
-	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
-		error = EINVAL;
+	if (inp->inp_flags & INP_TIMEWAIT) {
+		error = EADDRINUSE;
 		goto out;
 	}
+	if (inp->inp_flags & INP_DROPPED) {
+		error = ECONNREFUSED;
+		goto out;
+	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	if ((error = tcp_connect(tp, nam, td)) != 0)
 		goto out;
-	error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+	if (registered_toedevs > 0 &&
+	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
+	    (error = tcp_offload_connect(so, nam)) == 0)
+		goto out;
+#endif
+	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+	error = tcp_output(tp);
 out:
 	TCPDEBUG2(PRU_CONNECT);
 	INP_WUNLOCK(inp);
@@ -492,10 +560,14 @@
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
-	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
-		error = EINVAL;
+	if (inp->inp_flags & INP_TIMEWAIT) {
+		error = EADDRINUSE;
 		goto out;
 	}
+	if (inp->inp_flags & INP_DROPPED) {
+		error = ECONNREFUSED;
+		goto out;
+	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 #ifdef INET
@@ -520,7 +592,13 @@
 			goto out;
 		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
 			goto out;
-		error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+		if (registered_toedevs > 0 &&
+		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
+		    (error = tcp_offload_connect(so, nam)) == 0)
+			goto out;
+#endif
+		error = tcp_output(tp);
 		goto out;
 	}
 #endif
@@ -531,7 +609,14 @@
 		goto out;
 	if ((error = tcp6_connect(tp, nam, td)) != 0)
 		goto out;
-	error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+	if (registered_toedevs > 0 &&
+	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
+	    (error = tcp_offload_connect(so, nam)) == 0)
+		goto out;
+#endif
+	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+	error = tcp_output(tp);
 
 out:
 	TCPDEBUG2(PRU_CONNECT);
@@ -559,11 +644,13 @@
 	int error = 0;
 
 	TCPDEBUG0;
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
-	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+	if (inp->inp_flags & INP_TIMEWAIT)
+		goto out;
+	if (inp->inp_flags & INP_DROPPED) {
 		error = ECONNRESET;
 		goto out;
 	}
@@ -573,7 +660,7 @@
 out:
 	TCPDEBUG2(PRU_DISCONNECT);
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
@@ -581,13 +668,6 @@
 /*
  * Accept a connection.  Essentially all the work is done at higher levels;
  * just return the address of the peer, storing through addr.
- *
- * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
- * and is described in detail in the commit log entry for r175612.  Acquiring
- * it delays an accept(2) racing with sonewconn(), which inserts the socket
- * before the inpcb address/port fields are initialized.  A better fix would
- * prevent the socket from being placed in the listen queue until all fields
- * are fully initialized.
  */
 static int
 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
@@ -604,7 +684,6 @@
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
-	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = ECONNABORTED;
@@ -624,7 +703,6 @@
 out:
 	TCPDEBUG2(PRU_ACCEPT);
 	INP_WUNLOCK(inp);
-	INP_INFO_RUNLOCK(&V_tcbinfo);
 	if (error == 0)
 		*nam = in_sockaddr(port, &addr);
 	return error;
@@ -697,7 +775,7 @@
 	struct tcpcb *tp = NULL;
 
 	TCPDEBUG0;
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("inp == NULL"));
 	INP_WLOCK(inp);
@@ -710,12 +788,12 @@
 	socantsendmore(so);
 	tcp_usrclosed(tp);
 	if (!(inp->inp_flags & INP_DROPPED))
-		error = tcp_output_disconnect(tp);
+		error = tcp_output(tp);
 
 out:
 	TCPDEBUG2(PRU_SHUTDOWN);
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	return (error);
 }
@@ -740,7 +818,24 @@
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
-	tcp_output_rcvd(tp);
+#ifdef TCP_RFC7413
+	/*
+	 * For passively-created TFO connections, don't attempt a window
+	 * update while still in SYN_RECEIVED as this may trigger an early
+	 * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
+	 * application response data, or failing that, when the DELACK timer
+	 * expires.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_state == TCPS_SYN_RECEIVED))
+		goto out;
+#endif
+#ifdef TCP_OFFLOAD
+	if (tp->t_flags & TF_TOE)
+		tcp_offload_rcvd(tp);
+	else
+#endif
+	tcp_output(tp);
 
 out:
 	TCPDEBUG2(PRU_RCVD);
@@ -772,7 +867,7 @@
 	 * this call.
 	 */
 	if (flags & PRUS_EOF)
-		INP_INFO_WLOCK(&V_tcbinfo);
+		INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
 	INP_WLOCK(inp);
@@ -829,7 +924,7 @@
 			 * Close the send side of the connection after
 			 * the data is sent.
 			 */
-			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			socantsendmore(so);
 			tcp_usrclosed(tp);
 		}
@@ -836,7 +931,7 @@
 		if (!(inp->inp_flags & INP_DROPPED)) {
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags |= TF_MORETOCOME;
-			error = tcp_output_send(tp);
+			error = tcp_output(tp);
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags &= ~TF_MORETOCOME;
 		}
@@ -885,7 +980,7 @@
 		}
 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
 		tp->t_flags |= TF_FORCEDATA;
-		error = tcp_output_send(tp);
+		error = tcp_output(tp);
 		tp->t_flags &= ~TF_FORCEDATA;
 	}
 out:
@@ -893,7 +988,7 @@
 		  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 	INP_WUNLOCK(inp);
 	if (flags & PRUS_EOF)
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
@@ -910,7 +1005,7 @@
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_abort: inp_socket == NULL"));
@@ -932,7 +1027,7 @@
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 /*
@@ -948,7 +1043,7 @@
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_close: inp_socket == NULL"));
@@ -971,7 +1066,7 @@
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 /*
@@ -1119,8 +1214,7 @@
 
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
-	tp->t_state = TCPS_SYN_SENT;
-	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
@@ -1192,8 +1286,7 @@
 
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
-	tp->t_state = TCPS_SYN_SENT;
-	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
@@ -1324,9 +1417,9 @@
 				tp->t_flags |= TF_SIGNATURE;
 			else
 				tp->t_flags &= ~TF_SIGNATURE;
-			INP_WUNLOCK(inp);
-			break;
+			goto unlock_and_done;
 #endif /* TCP_SIGNATURE */
+
 		case TCP_NODELAY:
 		case TCP_NOOPT:
 			INP_WUNLOCK(inp);
@@ -1352,6 +1445,13 @@
 				tp->t_flags |= opt;
 			else
 				tp->t_flags &= ~opt;
+unlock_and_done:
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE) {
+				tcp_offload_ctloutput(tp, sopt->sopt_dir,
+				    sopt->sopt_name);
+			}
+#endif
 			INP_WUNLOCK(inp);
 			break;
 
@@ -1370,8 +1470,7 @@
 				if (TCPS_HAVEESTABLISHED(tp->t_state))
 					error = tcp_output(tp);
 			}
-			INP_WUNLOCK(inp);
-			break;
+			goto unlock_and_done;
 
 		case TCP_MAXSEG:
 			INP_WUNLOCK(inp);
@@ -1386,8 +1485,7 @@
 				tp->t_maxseg = optval;
 			else
 				error = EINVAL;
-			INP_WUNLOCK(inp);
-			break;
+			goto unlock_and_done;
 
 		case TCP_INFO:
 			INP_WUNLOCK(inp);
@@ -1439,8 +1537,7 @@
 				}
 			}
 			CC_LIST_RUNLOCK();
-			INP_WUNLOCK(inp);
-			break;
+			goto unlock_and_done;
 
 		case TCP_KEEPIDLE:
 		case TCP_KEEPINTVL:
@@ -1484,8 +1581,7 @@
 					    TP_KEEPINIT(tp));
 				break;
 			}
-			INP_WUNLOCK(inp);
-			break;
+			goto unlock_and_done;
 
 		case TCP_KEEPCNT:
 			INP_WUNLOCK(inp);
@@ -1499,9 +1595,31 @@
 			    (TP_MAXIDLE(tp) > 0))
 				tcp_timer_activate(tp, TT_2MSL,
 				    TP_MAXIDLE(tp));
+			goto unlock_and_done;
+
+#ifdef TCP_RFC7413
+		case TCP_FASTOPEN:
 			INP_WUNLOCK(inp);
-			break;
+			if (!V_tcp_fastopen_enabled)
+				return (EPERM);
 
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+			    sizeof optval);
+			if (error)
+				return (error);
+
+			INP_WLOCK_RECHECK(inp);
+			if (optval) {
+				tp->t_flags |= TF_FASTOPEN;
+				if ((tp->t_state == TCPS_LISTEN) &&
+				    (tp->t_tfo_pending == NULL))
+					tp->t_tfo_pending =
+					    tcp_fastopen_alloc_counter();
+			} else
+				tp->t_flags &= ~TF_FASTOPEN;
+			goto unlock_and_done;
+#endif
+
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1551,6 +1669,34 @@
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
 			break;
+		case TCP_KEEPIDLE:
+		case TCP_KEEPINTVL:
+		case TCP_KEEPINIT:
+		case TCP_KEEPCNT:
+			switch (sopt->sopt_name) {
+			case TCP_KEEPIDLE:
+				ui = tp->t_keepidle / hz;
+				break;
+			case TCP_KEEPINTVL:
+				ui = tp->t_keepintvl / hz;
+				break;
+			case TCP_KEEPINIT:
+				ui = tp->t_keepinit / hz;
+				break;
+			case TCP_KEEPCNT:
+				ui = tp->t_keepcnt;
+				break;
+			}
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &ui, sizeof(ui));
+			break;
+#ifdef TCP_RFC7413
+		case TCP_FASTOPEN:
+			optval = tp->t_flags & TF_FASTOPEN;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+#endif
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1563,18 +1709,6 @@
 #undef INP_WLOCK_RECHECK
 
 /*
- * tcp_sendspace and tcp_recvspace are the default send and receive window
- * sizes, respectively.  These are obsolescent (this information should
- * be set by the route).
- */
-u_long	tcp_sendspace = 1024*32;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
-    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
-u_long	tcp_recvspace = 1024*64;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
-    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
-
-/*
  * Attach TCP protocol to socket, allocating
  * internet protocol control block, tcp control block,
  * bufer space, and entering LISTEN state if to accept connections.
@@ -1587,16 +1721,16 @@
 	int error;
 
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
-		error = soreserve(so, tcp_sendspace, tcp_recvspace);
+		error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
 		if (error)
 			return (error);
 	}
 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
 	so->so_snd.sb_flags |= SB_AUTOSIZE;
-	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_INFO_RLOCK(&V_tcbinfo);
 	error = in_pcballoc(so, &V_tcbinfo);
 	if (error) {
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (error);
 	}
 	inp = sotoinpcb(so);
@@ -1612,12 +1746,12 @@
 	if (tp == NULL) {
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
+		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (ENOBUFS);
 	}
 	tp->t_state = TCPS_CLOSED;
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
@@ -1635,7 +1769,7 @@
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/*
@@ -1655,7 +1789,7 @@
 		sbflush(&so->so_rcv);
 		tcp_usrclosed(tp);
 		if (!(inp->inp_flags & INP_DROPPED))
-			tcp_output_disconnect(tp);
+			tcp_output(tp);
 	}
 }
 
@@ -1673,15 +1807,17 @@
 tcp_usrclosed(struct tcpcb *tp)
 {
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	switch (tp->t_state) {
 	case TCPS_LISTEN:
-		tcp_offload_listen_close(tp);
+#ifdef TCP_OFFLOAD
+		tcp_offload_listen_stop(tp);
+#endif
+		tcp_state_change(tp, TCPS_CLOSED);
 		/* FALLTHROUGH */
 	case TCPS_CLOSED:
-		tp->t_state = TCPS_CLOSED;
 		tp = tcp_close(tp);
 		/*
 		 * tcp_close() should never return NULL here as the socket is
@@ -1697,11 +1833,11 @@
 		break;
 
 	case TCPS_ESTABLISHED:
-		tp->t_state = TCPS_FIN_WAIT_1;
+		tcp_state_change(tp, TCPS_FIN_WAIT_1);
 		break;
 
 	case TCPS_CLOSE_WAIT:
-		tp->t_state = TCPS_LAST_ACK;
+		tcp_state_change(tp, TCPS_LAST_ACK);
 		break;
 	}
 	if (tp->t_state >= TCPS_FIN_WAIT_2) {
@@ -1884,6 +2020,10 @@
 		db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
 		comma = 1;
 	}
+	if (t_flags & TF_FASTOPEN) {
+		db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+		comma = 1;
+	}
 }
 
 static void

Modified: trunk/sys/netinet/tcp_var.h
===================================================================
--- trunk/sys/netinet/tcp_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet/tcp_var.h 235051 2012-05-05 07:55:50Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/tcp_var.h 317375 2017-04-24 16:31:28Z smh $
  */
 
 #ifndef _NETINET_TCP_VAR_H_
@@ -74,7 +74,12 @@
 	tcp_seq		last_sack_ack;	/* Most recent/largest sacked ack */
 
 	int		ispare;		/* explicit pad for 64bit alignment */
-	uint64_t	_pad[2];	/* 1 sacked_bytes, 1 TBD */
+	int             sacked_bytes;	/*
+					 * Total sacked bytes reported by the
+					 * receiver via sack option
+					 */
+	uint32_t	_pad1[1];	/* TBD */
+	uint64_t	_pad[1];	/* TBD */
 };
 
 struct tcptemp {
@@ -195,7 +200,7 @@
 	int	t_rttlow;		/* smallest observerved RTT */
 	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
 	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
-	struct toe_usrreqs *t_tu;	/* offload operations vector */
+	struct toedev	*tod;		/* toedev handling this connection */
 	int	t_sndrexmitpack;	/* retransmit packets sent */
 	int	t_rcvoopack;		/* out-of-order packets received */
 	void	*t_toe;			/* TOE pcb pointer */
@@ -209,9 +214,25 @@
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
-	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
-	void	*t_pspare2[4];		/* 4 TBD */
-	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
+	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
+	u_int	t_pmtud_saved_maxopd;	/* pre-blackhole MSS */
+	u_int	t_flags2;		/* More tcpcb flags storage */
+
+	uint32_t t_ispare[6];		/* 5 UTO, 1 TBD */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+	void	*t_pspare2[3];		/* 1 TCP_SIGNATURE, 2 TBD */
+	unsigned int *t_tfo_pending;	/* TCP Fast Open pending counter */
+#else
+	void	*t_pspare2[4];		/* 1 TCP_SIGNATURE, 3 TBD */
+#endif
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+	uint64_t _pad[4];		/* 4 TBD (1-2 CC/RTT?) */
+	uint64_t t_tfo_cookie;		/* TCP Fast Open cookie */
+#else
+	uint64_t _pad[5];		/* 5 TBD (1-2 CC/RTT?) */
+#endif
+	uint32_t t_tsomaxsegcount;	/* TSO maximum segment count */
+	uint32_t t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
 };
 
 /*
@@ -246,6 +267,7 @@
 #define	TF_ECN_SND_ECE	0x10000000	/* ECN ECE in queue */
 #define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
 #define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
+#define	TF_FASTOPEN	0x80000000	/* TCP Fast Open indication */
 
 #define	IN_FASTRECOVERY(t_flags)	(t_flags & TF_FASTRECOVERY)
 #define	ENTER_FASTRECOVERY(t_flags)	t_flags |= TF_FASTRECOVERY
@@ -283,6 +305,13 @@
 #endif /* TCP_SIGNATURE */
 
 /*
+ * Flags for PLPMTU handling, t_flags2
+ */
+#define	TF2_PLPMTU_BLACKHOLE	0x00000001 /* Possible PLPMTUD Black Hole. */
+#define	TF2_PLPMTU_PMTUD	0x00000002 /* Allowed to attempt PLPMTUD. */
+#define	TF2_PLPMTU_MAXSEGSNT	0x00000004 /* Last seg sent was full seg. */
+
+/*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.
  * It's basically used to reduce the number of parameters
@@ -298,14 +327,17 @@
 #define	TOF_TS		0x0010		/* timestamp */
 #define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
 #define	TOF_SACK	0x0080		/* Peer sent SACK option */
-#define	TOF_MAXOPT	0x0100
+#define	TOF_FASTOPEN	0x0100		/* TCP Fast Open (TFO) cookie */
+#define	TOF_MAXOPT	0x0200
 	u_int32_t	to_tsval;	/* new timestamp */
 	u_int32_t	to_tsecr;	/* reflected timestamp */
 	u_char		*to_sacks;	/* pointer to the first SACK blocks */
 	u_char		*to_signature;	/* pointer to the TCP-MD5 signature */
+	u_char		*to_tfo_cookie; /* pointer to the TFO cookie */
 	u_int16_t	to_mss;		/* maximum segment size */
 	u_int8_t	to_wscale;	/* window scaling */
 	u_int8_t	to_nsacks;	/* number of SACK blocks */
+	u_int8_t	to_tfo_len;	/* TFO cookie length */
 	u_int32_t	to_spare;	/* UTO */
 };
 
@@ -325,6 +357,17 @@
 	u_long	rmx_recvpipe;   /* inbound delay-bandwidth product */
 };
 
+/*
+ * Used by tcp_maxmtu() to communicate interface specific features
+ * and limits at the time of connection setup.
+ */
+struct tcp_ifcap {
+	int	ifcap;
+	u_int	tsomax;
+	u_int	tsomaxsegcount;
+	u_int	tsomaxsegsize;
+};
+
 #ifndef _NETINET_IN_PCB_H_
 struct in_conninfo;
 #endif /* _NETINET_IN_PCB_H_ */
@@ -343,6 +386,8 @@
 	u_int		t_starttime;
 	int		tw_time;
 	TAILQ_ENTRY(tcptw) tw_2msl;
+	void		*tw_pspare;	/* TCP_SIGNATURE */
+	u_int		*tw_spare;	/* TCP_SIGNATURE */
 };
 
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
@@ -390,125 +435,129 @@
  * but that's inconvenient at the moment.
  */
 struct	tcpstat {
-	u_long	tcps_connattempt;	/* connections initiated */
-	u_long	tcps_accepts;		/* connections accepted */
-	u_long	tcps_connects;		/* connections established */
-	u_long	tcps_drops;		/* connections dropped */
-	u_long	tcps_conndrops;		/* embryonic connections dropped */
-	u_long	tcps_minmssdrops;	/* average minmss too low drops */
-	u_long	tcps_closed;		/* conn. closed (includes drops) */
-	u_long	tcps_segstimed;		/* segs where we tried to get rtt */
-	u_long	tcps_rttupdated;	/* times we succeeded */
-	u_long	tcps_delack;		/* delayed acks sent */
-	u_long	tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
-	u_long	tcps_rexmttimeo;	/* retransmit timeouts */
-	u_long	tcps_persisttimeo;	/* persist timeouts */
-	u_long	tcps_keeptimeo;		/* keepalive timeouts */
-	u_long	tcps_keepprobe;		/* keepalive probes sent */
-	u_long	tcps_keepdrops;		/* connections dropped in keepalive */
+	uint64_t tcps_connattempt;	/* connections initiated */
+	uint64_t tcps_accepts;		/* connections accepted */
+	uint64_t tcps_connects;		/* connections established */
+	uint64_t tcps_drops;		/* connections dropped */
+	uint64_t tcps_conndrops;	/* embryonic connections dropped */
+	uint64_t tcps_minmssdrops;	/* average minmss too low drops */
+	uint64_t tcps_closed;		/* conn. closed (includes drops) */
+	uint64_t tcps_segstimed;	/* segs where we tried to get rtt */
+	uint64_t tcps_rttupdated;	/* times we succeeded */
+	uint64_t tcps_delack;		/* delayed acks sent */
+	uint64_t tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
+	uint64_t tcps_rexmttimeo;	/* retransmit timeouts */
+	uint64_t tcps_persisttimeo;	/* persist timeouts */
+	uint64_t tcps_keeptimeo;	/* keepalive timeouts */
+	uint64_t tcps_keepprobe;	/* keepalive probes sent */
+	uint64_t tcps_keepdrops;	/* connections dropped in keepalive */
 
-	u_long	tcps_sndtotal;		/* total packets sent */
-	u_long	tcps_sndpack;		/* data packets sent */
-	u_long	tcps_sndbyte;		/* data bytes sent */
-	u_long	tcps_sndrexmitpack;	/* data packets retransmitted */
-	u_long	tcps_sndrexmitbyte;	/* data bytes retransmitted */
-	u_long	tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
-	u_long	tcps_sndacks;		/* ack-only packets sent */
-	u_long	tcps_sndprobe;		/* window probes sent */
-	u_long	tcps_sndurg;		/* packets sent with URG only */
-	u_long	tcps_sndwinup;		/* window update-only packets sent */
-	u_long	tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
+	uint64_t tcps_sndtotal;		/* total packets sent */
+	uint64_t tcps_sndpack;		/* data packets sent */
+	uint64_t tcps_sndbyte;		/* data bytes sent */
+	uint64_t tcps_sndrexmitpack;	/* data packets retransmitted */
+	uint64_t tcps_sndrexmitbyte;	/* data bytes retransmitted */
+	uint64_t tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
+	uint64_t tcps_sndacks;		/* ack-only packets sent */
+	uint64_t tcps_sndprobe;		/* window probes sent */
+	uint64_t tcps_sndurg;		/* packets sent with URG only */
+	uint64_t tcps_sndwinup;		/* window update-only packets sent */
+	uint64_t tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
 
-	u_long	tcps_rcvtotal;		/* total packets received */
-	u_long	tcps_rcvpack;		/* packets received in sequence */
-	u_long	tcps_rcvbyte;		/* bytes received in sequence */
-	u_long	tcps_rcvbadsum;		/* packets received with ccksum errs */
-	u_long	tcps_rcvbadoff;		/* packets received with bad offset */
-	u_long	tcps_rcvmemdrop;	/* packets dropped for lack of memory */
-	u_long	tcps_rcvshort;		/* packets received too short */
-	u_long	tcps_rcvduppack;	/* duplicate-only packets received */
-	u_long	tcps_rcvdupbyte;	/* duplicate-only bytes received */
-	u_long	tcps_rcvpartduppack;	/* packets with some duplicate data */
-	u_long	tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
-	u_long	tcps_rcvoopack;		/* out-of-order packets received */
-	u_long	tcps_rcvoobyte;		/* out-of-order bytes received */
-	u_long	tcps_rcvpackafterwin;	/* packets with data after window */
-	u_long	tcps_rcvbyteafterwin;	/* bytes rcvd after window */
-	u_long	tcps_rcvafterclose;	/* packets rcvd after "close" */
-	u_long	tcps_rcvwinprobe;	/* rcvd window probe packets */
-	u_long	tcps_rcvdupack;		/* rcvd duplicate acks */
-	u_long	tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
-	u_long	tcps_rcvackpack;	/* rcvd ack packets */
-	u_long	tcps_rcvackbyte;	/* bytes acked by rcvd acks */
-	u_long	tcps_rcvwinupd;		/* rcvd window update packets */
-	u_long	tcps_pawsdrop;		/* segments dropped due to PAWS */
-	u_long	tcps_predack;		/* times hdr predict ok for acks */
-	u_long	tcps_preddat;		/* times hdr predict ok for data pkts */
-	u_long	tcps_pcbcachemiss;
-	u_long	tcps_cachedrtt;		/* times cached RTT in route updated */
-	u_long	tcps_cachedrttvar;	/* times cached rttvar updated */
-	u_long	tcps_cachedssthresh;	/* times cached ssthresh updated */
-	u_long	tcps_usedrtt;		/* times RTT initialized from route */
-	u_long	tcps_usedrttvar;	/* times RTTVAR initialized from rt */
-	u_long	tcps_usedssthresh;	/* times ssthresh initialized from rt*/
-	u_long	tcps_persistdrop;	/* timeout in persist state */
-	u_long	tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
-	u_long	tcps_mturesent;		/* resends due to MTU discovery */
-	u_long	tcps_listendrop;	/* listen queue overflows */
-	u_long	tcps_badrst;		/* ignored RSTs in the window */
+	uint64_t tcps_rcvtotal;		/* total packets received */
+	uint64_t tcps_rcvpack;		/* packets received in sequence */
+	uint64_t tcps_rcvbyte;		/* bytes received in sequence */
+	uint64_t tcps_rcvbadsum;	/* packets received with ccksum errs */
+	uint64_t tcps_rcvbadoff;	/* packets received with bad offset */
+	uint64_t tcps_rcvmemdrop;	/* packets dropped for lack of memory */
+	uint64_t tcps_rcvshort;		/* packets received too short */
+	uint64_t tcps_rcvduppack;	/* duplicate-only packets received */
+	uint64_t tcps_rcvdupbyte;	/* duplicate-only bytes received */
+	uint64_t tcps_rcvpartduppack;	/* packets with some duplicate data */
+	uint64_t tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
+	uint64_t tcps_rcvoopack;	/* out-of-order packets received */
+	uint64_t tcps_rcvoobyte;	/* out-of-order bytes received */
+	uint64_t tcps_rcvpackafterwin;	/* packets with data after window */
+	uint64_t tcps_rcvbyteafterwin;	/* bytes rcvd after window */
+	uint64_t tcps_rcvafterclose;	/* packets rcvd after "close" */
+	uint64_t tcps_rcvwinprobe;	/* rcvd window probe packets */
+	uint64_t tcps_rcvdupack;	/* rcvd duplicate acks */
+	uint64_t tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
+	uint64_t tcps_rcvackpack;	/* rcvd ack packets */
+	uint64_t tcps_rcvackbyte;	/* bytes acked by rcvd acks */
+	uint64_t tcps_rcvwinupd;	/* rcvd window update packets */
+	uint64_t tcps_pawsdrop;		/* segments dropped due to PAWS */
+	uint64_t tcps_predack;		/* times hdr predict ok for acks */
+	uint64_t tcps_preddat;		/* times hdr predict ok for data pkts */
+	uint64_t tcps_pcbcachemiss;
+	uint64_t tcps_cachedrtt;	/* times cached RTT in route updated */
+	uint64_t tcps_cachedrttvar;	/* times cached rttvar updated */
+	uint64_t tcps_cachedssthresh;	/* times cached ssthresh updated */
+	uint64_t tcps_usedrtt;		/* times RTT initialized from route */
+	uint64_t tcps_usedrttvar;	/* times RTTVAR initialized from rt */
+	uint64_t tcps_usedssthresh;	/* times ssthresh initialized from rt*/
+	uint64_t tcps_persistdrop;	/* timeout in persist state */
+	uint64_t tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
+	uint64_t tcps_mturesent;	/* resends due to MTU discovery */
+	uint64_t tcps_listendrop;	/* listen queue overflows */
+	uint64_t tcps_badrst;		/* ignored RSTs in the window */
 
-	u_long	tcps_sc_added;		/* entry added to syncache */
-	u_long	tcps_sc_retransmitted;	/* syncache entry was retransmitted */
-	u_long	tcps_sc_dupsyn;		/* duplicate SYN packet */
-	u_long	tcps_sc_dropped;	/* could not reply to packet */
-	u_long	tcps_sc_completed;	/* successful extraction of entry */
-	u_long	tcps_sc_bucketoverflow;	/* syncache per-bucket limit hit */
-	u_long	tcps_sc_cacheoverflow;	/* syncache cache limit hit */
-	u_long	tcps_sc_reset;		/* RST removed entry from syncache */
-	u_long	tcps_sc_stale;		/* timed out or listen socket gone */
-	u_long	tcps_sc_aborted;	/* syncache entry aborted */
-	u_long	tcps_sc_badack;		/* removed due to bad ACK */
-	u_long	tcps_sc_unreach;	/* ICMP unreachable received */
-	u_long	tcps_sc_zonefail;	/* zalloc() failed */
-	u_long	tcps_sc_sendcookie;	/* SYN cookie sent */
-	u_long	tcps_sc_recvcookie;	/* SYN cookie received */
+	uint64_t tcps_sc_added;		/* entry added to syncache */
+	uint64_t tcps_sc_retransmitted;	/* syncache entry was retransmitted */
+	uint64_t tcps_sc_dupsyn;	/* duplicate SYN packet */
+	uint64_t tcps_sc_dropped;	/* could not reply to packet */
+	uint64_t tcps_sc_completed;	/* successful extraction of entry */
+	uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
+	uint64_t tcps_sc_cacheoverflow;	/* syncache cache limit hit */
+	uint64_t tcps_sc_reset;		/* RST removed entry from syncache */
+	uint64_t tcps_sc_stale;		/* timed out or listen socket gone */
+	uint64_t tcps_sc_aborted;	/* syncache entry aborted */
+	uint64_t tcps_sc_badack;	/* removed due to bad ACK */
+	uint64_t tcps_sc_unreach;	/* ICMP unreachable received */
+	uint64_t tcps_sc_zonefail;	/* zalloc() failed */
+	uint64_t tcps_sc_sendcookie;	/* SYN cookie sent */
+	uint64_t tcps_sc_recvcookie;	/* SYN cookie received */
 
-	u_long	tcps_hc_added;		/* entry added to hostcache */
-	u_long	tcps_hc_bucketoverflow;	/* hostcache per bucket limit hit */
+	uint64_t tcps_hc_added;		/* entry added to hostcache */
+	uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
 
-	u_long  tcps_finwait2_drops;    /* Drop FIN_WAIT_2 connection after time limit */
+	uint64_t tcps_finwait2_drops;    /* Drop FIN_WAIT_2 connection after time limit */
 
 	/* SACK related stats */
-	u_long	tcps_sack_recovery_episode; /* SACK recovery episodes */
-	u_long  tcps_sack_rexmits;	    /* SACK rexmit segments   */
-	u_long  tcps_sack_rexmit_bytes;	    /* SACK rexmit bytes      */
-	u_long  tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
-	u_long  tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
-	u_long  tcps_sack_sboverflow;	    /* times scoreboard overflowed */
+	uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
+	uint64_t tcps_sack_rexmits;	    /* SACK rexmit segments   */
+	uint64_t tcps_sack_rexmit_bytes;    /* SACK rexmit bytes      */
+	uint64_t tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
+	uint64_t tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
+	uint64_t tcps_sack_sboverflow;	    /* times scoreboard overflowed */
 	
 	/* ECN related stats */
-	u_long	tcps_ecn_ce;		/* ECN Congestion Experienced */
-	u_long	tcps_ecn_ect0;		/* ECN Capable Transport */
-	u_long	tcps_ecn_ect1;		/* ECN Capable Transport */
-	u_long	tcps_ecn_shs;		/* ECN successful handshakes */
-	u_long	tcps_ecn_rcwnd;		/* # times ECN reduced the cwnd */
+	uint64_t tcps_ecn_ce;		/* ECN Congestion Experienced */
+	uint64_t tcps_ecn_ect0;		/* ECN Capable Transport */
+	uint64_t tcps_ecn_ect1;		/* ECN Capable Transport */
+	uint64_t tcps_ecn_shs;		/* ECN successful handshakes */
+	uint64_t tcps_ecn_rcwnd;	/* # times ECN reduced the cwnd */
 
 	/* TCP_SIGNATURE related stats */
-	u_long	tcps_sig_rcvgoodsig;	/* Total matching signature received */
-	u_long	tcps_sig_rcvbadsig;	/* Total bad signature received */
-	u_long	tcps_sig_err_buildsig;	/* Mismatching signature received */
-	u_long	tcps_sig_err_sigopt;	/* No signature expected by socket */
-	u_long	tcps_sig_err_nosigopt;	/* No signature provided by segment */
+	uint64_t tcps_sig_rcvgoodsig;	/* Total matching signature received */
+	uint64_t tcps_sig_rcvbadsig;	/* Total bad signature received */
+	uint64_t tcps_sig_err_buildsig;	/* Mismatching signature received */
+	uint64_t tcps_sig_err_sigopt;	/* No signature expected by socket */
+	uint64_t tcps_sig_err_nosigopt;	/* No signature provided by segment */
 
-	u_long	_pad[12];		/* 6 UTO, 6 TBD */
+	uint64_t _pad[12];		/* 6 UTO, 6 TBD */
 };
 
 #ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat);	/* tcp statistics */
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	TCPSTAT_ADD(name, val)	V_tcpstat.name += (val)
+#define	TCPSTAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
 #define	TCPSTAT_INC(name)	TCPSTAT_ADD(name, 1)
 
 /*
@@ -516,7 +565,7 @@
  */
 void	kmod_tcpstat_inc(int statnum);
 #define	KMOD_TCPSTAT_INC(name)						\
-	kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(u_long))
+    kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
 
 /*
  * TCP specific helper hook point identifiers.
@@ -579,24 +628,6 @@
 #define	TCPCTL_MAXID		16
 #define TCPCTL_FINWAIT2_TIMEOUT        17
 
-#define TCPCTL_NAMES { \
-	{ 0, 0 }, \
-	{ "rfc1323", CTLTYPE_INT }, \
-	{ "mssdflt", CTLTYPE_INT }, \
-	{ "stats", CTLTYPE_STRUCT }, \
-	{ "rttdflt", CTLTYPE_INT }, \
-	{ "keepidle", CTLTYPE_INT }, \
-	{ "keepintvl", CTLTYPE_INT }, \
-	{ "sendspace", CTLTYPE_INT }, \
-	{ "recvspace", CTLTYPE_INT }, \
-	{ "keepinit", CTLTYPE_INT }, \
-	{ "pcblist", CTLTYPE_STRUCT }, \
-	{ "delacktime", CTLTYPE_INT }, \
-	{ "v6mssdflt", CTLTYPE_INT }, \
-	{ "maxid", CTLTYPE_INT }, \
-}
-
-
 #ifdef _KERNEL
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_tcp);
@@ -606,27 +637,27 @@
 
 VNET_DECLARE(struct inpcbhead, tcb);		/* queue of active tcpcb's */
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
-VNET_DECLARE(struct tcpstat, tcpstat);		/* tcp statistics */
 extern	int tcp_log_in_vain;
 VNET_DECLARE(int, tcp_mssdflt);	/* XXX */
 VNET_DECLARE(int, tcp_minmss);
 VNET_DECLARE(int, tcp_delack_enabled);
 VNET_DECLARE(int, tcp_do_rfc3390);
+VNET_DECLARE(int, tcp_do_initcwnd10);
+VNET_DECLARE(int, tcp_sendspace);
+VNET_DECLARE(int, tcp_recvspace);
 VNET_DECLARE(int, path_mtu_discovery);
-VNET_DECLARE(int, ss_fltsz);
-VNET_DECLARE(int, ss_fltsz_local);
 VNET_DECLARE(int, tcp_do_rfc3465);
 VNET_DECLARE(int, tcp_abc_l_var);
 #define	V_tcb			VNET(tcb)
 #define	V_tcbinfo		VNET(tcbinfo)
-#define	V_tcpstat		VNET(tcpstat)
 #define	V_tcp_mssdflt		VNET(tcp_mssdflt)
 #define	V_tcp_minmss		VNET(tcp_minmss)
 #define	V_tcp_delack_enabled	VNET(tcp_delack_enabled)
 #define	V_tcp_do_rfc3390	VNET(tcp_do_rfc3390)
+#define	V_tcp_do_initcwnd10	VNET(tcp_do_initcwnd10)
+#define	V_tcp_sendspace		VNET(tcp_sendspace)
+#define	V_tcp_recvspace		VNET(tcp_recvspace)
 #define	V_path_mtu_discovery	VNET(path_mtu_discovery)
-#define	V_ss_fltsz		VNET(ss_fltsz)
-#define	V_ss_fltsz_local	VNET(ss_fltsz_local)
 #define	V_tcp_do_rfc3465	VNET(tcp_do_rfc3465)
 #define	V_tcp_abc_l_var		VNET(tcp_abc_l_var)
 
@@ -643,6 +674,9 @@
 VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
 #define	V_tcp_hhh		VNET(tcp_hhh)
 
+VNET_DECLARE(int, tcp_do_rfc6675_pipe);
+#define V_tcp_do_rfc6675_pipe	VNET(tcp_do_rfc6675_pipe)
+
 int	 tcp_addoptions(struct tcpopt *, u_char *);
 int	 tcp_ccalgounload(struct cc_algo *unload_algo);
 struct tcpcb *
@@ -652,7 +686,7 @@
 #if 0
 int	 tcp_twrecycleable(struct tcptw *tw);
 #endif
-void	 tcp_twclose(struct tcptw *_tw, int _reuse);
+void	 tcp_twclose(struct tcptw *, int);
 void	 tcp_ctlinput(int, struct sockaddr *, void *);
 int	 tcp_ctloutput(struct socket *, struct sockopt *);
 struct tcpcb *
@@ -668,16 +702,15 @@
 char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
-void	 tcp_reass_init(void);
+void	 tcp_reass_global_init(void);
 void	 tcp_reass_flush(struct tcpcb *);
-#ifdef VIMAGE
-void	 tcp_reass_destroy(void);
-#endif
 void	 tcp_input(struct mbuf *, int);
-u_long	 tcp_maxmtu(struct in_conninfo *, int *);
-u_long	 tcp_maxmtu6(struct in_conninfo *, int *);
+int	 tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
+	    struct tcpcb *, int);
+u_long	 tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
+u_long	 tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
-	    int *);
+	    struct tcp_ifcap *);
 void	 tcp_mss(struct tcpcb *, int);
 int	 tcp_mssopt(struct in_conninfo *);
 struct inpcb *
@@ -687,6 +720,7 @@
 struct tcpcb *
 	 tcp_newtcpcb(struct inpcb *);
 int	 tcp_output(struct tcpcb *);
+void	 tcp_state_change(struct tcpcb *, int);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 void	 tcp_tw_init(void);
@@ -696,7 +730,6 @@
 void	 tcp_tw_zone_change(void);
 int	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
 	    struct mbuf *, int);
-int	 tcp_twrespond(struct tcptw *, int);
 void	 tcp_setpersist(struct tcpcb *);
 #ifdef TCP_SIGNATURE
 int	 tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
@@ -707,8 +740,9 @@
 struct tcptemp *
 	 tcpip_maketemplate(struct inpcb *);
 void	 tcpip_fillheaders(struct inpcb *, void *, void *);
-void	 tcp_timer_activate(struct tcpcb *, int, u_int);
-int	 tcp_timer_active(struct tcpcb *, int);
+void	 tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int	 tcp_timer_active(struct tcpcb *, uint32_t);
+void	 tcp_timer_stop(struct tcpcb *, uint32_t);
 void	 tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
 /*
  * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
@@ -723,11 +757,9 @@
 void	 tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
 
 extern	struct pr_usrreqs tcp_usrreqs;
-extern	u_long tcp_sendspace;
-extern	u_long tcp_recvspace;
 tcp_seq tcp_new_isn(struct tcpcb *);
 
-void	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
 void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
 void	 tcp_clean_sackreport(struct tcpcb *tp);
 void	 tcp_sack_adjust(struct tcpcb *tp);
@@ -735,10 +767,31 @@
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);
-u_long	 tcp_seq_subtract(u_long, u_long );
+int	 tcp_compute_pipe(struct tcpcb *);
 
 void	cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
 
+static inline void
+tcp_fields_to_host(struct tcphdr *th)
+{
+
+	th->th_seq = ntohl(th->th_seq);
+	th->th_ack = ntohl(th->th_ack);
+	th->th_win = ntohs(th->th_win);
+	th->th_urp = ntohs(th->th_urp);
+}
+
+#ifdef TCP_SIGNATURE
+static inline void
+tcp_fields_to_net(struct tcphdr *th)
+{
+
+	th->th_seq = htonl(th->th_seq);
+	th->th_ack = htonl(th->th_ack);
+	th->th_win = htons(th->th_win);
+	th->th_urp = htons(th->th_urp);
+}
+#endif
 #endif /* _KERNEL */
 
 #endif /* _NETINET_TCP_VAR_H_ */

Modified: trunk/sys/netinet/tcpip.h
===================================================================
--- trunk/sys/netinet/tcpip.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcpip.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcpip.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcpip.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/netinet/tcpip.h 139823 2005-01-07 01:45:51Z imp $
  */
 
 #ifndef _NETINET_TCPIP_H_

Added: trunk/sys/netinet/toecore.c
===================================================================
--- trunk/sys/netinet/toecore.c	                        (rev 0)
+++ trunk/sys/netinet/toecore.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,659 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/toecore.c 329982 2018-02-25 11:29:55Z hselasky $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/sockopt.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+
+#if defined(KLD_MODULE) || defined(INET) || defined(INET6)
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/nd6.h>
+#define TCPSTATES
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
+
+static struct mtx toedev_lock;
+static TAILQ_HEAD(, toedev) toedev_list;
+static eventhandler_tag listen_start_eh;
+static eventhandler_tag listen_stop_eh;
+static eventhandler_tag lle_event_eh;
+static eventhandler_tag route_redirect_eh;
+
+static int
+toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
+    struct rtentry *rt __unused, struct sockaddr *nam __unused)
+{
+
+	return (ENOTSUP);
+}
+
+static int
+toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+static int
+toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+static void
+toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    struct mbuf *m)
+{
+
+	m_freem(m);
+	return;
+}
+
+static void
+toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return;
+}
+
+static int
+toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+static void
+toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return;
+}
+
+static void
+toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
+    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
+    uint16_t vtag __unused)
+{
+
+	return;
+}
+
+static void
+toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
+    struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
+{
+
+	return;
+}
+
+static void
+toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
+{
+
+	return;
+}
+
+static void
+toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
+{
+
+	return;
+}
+
+static int
+toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
+    struct mbuf *m)
+{
+
+	m_freem(m);
+	return (0);
+}
+
+static void
+toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
+    struct socket *so __unused)
+{
+
+	return;
+}
+
+static void
+toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    int sopt_dir __unused, int sopt_name __unused)
+{
+
+	return;
+}
+
+/*
+ * Inform one or more TOE devices about a listening socket.
+ */
+static void
+toe_listen_start(struct inpcb *inp, void *arg)
+{
+	struct toedev *t, *tod;
+	struct tcpcb *tp;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
+	    ("%s: inp is not a TCP inp", __func__));
+
+	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
+		return;
+
+	tp = intotcpcb(inp);
+	if (tp->t_state != TCPS_LISTEN)
+		return;
+
+	t = arg;
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(tod, &toedev_list, link) {
+		if (t == NULL || t == tod)
+			tod->tod_listen_start(tod, tp);
+	}
+	mtx_unlock(&toedev_lock);
+}
+
+static void
+toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
+{
+	struct inpcb *inp = tp->t_inpcb;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(tp->t_state == TCPS_LISTEN,
+	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+	toe_listen_start(inp, NULL);
+}
+
+static void
+toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
+{
+	struct toedev *tod;
+#ifdef INVARIANTS
+	struct inpcb *inp = tp->t_inpcb;
+#endif
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(tp->t_state == TCPS_LISTEN,
+	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(tod, &toedev_list, link)
+	    tod->tod_listen_stop(tod, tp);
+	mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Fill up a freshly allocated toedev struct with reasonable defaults.
+ */
+void
+init_toedev(struct toedev *tod)
+{
+
+	tod->tod_softc = NULL;
+
+	/*
+	 * Provide no-op defaults so that the kernel can call any toedev
+	 * function without having to check whether the TOE driver supplied one
+	 * or not.
+	 */
+	tod->tod_connect = toedev_connect;
+	tod->tod_listen_start = toedev_listen_start;
+	tod->tod_listen_stop = toedev_listen_stop;
+	tod->tod_input = toedev_input;
+	tod->tod_rcvd = toedev_rcvd;
+	tod->tod_output = toedev_output;
+	tod->tod_send_rst = toedev_output;
+	tod->tod_send_fin = toedev_output;
+	tod->tod_pcb_detach = toedev_pcb_detach;
+	tod->tod_l2_update = toedev_l2_update;
+	tod->tod_route_redirect = toedev_route_redirect;
+	tod->tod_syncache_added = toedev_syncache_added;
+	tod->tod_syncache_removed = toedev_syncache_removed;
+	tod->tod_syncache_respond = toedev_syncache_respond;
+	tod->tod_offload_socket = toedev_offload_socket;
+	tod->tod_ctloutput = toedev_ctloutput;
+}
+
+/*
+ * Register an active TOE device with the system.  This allows it to receive
+ * notifications from the kernel.
+ */
+int
+register_toedev(struct toedev *tod)
+{
+	struct toedev *t;
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(t, &toedev_list, link) {
+		if (t == tod) {
+			mtx_unlock(&toedev_lock);
+			return (EEXIST);
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
+	registered_toedevs++;
+	mtx_unlock(&toedev_lock);
+
+	inp_apply_all(toe_listen_start, tod);
+
+	return (0);
+}
+
+/*
+ * Remove the TOE device from the global list of active TOE devices.  It is the
+ * caller's responsibility to ensure that the TOE device is quiesced prior to
+ * this call.
+ */
+int
+unregister_toedev(struct toedev *tod)
+{
+	struct toedev *t, *t2;
+	int rc = ENODEV;
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
+		if (t == tod) {
+			TAILQ_REMOVE(&toedev_list, tod, link);
+			registered_toedevs--;
+			rc = 0;
+			break;
+		}
+	}
+	KASSERT(registered_toedevs >= 0,
+	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
+	mtx_unlock(&toedev_lock);
+	return (rc);
+}
+
+void
+toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+    struct inpcb *inp, void *tod, void *todctx)
+{
+	struct socket *lso = inp->inp_socket;
+
+	INP_WLOCK_ASSERT(inp);
+
+	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+}
+
+int
+toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
+    struct tcphdr *th, struct socket **lsop)
+{
+
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+	return (syncache_expand(inc, to, th, lsop, NULL));
+}
+
+/*
+ * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
+ * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
+ * in TIME_WAIT may be assassinated freeing it up for re-use.
+ *
+ * Note that the TCP header must have been run through tcp_fields_to_host() or
+ * equivalent.
+ */
+int
+toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
+{
+	struct inpcb *inp;
+
+	if (inc->inc_flags & INC_ISIPV6) {
+#if defined(KLD_MODULE) || defined(INET6)
+		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
+		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
+		    INPLOOKUP_WLOCKPCB, ifp);
+#else
+		inp = NULL;
+#endif
+	} else {
+#if defined(KLD_MODULE) || defined(INET)
+		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
+		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
+#else
+		inp = NULL;
+#endif
+	}
+	if (inp != NULL) {
+		INP_WLOCK_ASSERT(inp);
+
+		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
+
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
+			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
+				return (EADDRINUSE);
+		} else {
+			INP_WUNLOCK(inp);
+			return (EADDRINUSE);
+		}
+	}
+
+	return (0);
+}
+
+static void
+toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+	struct toedev *tod;
+	struct ifnet *ifp;
+	struct sockaddr *sa;
+	uint8_t *lladdr;
+	uint16_t vtag;
+
+	LLE_WLOCK_ASSERT(lle);
+
+	ifp = lle->lle_tbl->llt_ifp;
+	sa = L3_ADDR(lle);
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
+	    __func__, evt, lle, sa));
+
+	/*
+	 * Not interested if the interface's TOE capability is not enabled.
+	 */
+	if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
+	    (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
+		return;
+
+	tod = TOEDEV(ifp);
+	if (tod == NULL)
+		return;
+
+	vtag = 0xfff;
+	if (evt != LLENTRY_RESOLVED) {
+
+		/*
+		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
+		 * this entry is going to be deleted.
+		 */
+
+		lladdr = NULL;
+	} else {
+
+		KASSERT(lle->la_flags & LLE_VALID,
+		    ("%s: %p resolved but not valid?", __func__, lle));
+
+		lladdr = (uint8_t *)&lle->ll_addr;
+#ifdef VLAN_TAG
+		VLAN_TAG(ifp, &vtag);
+#endif
+	}
+
+	tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
+}
+
+/*
+ * XXX: implement.
+ */
+static void
+toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
+    struct rtentry *rt1, struct sockaddr *sa)
+{
+
+	return;
+}
+
+#ifdef INET6
+/*
+ * XXX: no checks to verify that sa is really a neighbor because we assume it is
+ * the result of a route lookup and is on-link on the given ifp.
+ */
+static int
+toe_nd6_resolve(struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr)
+{
+	struct llentry *lle;
+	struct sockaddr_in6 *sin6 = (void *)sa;
+	int rc, flags = 0;
+
+restart:
+	IF_AFDATA_RLOCK(ifp);
+	lle = lla_lookup(LLTABLE6(ifp), flags, sa);
+	IF_AFDATA_RUNLOCK(ifp);
+	if (lle == NULL) {
+		IF_AFDATA_LOCK(ifp);
+		lle = nd6_lookup(&sin6->sin6_addr, ND6_CREATE | ND6_EXCLUSIVE,
+		    ifp);
+		IF_AFDATA_UNLOCK(ifp);
+		if (lle == NULL)
+			return (ENOMEM); /* Couldn't create entry in cache. */
+		lle->ln_state = ND6_LLINFO_INCOMPLETE;
+		nd6_llinfo_settimer_locked(lle,
+		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
+		LLE_WUNLOCK(lle);
+
+		nd6_ns_output(ifp, NULL, &sin6->sin6_addr, NULL, 0);
+
+		return (EWOULDBLOCK);
+	}
+
+	if (lle->ln_state == ND6_LLINFO_STALE) {
+		if ((flags & LLE_EXCLUSIVE) == 0) {
+			LLE_RUNLOCK(lle);
+			flags |= LLE_EXCLUSIVE;
+			goto restart;
+		}
+
+		LLE_WLOCK_ASSERT(lle);
+
+		lle->la_asked = 0;
+		lle->ln_state = ND6_LLINFO_DELAY;
+		nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz);
+	}
+
+	if (lle->la_flags & LLE_VALID) {
+		memcpy(lladdr, &lle->ll_addr, ifp->if_addrlen);
+		rc = 0;
+	} else
+		rc = EWOULDBLOCK;
+
+	if (flags & LLE_EXCLUSIVE)
+		LLE_WUNLOCK(lle);
+	else
+		LLE_RUNLOCK(lle);
+
+	return (rc);
+}
+#endif
+
+/*
+ * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
+ * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
+ * tod_l2_update will be called later, when the entry is resolved or times out.
+ */
+int
+toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+    uint8_t *lladdr, uint16_t *vtag)
+{
+#ifdef INET
+	struct llentry *lle;
+#endif
+	int rc;
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		rc = toe_nd6_resolve(ifp, sa, lladdr);
+		break;
+#endif
+	default:
+		return (EPROTONOSUPPORT);
+	}
+
+	if (rc == 0) {
+#ifdef VLAN_TAG
+		if (VLAN_TAG(ifp, vtag) != 0)
+#endif
+			*vtag = 0xfff;
+	}
+
+	return (rc);
+}
+
+void
+toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
+{
+
+	INP_WLOCK_ASSERT(inp);
+
+	if (!(inp->inp_flags & INP_DROPPED)) {
+		struct tcpcb *tp = intotcpcb(inp);
+
+		KASSERT(tp->t_flags & TF_TOE,
+		    ("%s: tp %p not offloaded.", __func__, tp));
+
+		if (err == EAGAIN) {
+
+			/*
+			 * Temporary failure during offload, take this PCB back.
+			 * Detach from the TOE driver and do the rest of what
+			 * TCP's pru_connect would have done if the connection
+			 * wasn't offloaded.
+			 */
+
+			tod->tod_pcb_detach(tod, tp);
+			KASSERT(!(tp->t_flags & TF_TOE),
+			    ("%s: tp %p still offloaded.", __func__, tp));
+			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+			(void) tcp_output(tp);
+		} else {
+
+			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+			tp = tcp_drop(tp, err);
+			if (tp == NULL)
+				INP_WLOCK(inp);	/* re-acquire */
+		}
+	}
+	INP_WLOCK_ASSERT(inp);
+}
+
+static int
+toecore_load(void)
+{
+
+	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
+	TAILQ_INIT(&toedev_list);
+
+	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
+	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
+	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
+	    EVENTHANDLER_PRI_ANY);
+	route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
+	    toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
+
+	return (0);
+}
+
+static int
+toecore_unload(void)
+{
+
+	mtx_lock(&toedev_lock);
+	if (!TAILQ_EMPTY(&toedev_list)) {
+		mtx_unlock(&toedev_lock);
+		return (EBUSY);
+	}
+
+	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
+	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
+	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+	EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
+
+	mtx_unlock(&toedev_lock);
+	mtx_destroy(&toedev_lock);
+
+	return (0);
+}
+
+static int
+toecore_mod_handler(module_t mod, int cmd, void *arg)
+{
+
+	if (cmd == MOD_LOAD)
+		return (toecore_load());
+
+	if (cmd == MOD_UNLOAD)
+		return (toecore_unload());
+
+	return (EOPNOTSUPP);
+}
+
+static moduledata_t mod_data= {
+	"toecore",
+	toecore_mod_handler,
+	0
+};
+
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+#endif /* defined(KLD_MODULE) || defined(INET) || defined(INET6) */
+
+MODULE_VERSION(toecore, 1);
+


Property changes on: trunk/sys/netinet/toecore.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/toecore.h
===================================================================
--- trunk/sys/netinet/toecore.h	                        (rev 0)
+++ trunk/sys/netinet/toecore.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,131 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/toecore.h 239511 2012-08-21 18:09:33Z np $
+ */
+
+#ifndef _NETINET_TOE_H_
+#define	_NETINET_TOE_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct tcpopt;
+struct tcphdr;
+struct in_conninfo;
+
+struct toedev {
+	TAILQ_ENTRY(toedev) link;	/* glue for toedev_list */
+	void *tod_softc;		/* TOE driver private data */
+
+	/*
+	 * Active open.  If a failure occurs, it is reported back by the driver
+	 * via toe_connect_failed.
+	 */
+	int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
+	    struct sockaddr *);
+
+	/* Passive open. */
+	int (*tod_listen_start)(struct toedev *, struct tcpcb *);
+	int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * The kernel uses this routine to pass on any frame it receives for an
+	 * offloaded connection to the TOE driver.  This is an unusual event.
+	 */
+	void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
+
+	/*
+	 * This is called by the kernel during pru_rcvd for an offloaded TCP
+	 * connection and provides an opportunity for the TOE driver to manage
+	 * its rx window and credits.
+	 */
+	void (*tod_rcvd)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * Transmit routine.  The kernel calls this to have the TOE driver
+	 * evaluate whether there is data to be transmitted, and transmit it.
+	 */
+	int (*tod_output)(struct toedev *, struct tcpcb *);
+
+	/* Immediate teardown: send RST to peer. */
+	int (*tod_send_rst)(struct toedev *, struct tcpcb *);
+
+	/* Initiate orderly disconnect by sending FIN to the peer. */
+	int (*tod_send_fin)(struct toedev *, struct tcpcb *);
+
+	/* Called to indicate that the kernel is done with this TCP PCB. */
+	void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * The kernel calls this once it has information about an L2 entry that
+	 * the TOE driver enquired about previously (via toe_l2_resolve).
+	 */
+	void (*tod_l2_update)(struct toedev *, struct ifnet *,
+	    struct sockaddr *, uint8_t *, uint16_t);
+
+	/* XXX.  Route has been redirected. */
+	void (*tod_route_redirect)(struct toedev *, struct ifnet *,
+	    struct rtentry *, struct rtentry *);
+
+	/* Syncache interaction. */
+	void (*tod_syncache_added)(struct toedev *, void *);
+	void (*tod_syncache_removed)(struct toedev *, void *);
+	int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
+	void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
+
+	/* TCP socket option */
+	void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
+};
+
+#include <sys/eventhandler.h>
+typedef	void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef	void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+void init_toedev(struct toedev *);
+int register_toedev(struct toedev *);
+int unregister_toedev(struct toedev *);
+
+/*
+ * General interface for looking up L2 information for an IP address.  If an
+ * answer is not available right away then the TOE driver's tod_l2_update will
+ * be called later.
+ */
+int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
+    uint8_t *, uint16_t *);
+
+void toe_connect_failed(struct toedev *, struct inpcb *, int);
+
+void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+    struct inpcb *, void *, void *);
+int  toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+    struct socket **);
+
+int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
+#endif


Property changes on: trunk/sys/netinet/toecore.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/udp.h
===================================================================
--- trunk/sys/netinet/udp.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)udp.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/udp.h 247525 2013-03-01 03:04:57Z jhb $
+ * $FreeBSD: stable/10/sys/netinet/udp.h 246210 2013-02-01 15:32:20Z jhb $
  */
 
 #ifndef _NETINET_UDP_H_

Modified: trunk/sys/netinet/udp_usrreq.c
===================================================================
--- trunk/sys/netinet/udp_usrreq.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp_usrreq.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -4,6 +4,7 @@
  *	The Regents of the University of California.
  * Copyright (c) 2008 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
@@ -37,12 +38,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/udp_usrreq.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/udp_usrreq.c 313558 2017-02-10 16:11:11Z vangyzen $");
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
@@ -55,6 +57,7 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -69,6 +72,7 @@
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
@@ -85,6 +89,7 @@
 #endif
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
+#include <netinet/udplite.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -96,8 +101,9 @@
 #include <security/mac/mac_framework.h>
 
 /*
- * UDP protocol implementation.
+ * UDP and UDP-Lite protocols implementation.
  * Per RFC 768, August, 1980.
+ * Per RFC 3828, July, 2004.
  */
 
 /*
@@ -137,6 +143,8 @@
 
 VNET_DEFINE(struct inpcbhead, udb);		/* from udp_var.h */
 VNET_DEFINE(struct inpcbinfo, udbinfo);
+VNET_DEFINE(struct inpcbhead, ulitecb);
+VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
 static VNET_DEFINE(uma_zone_t, udpcb_zone);
 #define	V_udpcb_zone			VNET(udpcb_zone)
 
@@ -144,11 +152,14 @@
 #define	UDBHASHSIZE	128
 #endif
 
-VNET_DEFINE(struct udpstat, udpstat);		/* from udp_var.h */
-SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
-    &VNET_NAME(udpstat), udpstat,
-    "UDP statistics (struct udpstat, netinet/udp_var.h)");
+VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat);		/* from udp_var.h */
+VNET_PCPUSTAT_SYSINIT(udpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
+    udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
 
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(udpstat);
+#endif /* VIMAGE */
 #ifdef INET
 static void	udp_detach(struct socket *so);
 static int	udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
@@ -182,6 +193,16 @@
 	return (0);
 }
 
+static int
+udplite_inpcb_init(void *mem, int size, int flags)
+{
+	struct inpcb *inp;
+
+	inp = mem;
+	INP_LOCK_INIT(inp, "inp", "udpliteinp");
+	return (0);
+}
+
 void
 udp_init(void)
 {
@@ -192,10 +213,20 @@
 	V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_udpcb_zone, maxsockets);
+	uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
 
+void
+udplite_init(void)
+{
+
+	in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
+	    UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
+	    UMA_ZONE_NOFREE, IPI_HASHFIELDS_2TUPLE);
+}
+
 /*
  * Kernel module interface for updating udpstat.  The argument is an index
  * into udpstat treated as an array of u_long.  While this encodes the
@@ -207,7 +238,7 @@
 kmod_udpstat_inc(int statnum)
 {
 
-	(*((u_long *)&V_udpstat + statnum))++;
+	counter_u64_add(VNET(udpstat)[statnum], 1);
 }
 
 int
@@ -237,6 +268,13 @@
 	in_pcbinfo_destroy(&V_udbinfo);
 	uma_zdestroy(V_udpcb_zone);
 }
+
+void
+udplite_destroy(void)
+{
+
+	in_pcbinfo_destroy(&V_ulitecbinfo);
+}
 #endif
 
 #ifdef INET
@@ -266,13 +304,11 @@
 	 */
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
-		(*up->u_tun_func)(n, off, inp);
+		(*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in,
+		    up->u_tun_ctx);
 		return;
 	}
 
-	if (n == NULL)
-		return;
-
 	off += sizeof(struct udphdr);
 
 #ifdef IPSEC
@@ -279,7 +315,7 @@
 	/* Check AH/ESP integrity. */
 	if (ipsec4_in_reject(n, inp)) {
 		m_freem(n);
-		V_ipsec4stat.in_polvio++;
+		IPSECSTAT_INC(ips_in_polvio);
 		return;
 	}
 #ifdef IPSEC_NAT_T
@@ -339,10 +375,13 @@
 	struct udphdr *uh;
 	struct ifnet *ifp;
 	struct inpcb *inp;
-	int len;
+	uint16_t len, ip_len;
+	struct inpcbinfo *pcbinfo;
 	struct ip save_ip;
 	struct sockaddr_in udp_in;
 	struct m_tag *fwd_tag;
+	int cscov_partial;
+	uint8_t pr;
 
 	ifp = m->m_pkthdr.rcvif;
 	UDPSTAT_INC(udps_ipackets);
@@ -353,7 +392,7 @@
 	 * check the checksum with options still present.
 	 */
 	if (iphlen > sizeof (struct ip)) {
-		ip_stripoptions(m, (struct mbuf *)0);
+		ip_stripoptions(m);
 		iphlen = sizeof(struct ip);
 	}
 
@@ -362,7 +401,7 @@
 	 */
 	ip = mtod(m, struct ip *);
 	if (m->m_len < iphlen + sizeof(struct udphdr)) {
-		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
 			UDPSTAT_INC(udps_hdrops);
 			return;
 		}
@@ -369,6 +408,8 @@
 		ip = mtod(m, struct ip *);
 	}
 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
+	pr = ip->ip_p;
+	cscov_partial = (pr == IPPROTO_UDPLITE) ? 1 : 0;
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
@@ -391,13 +432,20 @@
 	 * reflect UDP length, drop.
 	 */
 	len = ntohs((u_short)uh->uh_ulen);
-	if (ip->ip_len != len) {
-		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+	ip_len = ntohs(ip->ip_len) - iphlen;
+	if (pr == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
+		/* Zero means checksum over the complete packet. */
+		if (len == 0)
+			len = ip_len;
+		cscov_partial = 0;
+	}
+	if (ip_len != len) {
+		if (len > ip_len || len < sizeof(struct udphdr)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
-		m_adj(m, len - ip->ip_len);
-		/* ip->ip_len = len; */
+		if (pr == IPPROTO_UDP)
+			m_adj(m, len - ip_len);
 	}
 
 	/*
@@ -415,13 +463,14 @@
 	if (uh->uh_sum) {
 		u_short uh_sum;
 
-		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+		if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
+		    !cscov_partial) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				uh_sum = m->m_pkthdr.csum_data;
 			else
 				uh_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr, htonl((u_short)len +
-				    m->m_pkthdr.csum_data + IPPROTO_UDP));
+				    m->m_pkthdr.csum_data + pr));
 			uh_sum ^= 0xffff;
 		} else {
 			char b[9];
@@ -428,7 +477,8 @@
 
 			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
 			bzero(((struct ipovly *)ip)->ih_x1, 9);
-			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+			((struct ipovly *)ip)->ih_len = (pr == IPPROTO_UDP) ?
+			    uh->uh_ulen : htons(ip_len);
 			uh_sum = in_cksum(m, len + sizeof (struct ip));
 			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
 		}
@@ -437,17 +487,28 @@
 			m_freem(m);
 			return;
 		}
-	} else
-		UDPSTAT_INC(udps_nosum);
+	} else {
+		if (pr == IPPROTO_UDP) {
+			UDPSTAT_INC(udps_nosum);
+		} else {
+			/* UDPLite requires a checksum */
+			/* XXX: What is the right UDPLite MIB counter here? */
+			m_freem(m);
+			return;
+		}
+	}
 
+	pcbinfo = get_inpcbinfo(pr);
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    in_broadcast(ip->ip_dst, ifp)) {
 		struct inpcb *last;
+		struct inpcbhead *pcblist;
 		struct ip_moptions *imo;
 
-		INP_INFO_RLOCK(&V_udbinfo);
+		INP_INFO_RLOCK(pcbinfo);
+		pcblist = get_pcblist(pr);
 		last = NULL;
-		LIST_FOREACH(inp, &V_udb, inp_list) {
+		LIST_FOREACH(inp, pcblist, inp_list) {
 			if (inp->inp_lport != uh->uh_dport)
 				continue;
 #ifdef INET6
@@ -506,8 +567,12 @@
 			if (last != NULL) {
 				struct mbuf *n;
 
-				n = m_copy(m, 0, M_COPYALL);
-				udp_append(last, ip, n, iphlen, &udp_in);
+				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+					UDP_PROBE(receive, NULL, last, ip,
+					    last, uh);
+					udp_append(last, ip, n, iphlen,
+					    &udp_in);
+				}
 				INP_RUNLOCK(last);
 			}
 			last = inp;
@@ -533,12 +598,13 @@
 			UDPSTAT_INC(udps_noportbcast);
 			if (inp)
 				INP_RUNLOCK(inp);
-			INP_INFO_RUNLOCK(&V_udbinfo);
+			INP_INFO_RUNLOCK(pcbinfo);
 			goto badunlocked;
 		}
+		UDP_PROBE(receive, NULL, last, ip, last, uh);
 		udp_append(last, ip, m, iphlen, &udp_in);
 		INP_RUNLOCK(last);
-		INP_INFO_RUNLOCK(&V_udbinfo);
+		INP_INFO_RUNLOCK(pcbinfo);
 		return;
 	}
 
@@ -559,7 +625,7 @@
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
-		inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
 		if (!inp) {
 			/*
@@ -567,7 +633,7 @@
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
-			inp = in_pcblookup(&V_udbinfo, ip->ip_src,
+			inp = in_pcblookup(pcbinfo, ip->ip_src,
 			    uh->uh_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? htons(next_hop->sin_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -577,18 +643,18 @@
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP_NEXTHOP;
 	} else
-		inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
 		    INPLOOKUP_RLOCKPCB, ifp, m);
 	if (inp == NULL) {
 		if (udp_log_in_vain) {
-			char buf[4*sizeof "123"];
+			char src[INET_ADDRSTRLEN];
+			char dst[INET_ADDRSTRLEN];
 
-			strcpy(buf, inet_ntoa(ip->ip_dst));
 			log(LOG_INFO,
 			    "Connection attempt to UDP %s:%d from %s:%d\n",
-			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
-			    ntohs(uh->uh_sport));
+			    inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
+			    inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
 		}
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & (M_BCAST | M_MCAST)) {
@@ -600,7 +666,6 @@
 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
 			goto badunlocked;
 		*ip = save_ip;
-		ip->ip_len += iphlen;
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
 		return;
 	}
@@ -614,6 +679,18 @@
 		m_freem(m);
 		return;
 	}
+	if (cscov_partial) {
+		struct udpcb *up;
+
+		up = intoudpcb(inp);
+		if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
+			INP_RUNLOCK(inp);
+			m_freem(m);
+			return;
+		}
+	}
+
+	UDP_PROBE(receive, NULL, inp, ip, inp, uh);
 	udp_append(inp, ip, m, iphlen, &udp_in);
 	INP_RUNLOCK(inp);
 	return;
@@ -646,8 +723,9 @@
 }
 
 #ifdef INET
-void
-udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+static void
+udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
+    struct inpcbinfo *pcbinfo)
 {
 	struct ip *ip = vip;
 	struct udphdr *uh;
@@ -676,7 +754,7 @@
 		return;
 	if (ip != NULL) {
 		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
-		inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+		inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
 		    ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
 		if (inp != NULL) {
 			INP_RLOCK_ASSERT(inp);
@@ -686,9 +764,22 @@
 			INP_RUNLOCK(inp);
 		}
 	} else
-		in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
+		in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
 		    udp_notify);
 }
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+	return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
+}
+
+void
+udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+	return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
+}
 #endif /* INET */
 
 static int
@@ -844,16 +935,16 @@
 int
 udp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
-	int error = 0, optval;
 	struct inpcb *inp;
-#ifdef IPSEC_NAT_T
 	struct udpcb *up;
-#endif
+	int isudplite, error, optval;
 
+	error = 0;
+	isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 	INP_WLOCK(inp);
-	if (sopt->sopt_level != IPPROTO_UDP) {
+	if (sopt->sopt_level != so->so_proto->pr_protocol) {
 #ifdef INET6
 		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
 			INP_WUNLOCK(inp);
@@ -911,6 +1002,34 @@
 			}
 			INP_WUNLOCK(inp);
 			break;
+		case UDPLITE_SEND_CSCOV:
+		case UDPLITE_RECV_CSCOV:
+			if (!isudplite) {
+				INP_WUNLOCK(inp);
+				error = ENOPROTOOPT;
+				break;
+			}
+			INP_WUNLOCK(inp);
+			error = sooptcopyin(sopt, &optval, sizeof(optval),
+			    sizeof(optval));
+			if (error != 0)
+				break;
+			inp = sotoinpcb(so);
+			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+			INP_WLOCK(inp);
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+			if ((optval != 0 && optval < 8) || (optval > 65535)) {
+				INP_WUNLOCK(inp);
+				error = EINVAL;
+				break;
+			}
+			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+				up->u_txcslen = optval;
+			else
+				up->u_rxcslen = optval;
+			INP_WUNLOCK(inp);
+			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -928,6 +1047,22 @@
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
+		case UDPLITE_SEND_CSCOV:
+		case UDPLITE_RECV_CSCOV:
+			if (!isudplite) {
+				INP_WUNLOCK(inp);
+				error = ENOPROTOOPT;
+				break;
+			}
+			up = intoudpcb(inp);
+			KASSERT(up != NULL, ("%s: up == NULL", __func__));
+			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+				optval = up->u_txcslen;
+			else
+				optval = up->u_rxcslen;
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, &optval, sizeof(optval));
+			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -950,12 +1085,16 @@
 	int len = m->m_pkthdr.len;
 	struct in_addr faddr, laddr;
 	struct cmsghdr *cm;
+	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sin, src;
+	int cscov_partial = 0;
 	int error = 0;
 	int ipflags;
 	u_short fport, lport;
 	int unlock_udbinfo;
 	u_char tos;
+	uint8_t pr;
+	uint16_t cscov = 0;
 
 	/*
 	 * udp_output() may need to temporarily bind or connect the current
@@ -1050,12 +1189,14 @@
 	 *
 	 * XXXRW: Check that hash locking update here is correct.
 	 */
+	pr = inp->inp_socket->so_proto->pr_protocol;
+	pcbinfo = get_inpcbinfo(pr);
 	sin = (struct sockaddr_in *)addr;
 	if (sin != NULL &&
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 		INP_RUNLOCK(inp);
 		INP_WLOCK(inp);
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
 	} else if ((sin != NULL && (
 	    (sin->sin_addr.s_addr == INADDR_ANY) ||
@@ -1063,7 +1204,7 @@
 	    (inp->inp_laddr.s_addr == INADDR_ANY) ||
 	    (inp->inp_lport == 0))) ||
 	    (src.sin_family == AF_INET)) {
-		INP_HASH_RLOCK(&V_udbinfo);
+		INP_HASH_RLOCK(pcbinfo);
 		unlock_udbinfo = UH_RLOCKED;
 	} else
 		unlock_udbinfo = UH_UNLOCKED;
@@ -1076,7 +1217,7 @@
 	laddr = inp->inp_laddr;
 	lport = inp->inp_lport;
 	if (src.sin_family == AF_INET) {
-		INP_HASH_LOCK_ASSERT(&V_udbinfo);
+		INP_HASH_LOCK_ASSERT(pcbinfo);
 		if ((lport == 0) ||
 		    (laddr.s_addr == INADDR_ANY &&
 		     src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1127,7 +1268,7 @@
 		    inp->inp_lport == 0 ||
 		    sin->sin_addr.s_addr == INADDR_ANY ||
 		    sin->sin_addr.s_addr == INADDR_BROADCAST) {
-			INP_HASH_LOCK_ASSERT(&V_udbinfo);
+			INP_HASH_LOCK_ASSERT(pcbinfo);
 			error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
 			    &lport, &faddr.s_addr, &fport, NULL,
 			    td->td_ucred);
@@ -1142,7 +1283,7 @@
 			if (inp->inp_laddr.s_addr == INADDR_ANY &&
 			    inp->inp_lport == 0) {
 				INP_WLOCK_ASSERT(inp);
-				INP_HASH_WLOCK_ASSERT(&V_udbinfo);
+				INP_HASH_WLOCK_ASSERT(pcbinfo);
 				/*
 				 * Remember addr if jailed, to prevent
 				 * rebinding.
@@ -1176,7 +1317,7 @@
 	 * link-layer headers.  Immediate slide the data pointer back forward
 	 * since we won't use that space at this layer.
 	 */
-	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
+	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto release;
@@ -1191,13 +1332,31 @@
 	 */
 	ui = mtod(m, struct udpiphdr *);
 	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
-	ui->ui_pr = IPPROTO_UDP;
+	ui->ui_pr = pr;
 	ui->ui_src = laddr;
 	ui->ui_dst = faddr;
 	ui->ui_sport = lport;
 	ui->ui_dport = fport;
 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
+	if (pr == IPPROTO_UDPLITE) {
+		struct udpcb *up;
+		uint16_t plen;
 
+		up = intoudpcb(inp);
+		cscov = up->u_txcslen;
+		plen = (u_short)len + sizeof(struct udphdr);
+		if (cscov >= plen)
+			cscov = 0;
+		ui->ui_len = htons(plen);
+		ui->ui_ulen = htons(cscov);
+		/*
+		 * For UDP-Lite, checksum coverage length of zero means
+		 * the entire UDPLite packet is covered by the checksum.
+		 */
+		cscov_partial = (cscov == 0) ? 0 : 1;
+	} else
+		ui->ui_v = IPVERSION << 4;
+
 	/*
 	 * Set the Don't Fragment bit in the IP header.
 	 */
@@ -1205,7 +1364,7 @@
 		struct ip *ip;
 
 		ip = (struct ip *)&ui->ui_i;
-		ip->ip_off |= IP_DF;
+		ip->ip_off |= htons(IP_DF);
 	}
 
 	ipflags = 0;
@@ -1223,24 +1382,35 @@
 	/*
 	 * Set up checksum and output datagram.
 	 */
-	if (V_udp_cksum) {
+	ui->ui_sum = 0;
+	if (pr == IPPROTO_UDPLITE) {
 		if (inp->inp_flags & INP_ONESBCAST)
 			faddr.s_addr = INADDR_BROADCAST;
+		if (cscov_partial) {
+			if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
+				ui->ui_sum = 0xffff;
+		} else {
+			if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
+				ui->ui_sum = 0xffff;
+		}
+	} else if (V_udp_cksum) {
+		if (inp->inp_flags & INP_ONESBCAST)
+			faddr.s_addr = INADDR_BROADCAST;
 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
-		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
+		    htons((u_short)len + sizeof(struct udphdr) + pr));
 		m->m_pkthdr.csum_flags = CSUM_UDP;
 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
-	} else
-		ui->ui_sum = 0;
-	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+	}
+	((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
 	((struct ip *)ui)->ip_tos = tos;		/* XXX */
 	UDPSTAT_INC(udps_opackets);
 
 	if (unlock_udbinfo == UH_WLOCKED)
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 	else if (unlock_udbinfo == UH_RLOCKED)
-		INP_HASH_RUNLOCK(&V_udbinfo);
+		INP_HASH_RUNLOCK(pcbinfo);
+	UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 	error = ip_output(m, inp->inp_options, NULL, ipflags,
 	    inp->inp_moptions, inp);
 	if (unlock_udbinfo == UH_WLOCKED)
@@ -1251,10 +1421,10 @@
 
 release:
 	if (unlock_udbinfo == UH_WLOCKED) {
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		INP_WUNLOCK(inp);
 	} else if (unlock_udbinfo == UH_RLOCKED) {
-		INP_HASH_RUNLOCK(&V_udbinfo);
+		INP_HASH_RUNLOCK(pcbinfo);
 		INP_RUNLOCK(inp);
 	} else
 		INP_RUNLOCK(inp);
@@ -1292,7 +1462,7 @@
 	if (minlen > m->m_pkthdr.len)
 		minlen = m->m_pkthdr.len;
 	if ((m = m_pullup(m, minlen)) == NULL) {
-		V_ipsec4stat.in_inval++;
+		IPSECSTAT_INC(ips_in_inval);
 		return (NULL);		/* Bypass caller processing. */
 	}
 	data = mtod(m, caddr_t);	/* Points to ip header. */
@@ -1332,7 +1502,7 @@
 		uint32_t spi;
 
 		if (payload <= sizeof(struct esp)) {
-			V_ipsec4stat.in_inval++;
+			IPSECSTAT_INC(ips_in_inval);
 			m_freem(m);
 			return (NULL);	/* Discard. */
 		}
@@ -1353,7 +1523,7 @@
 	tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
 		2 * sizeof(uint16_t), M_NOWAIT);
 	if (tag == NULL) {
-		V_ipsec4stat.in_nomem++;
+		IPSECSTAT_INC(ips_in_nomem);
 		m_freem(m);
 		return (NULL);		/* Discard. */
 	}
@@ -1382,7 +1552,7 @@
 	m_adj(m, skip);
 
 	ip = mtod(m, struct ip *);
-	ip->ip_len -= skip;
+	ip->ip_len = htons(ntohs(ip->ip_len) - skip);
 	ip->ip_p = IPPROTO_ESP;
 
 	/*
@@ -1401,15 +1571,17 @@
 udp_abort(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		inp->inp_laddr.s_addr = INADDR_ANY;
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
@@ -1419,17 +1591,19 @@
 udp_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
 	error = soreserve(so, udp_sendspace, udp_recvspace);
 	if (error)
 		return (error);
-	INP_INFO_WLOCK(&V_udbinfo);
-	error = in_pcballoc(so, &V_udbinfo);
+	INP_INFO_WLOCK(pcbinfo);
+	error = in_pcballoc(so, pcbinfo);
 	if (error) {
-		INP_INFO_WUNLOCK(&V_udbinfo);
+		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 
@@ -1441,18 +1615,18 @@
 	if (error) {
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
-		INP_INFO_WUNLOCK(&V_udbinfo);
+		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_udbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 	return (0);
 }
 #endif /* INET */
 
 int
-udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
+udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, void *ctx)
 {
 	struct inpcb *inp;
 	struct udpcb *up;
@@ -1468,6 +1642,7 @@
 		return (EBUSY);
 	}
 	up->u_tun_func = f;
+	up->u_tun_ctx = ctx;
 	INP_WUNLOCK(inp);
 	return (0);
 }
@@ -1477,14 +1652,16 @@
 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
 	INP_WLOCK(inp);
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbbind(inp, nam, td->td_ucred);
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
@@ -1493,15 +1670,17 @@
 udp_close(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_close: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		inp->inp_laddr.s_addr = INADDR_ANY;
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
@@ -1511,9 +1690,11 @@
 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
+	struct sockaddr_in *sin;
 	int error;
-	struct sockaddr_in *sin;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
 	INP_WLOCK(inp);
@@ -1527,9 +1708,9 @@
 		INP_WUNLOCK(inp);
 		return (error);
 	}
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbconnect(inp, nam, td->td_ucred);
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	if (error == 0)
 		soisconnected(so);
 	INP_WUNLOCK(inp);
@@ -1540,13 +1721,15 @@
 udp_detach(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 	    ("udp_detach: not disconnected"));
-	INP_INFO_WLOCK(&V_udbinfo);
+	INP_INFO_WLOCK(pcbinfo);
 	INP_WLOCK(inp);
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
@@ -1553,7 +1736,7 @@
 	inp->inp_ppcb = NULL;
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
-	INP_INFO_WUNLOCK(&V_udbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 	udp_discardcb(up);
 }
 
@@ -1561,7 +1744,9 @@
 udp_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
@@ -1569,10 +1754,10 @@
 		INP_WUNLOCK(inp);
 		return (ENOTCONN);
 	}
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	in_pcbdisconnect(inp);
 	inp->inp_laddr.s_addr = INADDR_ANY;
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);

Modified: trunk/sys/netinet/udp_var.h
===================================================================
--- trunk/sys/netinet/udp_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)udp_var.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/udp_var.h 234780 2012-04-29 08:50:50Z bz $
+ * $FreeBSD: stable/10/sys/netinet/udp_var.h 278812 2015-02-15 20:53:50Z marius $
  */
 
 #ifndef _NETINET_UDP_VAR_H_
@@ -43,6 +43,7 @@
 	struct udphdr	ui_u;		/* udp header */
 };
 #define	ui_x1		ui_i.ih_x1
+#define	ui_v		ui_i.ih_x1[0]
 #define	ui_pr		ui_i.ih_pr
 #define	ui_len		ui_i.ih_len
 #define	ui_src		ui_i.ih_src
@@ -52,8 +53,12 @@
 #define	ui_ulen		ui_u.uh_ulen
 #define	ui_sum		ui_u.uh_sum
 
-typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *);
+struct inpcb;
+struct mbuf;
 
+typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *,
+			      const struct sockaddr *, void *);
+
 /*
  * UDP control block; one per udp.
  */
@@ -60,6 +65,9 @@
 struct udpcb {
 	udp_tun_func_t	u_tun_func;	/* UDP kernel tunneling callback. */
 	u_int		u_flags;	/* Generic UDP flags. */
+	uint16_t	u_rxcslen;	/* Coverage for incoming datagrams. */
+	uint16_t	u_txcslen;	/* Coverage for outgoing datagrams. */
+	void 		*u_tun_ctx;	/* Tunneling callback context. */
 };
 
 #define	intoudpcb(ip)	((struct udpcb *)(ip)->inp_ppcb)
@@ -73,30 +81,34 @@
 
 struct udpstat {
 				/* input statistics: */
-	u_long	udps_ipackets;		/* total input packets */
-	u_long	udps_hdrops;		/* packet shorter than header */
-	u_long	udps_badsum;		/* checksum error */
-	u_long	udps_nosum;		/* no checksum */
-	u_long	udps_badlen;		/* data length larger than packet */
-	u_long	udps_noport;		/* no socket on port */
-	u_long	udps_noportbcast;	/* of above, arrived as broadcast */
-	u_long	udps_fullsock;		/* not delivered, input socket full */
-	u_long	udpps_pcbcachemiss;	/* input packets missing pcb cache */
-	u_long	udpps_pcbhashmiss;	/* input packets not for hashed pcb */
+	uint64_t udps_ipackets;		/* total input packets */
+	uint64_t udps_hdrops;		/* packet shorter than header */
+	uint64_t udps_badsum;		/* checksum error */
+	uint64_t udps_nosum;		/* no checksum */
+	uint64_t udps_badlen;		/* data length larger than packet */
+	uint64_t udps_noport;		/* no socket on port */
+	uint64_t udps_noportbcast;	/* of above, arrived as broadcast */
+	uint64_t udps_fullsock;		/* not delivered, input socket full */
+	uint64_t udpps_pcbcachemiss;	/* input packets missing pcb cache */
+	uint64_t udpps_pcbhashmiss;	/* input packets not for hashed pcb */
 				/* output statistics: */
-	u_long	udps_opackets;		/* total output packets */
-	u_long	udps_fastout;		/* output packets on fast path */
+	uint64_t udps_opackets;		/* total output packets */
+	uint64_t udps_fastout;		/* output packets on fast path */
 	/* of no socket on port, arrived as multicast */
-	u_long	udps_noportmcast;
-	u_long	udps_filtermcast;	/* blocked by multicast filter */
+	uint64_t udps_noportmcast;
+	uint64_t udps_filtermcast;	/* blocked by multicast filter */
 };
 
 #ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct udpstat, udpstat);
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
-#define	UDPSTAT_ADD(name, val)	V_udpstat.name += (val)
+#define	UDPSTAT_ADD(name, val)  \
+    VNET_PCPUSTAT_ADD(struct udpstat, udpstat, name, (val))
 #define	UDPSTAT_INC(name)	UDPSTAT_ADD(name, 1)
 
 /*
@@ -103,8 +115,8 @@
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_udpstat_inc(int statnum);
-#define	KMOD_UDPSTAT_INC(name)						\
-	kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(u_long))
+#define	KMOD_UDPSTAT_INC(name)	\
+    kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t))
 #endif
 
 /*
@@ -117,48 +129,60 @@
 #define	UDPCTL_PCBLIST		5	/* list of PCBs for UDP sockets */
 #define	UDPCTL_MAXID		6
 
-#define	UDPCTL_NAMES	{						\
-	{ 0, 0 },							\
-	{ "checksum", CTLTYPE_INT },					\
-	{ "stats", CTLTYPE_STRUCT },					\
-	{ "maxdgram", CTLTYPE_INT },					\
-	{ "recvspace", CTLTYPE_INT },					\
-	{ "pcblist", CTLTYPE_STRUCT },					\
-}
-
 #ifdef _KERNEL
+#include <netinet/in_pcb.h>
 SYSCTL_DECL(_net_inet_udp);
 
 extern struct pr_usrreqs	udp_usrreqs;
 VNET_DECLARE(struct inpcbhead, udb);
 VNET_DECLARE(struct inpcbinfo, udbinfo);
+VNET_DECLARE(struct inpcbhead, ulitecb);
+VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
 #define	V_udb			VNET(udb)
 #define	V_udbinfo		VNET(udbinfo)
+#define	V_ulitecb		VNET(ulitecb)
+#define	V_ulitecbinfo		VNET(ulitecbinfo)
 
 extern u_long			udp_sendspace;
 extern u_long			udp_recvspace;
 VNET_DECLARE(int, udp_cksum);
-VNET_DECLARE(struct udpstat, udpstat);
 VNET_DECLARE(int, udp_blackhole);
 #define	V_udp_cksum		VNET(udp_cksum)
-#define	V_udpstat		VNET(udpstat)
 #define	V_udp_blackhole		VNET(udp_blackhole)
 extern int			udp_log_in_vain;
 
-int		 udp_newudpcb(struct inpcb *);
-void		 udp_discardcb(struct udpcb *);
+static __inline struct inpcbinfo *
+get_inpcbinfo(uint8_t protocol)
+{
+	return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
+}
 
-void		 udp_ctlinput(int, struct sockaddr *, void *);
-int		 udp_ctloutput(struct socket *, struct sockopt *);
-void		 udp_init(void);
+static __inline struct inpcbhead *
+get_pcblist(uint8_t protocol)
+{
+	return (protocol == IPPROTO_UDP) ? &V_udb : &V_ulitecb;
+}
+
+int		udp_newudpcb(struct inpcb *);
+void		udp_discardcb(struct udpcb *);
+
+void		udp_ctlinput(int, struct sockaddr *, void *);
+void		udplite_ctlinput(int, struct sockaddr *, void *);
+int		udp_ctloutput(struct socket *, struct sockopt *);
+void		udp_init(void);
+void		udplite_init(void);
 #ifdef VIMAGE
-void		 udp_destroy(void);
+void		udp_destroy(void);
+void		udplite_destroy(void);
 #endif
-void		 udp_input(struct mbuf *, int);
+void		udp_input(struct mbuf *, int);
+void		udplite_input(struct mbuf *, int);
 struct inpcb	*udp_notify(struct inpcb *inp, int errno);
-int		 udp_shutdown(struct socket *so);
+int		udp_shutdown(struct socket *so);
 
-int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f);
-#endif
+int		udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f,
+					 void *ctx);
 
-#endif
+#endif /* _KERNEL */
+
+#endif /* _NETINET_UDP_VAR_H_ */

Added: trunk/sys/netinet/udplite.h
===================================================================
--- trunk/sys/netinet/udplite.h	                        (rev 0)
+++ trunk/sys/netinet/udplite.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,39 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Kevin Lo
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/udplite.h 265946 2014-05-13 06:05:53Z kevlo $
+ */
+
+#ifndef _NETINET_UDPLITE_H_
+#define	_NETINET_UDPLITE_H_
+
+/* 
+ * User-settable options (used with setsockopt).
+ */
+#define	UDPLITE_SEND_CSCOV	2	/* Sender checksum coverage. */
+#define	UDPLITE_RECV_CSCOV	4	/* Receiver checksum coverage. */
+
+#endif	/* !_NETINET_UDPLITE_H_ */


Property changes on: trunk/sys/netinet/udplite.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet6/dest6.c
===================================================================
--- trunk/sys/netinet6/dest6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/dest6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/dest6.c 196019 2009-08-01 19:26:27Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/dest6.c 249294 2013-04-09 07:11:22Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"

Modified: trunk/sys/netinet6/frag6.c
===================================================================
--- trunk/sys/netinet6/frag6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/frag6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/frag6.c 238479 2012-07-15 11:27:15Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/frag6.c 329158 2018-02-12 13:52:58Z ae $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -223,9 +223,8 @@
 	offset += sizeof(struct ip6_frag);
 
 	/*
-	 * XXX-BZ RFC XXXX (draft-gont-6man-ipv6-atomic-fragments)
-	 * Handle "atomic" fragments (offset and m bit set to 0) upfront,
-	 * unrelated to any reassembly.  Just skip the fragment header.
+	 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
+	 * upfront, unrelated to any reassembly.  Just skip the fragment header.
 	 */
 	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
 		/* XXX-BZ we want dedicated counters for this. */
@@ -535,6 +534,11 @@
 	af6 = ip6af->ip6af_down;
 	frag6_deq(ip6af);
 	while (af6 != (struct ip6asfrag *)q6) {
+		m->m_pkthdr.csum_flags &=
+		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
+		m->m_pkthdr.csum_data +=
+		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
+
 		af6dwn = af6->ip6af_down;
 		frag6_deq(af6);
 		while (t->m_next)
@@ -545,6 +549,10 @@
 		af6 = af6dwn;
 	}
 
+	while (m->m_pkthdr.csum_data & 0xffff0000)
+		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+		    (m->m_pkthdr.csum_data >> 16);
+
 	/* adjust offset to point where the original next header starts */
 	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
 	free(ip6af, M_FTABLE);
@@ -557,36 +565,23 @@
 	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 #endif
 
-	/* Delete frag6 header */
-	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
-		/* This is the only possible case with !PULLDOWN_TEST */
-		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
-		    offset);
-		m->m_data += sizeof(struct ip6_frag);
-		m->m_len -= sizeof(struct ip6_frag);
-	} else {
-		/* this comes with no copy if the boundary is on cluster */
-		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
-			frag6_remque(q6);
-			V_frag6_nfrags -= q6->ip6q_nfrag;
+	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
+		frag6_remque(q6);
+		V_frag6_nfrags -= q6->ip6q_nfrag;
 #ifdef MAC
-			mac_ip6q_destroy(q6);
+		mac_ip6q_destroy(q6);
 #endif
-			free(q6, M_FTABLE);
-			V_frag6_nfragpackets--;
-			goto dropfrag;
-		}
-		m_adj(t, sizeof(struct ip6_frag));
-		m_cat(m, t);
+		free(q6, M_FTABLE);
+		V_frag6_nfragpackets--;
+
+		goto dropfrag;
 	}
 
 	/*
 	 * Store NXT to the original.
 	 */
-	{
-		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
-		*prvnxtp = nxt;
-	}
+	m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
+	    (caddr_t)&nxt);
 
 	frag6_remque(q6);
 	V_frag6_nfrags -= q6->ip6q_nfrag;
@@ -791,3 +786,27 @@
 	IP6Q_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
+
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+{
+	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+	struct mbuf *t;
+
+	/* Delete frag6 header. */
+	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+		/* This is the only possible case with !PULLDOWN_TEST. */
+		bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+		    offset);
+		m->m_data += sizeof(struct ip6_frag);
+		m->m_len -= sizeof(struct ip6_frag);
+	} else {
+		/* This comes with no copy if the boundary is on cluster. */
+		if ((t = m_split(m, offset, wait)) == NULL)
+			return (ENOMEM);
+		m_adj(t, sizeof(struct ip6_frag));
+		m_cat(m, t);
+	}
+
+	return (0);
+}

Modified: trunk/sys/netinet6/icmp6.c
===================================================================
--- trunk/sys/netinet6/icmp6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/icmp6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,11 +62,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/icmp6.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/icmp6.c 303459 2016-07-28 20:11:34Z sbruno $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
-#include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
@@ -108,15 +107,15 @@
 #include <netinet6/nd6.h>
 #include <netinet6/send.h>
 
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
-
 extern struct domain inet6domain;
 
-VNET_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_SYSINIT(icmp6stat);
 
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
+#endif /* VIMAGE */
+
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 VNET_DECLARE(struct inpcbhead, ripcb);
 VNET_DECLARE(int, icmp6errppslim);
@@ -131,7 +130,7 @@
 #define	V_icmp6errppslim_last		VNET(icmp6errppslim_last)
 #define	V_icmp6_nodeinfo		VNET(icmp6_nodeinfo)
 
-static void icmp6_errcount(struct icmp6errstat *, int, int);
+static void icmp6_errcount(int, int);
 static int icmp6_rip6_input(struct mbuf **, int);
 static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
 static const char *icmp6_redirect_diag(struct in6_addr *,
@@ -156,42 +155,42 @@
 kmod_icmp6stat_inc(int statnum)
 {
 
-	(*((u_quad_t *)&V_icmp6stat + statnum))++;
+	counter_u64_add(VNET(icmp6stat)[statnum], 1);
 }
 
 static void
-icmp6_errcount(struct icmp6errstat *stat, int type, int code)
+icmp6_errcount(int type, int code)
 {
 	switch (type) {
 	case ICMP6_DST_UNREACH:
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
-			stat->icp6errs_dst_unreach_noroute++;
+			ICMP6STAT_INC(icp6s_odst_unreach_noroute);
 			return;
 		case ICMP6_DST_UNREACH_ADMIN:
-			stat->icp6errs_dst_unreach_admin++;
+			ICMP6STAT_INC(icp6s_odst_unreach_admin);
 			return;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
-			stat->icp6errs_dst_unreach_beyondscope++;
+			ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
 			return;
 		case ICMP6_DST_UNREACH_ADDR:
-			stat->icp6errs_dst_unreach_addr++;
+			ICMP6STAT_INC(icp6s_odst_unreach_addr);
 			return;
 		case ICMP6_DST_UNREACH_NOPORT:
-			stat->icp6errs_dst_unreach_noport++;
+			ICMP6STAT_INC(icp6s_odst_unreach_noport);
 			return;
 		}
 		break;
 	case ICMP6_PACKET_TOO_BIG:
-		stat->icp6errs_packet_too_big++;
+		ICMP6STAT_INC(icp6s_opacket_too_big);
 		return;
 	case ICMP6_TIME_EXCEEDED:
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
-			stat->icp6errs_time_exceed_transit++;
+			ICMP6STAT_INC(icp6s_otime_exceed_transit);
 			return;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
-			stat->icp6errs_time_exceed_reassembly++;
+			ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
 			return;
 		}
 		break;
@@ -198,21 +197,21 @@
 	case ICMP6_PARAM_PROB:
 		switch (code) {
 		case ICMP6_PARAMPROB_HEADER:
-			stat->icp6errs_paramprob_header++;
+			ICMP6STAT_INC(icp6s_oparamprob_header);
 			return;
 		case ICMP6_PARAMPROB_NEXTHEADER:
-			stat->icp6errs_paramprob_nextheader++;
+			ICMP6STAT_INC(icp6s_oparamprob_nextheader);
 			return;
 		case ICMP6_PARAMPROB_OPTION:
-			stat->icp6errs_paramprob_option++;
+			ICMP6STAT_INC(icp6s_oparamprob_option);
 			return;
 		}
 		break;
 	case ND_REDIRECT:
-		stat->icp6errs_redirect++;
+		ICMP6STAT_INC(icp6s_oredirect);
 		return;
 	}
-	stat->icp6errs_unknown++;
+	ICMP6STAT_INC(icp6s_ounknown);
 }
 
 /*
@@ -263,7 +262,7 @@
 	ICMP6STAT_INC(icp6s_error);
 
 	/* count per-type-code statistics */
-	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);
+	icmp6_errcount(type, code);
 
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m->m_flags & M_DECRYPTED) {
@@ -361,9 +360,7 @@
 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
 
 	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
-	M_PREPEND(m, preplen, M_DONTWAIT);	/* FIB is also copied over. */
-	if (m && m->m_len < preplen)
-		m = m_pullup(m, preplen);
+	M_PREPEND(m, preplen, M_NOWAIT);	/* FIB is also copied over. */
 	if (m == NULL) {
 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
 		return;
@@ -579,25 +576,18 @@
 		if ((n->m_flags & M_EXT) != 0
 		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
 			struct mbuf *n0 = n;
-			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
 			int n0len;
 
-			MGETHDR(n, M_DONTWAIT, n0->m_type);
-			n0len = n0->m_pkthdr.len;	/* save for use below */
-			if (n)
-				M_MOVE_PKTHDR(n, n0);	/* FIB copied. */
-			if (n && maxlen >= MHLEN) {
-				MCLGET(n, M_DONTWAIT);
-				if ((n->m_flags & M_EXT) == 0) {
-					m_free(n);
-					n = NULL;
-				}
-			}
+			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
+			n = m_gethdr(M_NOWAIT, n0->m_type);
 			if (n == NULL) {
 				/* Give up remote */
 				m_freem(n0);
 				break;
 			}
+
+			m_move_pkthdr(n, n0);	/* FIB copied. */
+			n0len = n0->m_pkthdr.len;	/* save for use below */
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
@@ -684,31 +674,27 @@
 		} else {
 			struct prison *pr;
 			u_char *p;
-			int maxlen, maxhlen, hlen;
+			int maxhlen, hlen;
 
 			/*
 			 * XXX: this combination of flags is pointless,
 			 * but should we keep this for compatibility?
 			 */
-			if ((V_icmp6_nodeinfo & 5) != 5)
+			if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
+			    ICMP6_NODEINFO_TMPADDROK)) !=
+			    (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
 				break;
 
 			if (code != 0)
 				goto badcode;
-			maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
-			if (maxlen >= MCLBYTES) {
+
+			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
+			n = m_gethdr(M_NOWAIT, m->m_type);
+			if (n == NULL) {
 				/* Give up remote */
 				break;
 			}
-			MGETHDR(n, M_DONTWAIT, m->m_type);
-			if (n && maxlen > MHLEN) {
-				MCLGET(n, M_DONTWAIT);
-				if ((n->m_flags & M_EXT) == 0) {
-					m_free(n);
-					n = NULL;
-				}
-			}
-			if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
+			if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 				/*
 				 * Previous code did a blind M_COPY_PKTHDR
 				 * and said "just for rcvif".  If true, then
@@ -719,13 +705,8 @@
 				m_free(n);
 				n = NULL;
 			}
-			if (n == NULL) {
-				/* Give up remote */
-				break;
-			}
-			n->m_pkthdr.rcvif = NULL;
-			n->m_len = 0;
-			maxhlen = M_TRAILINGSPACE(n) - maxlen;
+			maxhlen = M_TRAILINGSPACE(n) -
+			    (sizeof(*nip6) + sizeof(*nicmp6) + 4);
 			pr = curthread->td_ucred->cr_prison;
 			mtx_lock(&pr->pr_mtx);
 			hlen = strlen(pr->pr_hostname);
@@ -768,7 +749,7 @@
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_solicit))
 			goto badlen;
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			/* give up local */
 
 			/* Send incoming SeND packet to user space. */
@@ -806,7 +787,7 @@
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_advert))
 			goto badlen;
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 
 			/* Send incoming SeND-protected/ND packet to user space. */
 			if (send_sendso_input_hook != NULL) {
@@ -837,7 +818,7 @@
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
 			goto badlen;
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
@@ -866,7 +847,7 @@
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_advert))
 			goto badlen;
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 
 			/* Send incoming SeND-protected/ND packet to user space. */
 			if (send_sendso_input_hook != NULL) {
@@ -897,7 +878,7 @@
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_redirect))
 			goto badlen;
-		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
@@ -1178,8 +1159,6 @@
 		ip6cp.ip6c_src = &icmp6src;
 		ip6cp.ip6c_nxt = nxt;
 
-		m_addr_changed(m);
-
 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
 			notifymtu = ntohl(icmp6->icmp6_mtu);
 			ip6cp.ip6c_cmdarg = (void *)¬ifymtu;
@@ -1243,6 +1222,7 @@
 		mtu = IPV6_MMTU - 8;
 
 	bzero(&inc, sizeof(inc));
+	inc.inc_fibnum = M_GETFIB(m);
 	inc.inc_flags |= INC_ISIPV6;
 	inc.inc6_faddr = *dst;
 	if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
@@ -1497,26 +1477,23 @@
 		break;
 	}
 
-	/* allocate an mbuf to reply. */
-	MGETHDR(n, M_DONTWAIT, m->m_type);
+	/* Allocate an mbuf to reply. */
+	if (replylen > MCLBYTES) {
+		/*
+		 * XXX: should we try to allocate more? But MCLBYTES
+		 * is probably much larger than IPV6_MMTU...
+		 */
+		goto bad;
+	}
+	if (replylen > MHLEN)
+		n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
+	else
+		n = m_gethdr(M_NOWAIT, m->m_type);
 	if (n == NULL) {
 		m_freem(m);
 		return (NULL);
 	}
-	M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */
-	if (replylen > MHLEN) {
-		if (replylen > MCLBYTES) {
-			/*
-			 * XXX: should we try to allocate more? But MCLBYTES
-			 * is probably much larger than IPV6_MMTU...
-			 */
-			goto bad;
-		}
-		MCLGET(n, M_DONTWAIT);
-		if ((n->m_flags & M_EXT) == 0) {
-			goto bad;
-		}
-	}
+	m_move_pkthdr(n, m); /* just for recvif and FIB */
 	n->m_pkthdr.len = n->m_len = replylen;
 
 	/* copy mbuf header and IPv6 + Node Information base headers */
@@ -1611,16 +1588,13 @@
 	else
 		len = MCLBYTES;
 
-	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
-	MGET(m, M_DONTWAIT, MT_DATA);
-	if (m && len > MLEN) {
-		MCLGET(m, M_DONTWAIT);
-		if ((m->m_flags & M_EXT) == 0)
-			goto fail;
-	}
-	if (!m)
+	/* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
+	if (len > MLEN)
+		m = m_getcl(M_NOWAIT, MT_DATA, 0);
+	else
+		m = m_get(M_NOWAIT, MT_DATA);
+	if (m == NULL)
 		goto fail;
-	m->m_next = NULL;
 
 	if (old) {
 		m->m_len = len;
@@ -1953,8 +1927,8 @@
 				ltime = ND6_INFINITE_LIFETIME;
 			else {
 				if (ifa6->ia6_lifetime.ia6t_expire >
-				    time_second)
-					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
+				    time_uptime)
+					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
 				else
 					ltime = 0;
 			}
@@ -2066,7 +2040,7 @@
 			 */
 			if ((m->m_flags & M_EXT) && m->m_next == NULL &&
 			    m->m_len <= MHLEN) {
-				MGET(n, M_DONTWAIT, m->m_type);
+				n = m_get(M_NOWAIT, m->m_type);
 				if (n != NULL) {
 					if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 						bcopy(m->m_data, n->m_data,
@@ -2116,7 +2090,7 @@
 		    m->m_len <= MHLEN) {
 			struct mbuf *n;
 
-			MGET(n, M_DONTWAIT, m->m_type);
+			n = m_get(M_NOWAIT, m->m_type);
 			if (n != NULL) {
 				if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 					bcopy(m->m_data, n->m_data, m->m_len);
@@ -2299,8 +2273,6 @@
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
-	m_addr_changed(m);
-
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif)
 		icmp6_ifoutstat_inc(outif, type, code);
@@ -2528,9 +2500,6 @@
 	sdst.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
-#ifdef IPSEC
-	key_sa_routechange((struct sockaddr *)&sdst);
-#endif /* IPSEC */
     }
 
  freeit:
@@ -2559,7 +2528,7 @@
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 src_sa;
 
-	icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
+	icmp6_errcount(ND_REDIRECT, 0);
 
 	/* if we are not router, we don't send icmp6 redirect */
 	if (!V_ip6_forwarding)
@@ -2597,14 +2566,10 @@
 #if IPV6_MMTU >= MCLBYTES
 # error assumption failed about IPV6_MMTU and MCLBYTES
 #endif
-	MGETHDR(m, M_DONTWAIT, MT_HEADER);
-	if (m && IPV6_MMTU >= MHLEN)
-		MCLGET(m, M_DONTWAIT);
-	if (!m)
+	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	if (m == NULL)
 		goto fail;
 	M_SETFIB(m, rt->rt_fibnum);
-	m->m_pkthdr.rcvif = NULL;
-	m->m_len = 0;
 	maxlen = M_TRAILINGSPACE(m);
 	maxlen = min(IPV6_MMTU, maxlen);
 	/* just for safety */

Modified: trunk/sys/netinet6/icmp6.h
===================================================================
--- trunk/sys/netinet6/icmp6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/icmp6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet6/icmp6.h 62587 2000-07-04 16:35:15Z itojun $	*/
+/*	$FreeBSD: stable/10/sys/netinet6/icmp6.h 62587 2000-07-04 16:35:15Z itojun $	*/
 /*	$KAME: icmp6.h,v 1.17 2000/06/11 17:23:40 jinmei Exp $	*/
 
 #error "netinet6/icmp6.h is obsolete.  use netinet/icmp6.h"

Modified: trunk/sys/netinet6/in6.c
===================================================================
--- trunk/sys/netinet6/in6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6.c 303458 2016-07-28 20:08:01Z sbruno $");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
@@ -96,6 +96,7 @@
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
+#include <netinet/ip_carp.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
@@ -106,6 +107,9 @@
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_pcb.h>
 
+VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
+#define V_icmp6_nodeinfo_oldmcprefix	VNET(icmp6_nodeinfo_oldmcprefix)
+
 /*
  * Definitions of some costant IP6 addresses.
  */
@@ -152,27 +156,28 @@
 
 	ia = ifa2ia6(ifa);
 	ifp = ifa->ifa_ifp;
-	IF_AFDATA_LOCK(ifp);
-	ifa->ifa_rtrequest = nd6_rtrequest;
-	ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
-	    LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
-	IF_AFDATA_UNLOCK(ifp);
-	if (ln != NULL) {
-		ln->la_expire = 0;  /* for IPv6 this means permanent */
-		ln->ln_state = ND6_LLINFO_REACHABLE;
-		/*
-		 * initialize for rtmsg generation
-		 */
-		bzero(&gateway, sizeof(gateway));
-		gateway.sdl_len = sizeof(gateway);
-		gateway.sdl_family = AF_LINK;
-		gateway.sdl_nlen = 0;
-		gateway.sdl_alen = 6;
-		memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
-		    sizeof(ln->ll_addr));
-		LLE_WUNLOCK(ln);
+	/*
+	 * initialize for rtmsg generation
+	 */
+	bzero(&gateway, sizeof(gateway));
+	gateway.sdl_len = sizeof(gateway);
+	gateway.sdl_family = AF_LINK;
+	if (nd6_need_cache(ifp) != 0) {
+		IF_AFDATA_LOCK(ifp);
+		ifa->ifa_rtrequest = nd6_rtrequest;
+		ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
+		    LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
+		IF_AFDATA_UNLOCK(ifp);
+		if (ln != NULL) {
+			ln->la_expire = 0;  /* for IPv6 this means permanent */
+			ln->ln_state = ND6_LLINFO_REACHABLE;
+
+			gateway.sdl_alen = 6;
+			memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
+			    sizeof(ln->ll_addr));
+			LLE_WUNLOCK(ln);
+		}
 	}
-
 	bzero(&rt, sizeof(rt));
 	rt.rt_gateway = (struct sockaddr *)&gateway;
 	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
@@ -269,8 +274,16 @@
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	struct sockaddr_in6 *sa6;
+	int carp_attached = 0;
 	int error;
+	u_long ocmd = cmd;
 
+	/*
+	 * Compat to make pre-10.x ifconfig(8) operable.
+	 */
+	if (cmd == OSIOCAIFADDR_IN6)
+		cmd = SIOCAIFADDR_IN6;
+
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
@@ -282,7 +295,7 @@
 		return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
 	}
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		if (td != NULL) {
@@ -311,8 +324,6 @@
 		/* FALLTHROUGH */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
-	case SIOCGDRLST_IN6:
-	case SIOCGPRLST_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return (nd6_ioctl(cmd, data, ifp));
@@ -354,14 +365,10 @@
 			if (error)
 				return (error);
 		}
-		return (scope6_set(ifp,
-		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+		/* FALLTHROUGH */
 	case SIOCGSCOPE6:
-		return (scope6_get(ifp,
-		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
 	case SIOCGSCOPE6DEF:
-		return (scope6_get_default((struct scope6_id *)
-		    ifr->ifr_ifru.ifru_scope_id));
+		return (scope6_ioctl(cmd, data, ifp));
 	}
 
 	switch (cmd) {
@@ -420,6 +427,18 @@
 	case SIOCGIFSTAT_ICMP6:
 		sa6 = &ifr->ifr_addr;
 		break;
+	case SIOCSIFADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCSIFNETMASK:
+		/*
+		 * Although we should pass any non-INET6 ioctl requests
+		 * down to driver, we filter some legacy INET requests.
+		 * Drivers trust SIOCSIFADDR et al to come from an already
+		 * privileged layer, and do not perform any credentials
+		 * checks or input validation.
+		 */
+		return (EINVAL);
 	default:
 		sa6 = NULL;
 		break;
@@ -480,6 +499,13 @@
 			if (error)
 				goto out;
 		}
+		/* FALLTHROUGH */
+	case SIOCGIFSTAT_IN6:
+	case SIOCGIFSTAT_ICMP6:
+		if (ifp->if_afdata[AF_INET6] == NULL) {
+			error = EPFNOSUPPORT;
+			goto out;
+		}
 		break;
 
 	case SIOCGIFADDR_IN6:
@@ -512,12 +538,12 @@
 		/* sanity for overflow - beware unsigned */
 		lt = &ifr->ifr_ifru.ifru_lifetime;
 		if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
-		    lt->ia6t_vltime + time_second < time_second) {
+		    lt->ia6t_vltime + time_uptime < time_uptime) {
 			error = EINVAL;
 			goto out;
 		}
 		if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
-		    lt->ia6t_pltime + time_second < time_second) {
+		    lt->ia6t_pltime + time_uptime < time_uptime) {
 			error = EINVAL;
 			goto out;
 		}
@@ -555,25 +581,17 @@
 		break;
 
 	case SIOCGIFSTAT_IN6:
-		if (ifp == NULL) {
-			error = EINVAL;
-			goto out;
-		}
-		bzero(&ifr->ifr_ifru.ifru_stat,
-		    sizeof(ifr->ifr_ifru.ifru_stat));
-		ifr->ifr_ifru.ifru_stat =
-		    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
+		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+		    ifp->if_afdata[AF_INET6])->in6_ifstat,
+		    &ifr->ifr_ifru.ifru_stat,
+		    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
-		if (ifp == NULL) {
-			error = EINVAL;
-			goto out;
-		}
-		bzero(&ifr->ifr_ifru.ifru_icmp6stat,
-		    sizeof(ifr->ifr_ifru.ifru_icmp6stat));
-		ifr->ifr_ifru.ifru_icmp6stat =
-		    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
+		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+		    ifp->if_afdata[AF_INET6])->icmp6_ifstat,
+		    &ifr->ifr_ifru.ifru_icmp6stat,
+		    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
@@ -621,12 +639,12 @@
 		/* for sanity */
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_expire =
-				time_second + ia->ia6_lifetime.ia6t_vltime;
+				time_uptime + ia->ia6_lifetime.ia6t_vltime;
 		} else
 			ia->ia6_lifetime.ia6t_expire = 0;
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_preferred =
-				time_second + ia->ia6_lifetime.ia6t_pltime;
+				time_uptime + ia->ia6_lifetime.ia6t_pltime;
 		} else
 			ia->ia6_lifetime.ia6t_preferred = 0;
 		break;
@@ -654,6 +672,18 @@
 			break;
 		}
 
+		if (cmd == ocmd && ifra->ifra_vhid > 0) {
+			if (carp_attach_p != NULL)
+				error = (*carp_attach_p)(&ia->ia_ifa,
+				    ifra->ifra_vhid);
+			else
+				error = EPROTONOSUPPORT;
+			if (error)
+				goto out;
+			else
+				carp_attached = 1;
+		}
+
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
@@ -670,7 +700,8 @@
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    NULL);
 		if (pr0.ndpr_plen == 128) {
-			break;	/* we don't need to install a host route. */
+			/* we don't need to install a host route. */
+			goto aifaddr_out;
 		}
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		/* apply the mask for safety. */
@@ -697,13 +728,10 @@
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
-			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
+			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
+				if (carp_attached)
+					(*carp_detach_p)(&ia->ia_ifa);
 				goto out;
-			if (pr == NULL) {
-				log(LOG_ERR, "nd6_prelist_add succeeded but "
-				    "no prefix\n");
-				error = EINVAL;
-				goto out;
 			}
 		}
 
@@ -733,32 +761,29 @@
 		 * that is, this address might make other addresses detached.
 		 */
 		pfxlist_onlink_check();
-		if (error == 0 && ia) {
-			if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
-				/*
-				 * Try to clear the flag when a new
-				 * IPv6 address is added onto an
-				 * IFDISABLED interface and it
-				 * succeeds.
-				 */
-				struct in6_ndireq nd;
+aifaddr_out:
+		if (error != 0 || ia == NULL)
+			break;
+		/*
+		 * Try to clear the flag when a new IPv6 address is added
+		 * onto an IFDISABLED interface and it succeeds.
+		 */
+		if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+			struct in6_ndireq nd;
 
-				memset(&nd, 0, sizeof(nd));
-				nd.ndi.flags = ND_IFINFO(ifp)->flags;
-				nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
-				if (nd6_ioctl(SIOCSIFINFO_FLAGS,
-				    (caddr_t)&nd, ifp) < 0)
-					log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
-					    "SIOCSIFINFO_FLAGS for -ifdisabled "
-					    "failed.");
-				/*
-				 * Ignore failure of clearing the flag
-				 * intentionally.  The failure means
-				 * address duplication was detected.
-				 */
-			}
-			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+			memset(&nd, 0, sizeof(nd));
+			nd.ndi.flags = ND_IFINFO(ifp)->flags;
+			nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+			if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
+				log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+				    "SIOCSIFINFO_FLAGS for -ifdisabled "
+				    "failed.");
+			/*
+			 * Ignore failure of clearing the flag intentionally.
+			 * The failure means address duplication was detected.
+			 */
 		}
+		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
@@ -785,7 +810,7 @@
 	}
 
 	default:
-		if (ifp == NULL || ifp->if_ioctl == 0) {
+		if (ifp->if_ioctl == NULL) {
 			error = EOPNOTSUPP;
 			goto out;
 		}
@@ -922,6 +947,17 @@
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
+	if (V_icmp6_nodeinfo_oldmcprefix && 
+	     in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
+		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+		if (imm == NULL)
+			nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
+			    &mltaddr.sin6_addr), if_name(ifp), error));
+			/* XXX not very fatal, go on... */
+		else
+			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+	}
 
 	/*
 	 * Join interface-local all-nodes address.
@@ -968,7 +1004,6 @@
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
- * XXX: should this be performed under splnet()?
  */
 int
 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
@@ -1102,7 +1137,7 @@
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 		ia->ia_addr.sin6_family = AF_INET6;
 		ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
-		ia->ia6_createtime = time_second;
+		ia->ia6_createtime = time_uptime;
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 			/*
 			 * XXX: some functions expect that ifa_dstaddr is not
@@ -1123,11 +1158,13 @@
 		ifa_ref(&ia->ia_ifa);			/* in6_ifaddrhead */
 		IN6_IFADDR_WLOCK();
 		TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
+		LIST_INSERT_HEAD(IN6ADDR_HASH(&ifra->ifra_addr.sin6_addr),
+		    ia, ia6_hash);
 		IN6_IFADDR_WUNLOCK();
 	}
 
 	/* update timestamp */
-	ia->ia6_updatetime = time_second;
+	ia->ia6_updatetime = time_uptime;
 
 	/* set prefix mask */
 	if (ifra->ifra_prefixmask.sin6_len) {
@@ -1146,6 +1183,7 @@
 			goto unlink;
 		}
 		ia->ia_prefixmask = ifra->ifra_prefixmask;
+		ia->ia_prefixmask.sin6_family = AF_INET6;
 	}
 
 	/*
@@ -1176,12 +1214,12 @@
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
-		    time_second + ia->ia6_lifetime.ia6t_vltime;
+		    time_uptime + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
-		    time_second + ia->ia6_lifetime.ia6t_pltime;
+		    time_uptime + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
@@ -1199,7 +1237,7 @@
 	 */
 	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
 		ia->ia6_lifetime.ia6t_pltime = 0;
-		ia->ia6_lifetime.ia6t_preferred = time_second;
+		ia->ia6_lifetime.ia6t_preferred = time_uptime;
 	}
 	/*
 	 * Make the address tentative before joining multicast addresses,
@@ -1207,11 +1245,13 @@
 	 * source address.
 	 */
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
-	if (hostIsNew && in6if_do_dad(ifp))
-		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
-	/* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
-	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+	/*
+	 * DAD should be performed for an new address or addresses on
+	 * an interface with ND6_IFF_IFDISABLED.
+	 */
+	if (in6if_do_dad(ifp) &&
+	    (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/*
@@ -1233,13 +1273,8 @@
 			goto cleanup;
 	}
 
-	/*
-	 * Perform DAD, if needed.
-	 * XXX It may be of use, if we can administratively disable DAD.
-	 */
-	if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
-	    (ia->ia6_flags & IN6_IFF_TENTATIVE))
-	{
+	/* Perform DAD, if the address is TENTATIVE. */
+	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
 		int mindelay, maxdelay;
 
 		delay = 0;
@@ -1434,6 +1469,9 @@
 	int plen, error;
 	struct ifaddr *ifa0;
 
+	if (ifa->ifa_carp)
+		(*carp_detach_p)(ifa);
+
 	/*
 	 * find another IPv6 address as the gateway for the
 	 * link-local and node-local all-nodes multicast
@@ -1492,7 +1530,7 @@
 static void
 in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
-	int	s = splnet();
+	char ip6buf[INET6_ADDRSTRLEN];
 
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
@@ -1506,6 +1544,7 @@
 	 */
 	IN6_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
+	LIST_REMOVE(ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	/*
@@ -1515,7 +1554,7 @@
 	if (ia->ia6_ndpr == NULL) {
 		nd6log((LOG_NOTICE,
 		    "in6_unlink_ifa: autoconf'ed address "
-		    "%p has no prefix\n", ia));
+		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
 	} else {
 		ia->ia6_ndpr->ndpr_refcnt--;
 		ia->ia6_ndpr = NULL;
@@ -1530,7 +1569,6 @@
 		pfxlist_onlink_check();
 	}
 	ifa_free(&ia->ia_ifa);			/* in6_ifaddrhead */
-	splx(s);
 }
 
 void
@@ -1569,6 +1607,10 @@
  * in the future.
  * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
  * address encoding scheme. (see figure on page 8)
+ * Notifies other subsystems about address change/arrival:
+ * 1) Notifies device handler on the first IPv6 address assignment
+ * 2) Handle routing table changes for P2P links and route
+ * 3) Handle routing table changes for address host route
  */
 static int
 in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
@@ -1826,7 +1868,6 @@
     struct sockaddr_in6 *sin6, int newhost)
 {
 	int	error = 0, plen, ifacount = 0;
-	int	s = splimp();
 	struct ifaddr *ifa;
 
 	/*
@@ -1846,12 +1887,9 @@
 
 	if (ifacount <= 1 && ifp->if_ioctl) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
-		if (error) {
-			splx(s);
+		if (error)
 			return (error);
-		}
 	}
-	splx(s);
 
 	ia->ia_ifa.ifa_metric = ifp->if_metric;
 
@@ -1948,6 +1986,32 @@
 }
 
 /*
+ * Find a link-local scoped address on ifp and return it if any.
+ */
+struct in6_ifaddr *
+in6ifa_llaonifp(struct ifnet *ifp)
+{
+	struct sockaddr_in6 *sin6;
+	struct ifaddr *ifa;
+
+	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+		return (NULL);
+	IF_ADDR_RLOCK(ifp);
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		if (ifa->ifa_addr->sa_family != AF_INET6)
+			continue;
+		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+		if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
+		    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr) ||
+		    IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
+			break;
+	}
+	IF_ADDR_RUNLOCK(ifp);
+
+	return ((struct in6_ifaddr *)ifa);
+}
+
+/*
  * Convert IP6 address to printable (loggable) representation. Caller
  * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
  */
@@ -2060,7 +2124,7 @@
 	struct in6_ifaddr *ia;
 
 	IN6_IFADDR_RLOCK();
-	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+	LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
 		if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
 			IN6_IFADDR_RUNLOCK();
 			return (1);
@@ -2070,7 +2134,6 @@
 	return (0);
 }
 
-
 int
 in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
@@ -2077,15 +2140,14 @@
 	struct in6_ifaddr *ia;
 
 	IN6_IFADDR_RLOCK();
-	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
-		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
-		    &sa6->sin6_addr) &&
-		    (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
-			IN6_IFADDR_RUNLOCK();
-			return (1); /* true */
+	LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
+		if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) {
+			if (ia->ia6_flags & IN6_IFF_DEPRECATED) {
+				IN6_IFADDR_RUNLOCK();
+				return (1); /* true */
+			}
+			break;
 		}
-
-		/* XXX: do we still have to go thru the rest of the list? */
 	}
 	IN6_IFADDR_RUNLOCK();
 
@@ -2297,7 +2359,8 @@
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 		return (0);
 
-	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ||
+	    (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD))
 		return (0);
 
 	switch (ifp->if_type) {
@@ -2318,13 +2381,13 @@
 		 * However, some interfaces can be up before the RUNNING
 		 * status.  Additionaly, users may try to assign addresses
 		 * before the interface becomes up (or running).
-		 * We simply skip DAD in such a case as a work around.
-		 * XXX: we should rather mark "tentative" on such addresses,
-		 * and do DAD after the interface becomes ready.
+		 * This function returns EAGAIN in that case.
+		 * The caller should mark "tentative" on the address instead of
+		 * performing DAD immediately.
 		 */
 		if (!((ifp->if_flags & IFF_UP) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
-			return (0);
+			return (EAGAIN);
 
 		return (1);
 	}
@@ -2379,6 +2442,7 @@
 #ifdef IFT_MIP
 	case IFT_MIP:	/* ditto */
 #endif
+	case IFT_BRIDGE:	/* bridge(4) only does Ethernet-like links */
 	case IFT_INFINIBAND:
 		return (64);
 	case IFT_FDDI:		/* RFC2467 */
@@ -2448,8 +2512,7 @@
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in6_lltable_free;
 	LLE_LOCK_INIT(&lle->base);
-	callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
-	    CALLOUT_RETURNUNLOCKED);
+	callout_init(&lle->base.ln_timer_ch, 1);
 
 	return (&lle->base);
 }
@@ -2552,6 +2615,7 @@
 	if (lle == NULL) {
 		if (!(flags & LLE_CREATE))
 			return (NULL);
+		IF_AFDATA_WLOCK_ASSERT(ifp);
 		/*
 		 * A route that covers the given address must have
 		 * been installed 1st because we are doing a resolution,
@@ -2580,6 +2644,7 @@
 		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 			LLE_WLOCK(lle);
 			lle->la_flags |= LLE_DELETED;
+			EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
@@ -2650,6 +2715,8 @@
 			ndpc.sin6.sin6_family = AF_INET6;
 			ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
 			bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
+			if (V_deembed_scopeid)
+				sa6_recoverscope(&ndpc.sin6);
 
 			/* publish */
 			if (lle->la_flags & LLE_PUB)
@@ -2681,17 +2748,26 @@
 {
 	struct in6_ifextra *ext;
 
+	/* There are not IPv6-capable interfaces. */
+	switch (ifp->if_type) {
+	case IFT_PFLOG:
+	case IFT_PFSYNC:
+	case IFT_USB:
+		return (NULL);
+	}
 	ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
 	bzero(ext, sizeof(*ext));
 
-	ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
-	    M_IFADDR, M_WAITOK);
-	bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
+	ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
+	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
+	COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
+	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
-	ext->icmp6_ifstat =
-	    (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
-	    M_IFADDR, M_WAITOK);
-	bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
+	ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
+	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
+	    M_WAITOK);
+	COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
+	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	ext->nd_ifinfo = nd6_ifattach(ifp);
 	ext->scope6_id = scope6_ifattach(ifp);
@@ -2716,7 +2792,11 @@
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ext->nd_ifinfo);
 	lltable_free(ext->lltable);
+	COUNTER_ARRAY_FREE(ext->in6_ifstat,
+	    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 	free(ext->in6_ifstat, M_IFADDR);
+	COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
+	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 	free(ext->icmp6_ifstat, M_IFADDR);
 	free(ext, M_IFADDR);
 }

Modified: trunk/sys/netinet6/in6.h
===================================================================
--- trunk/sys/netinet6/in6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in.h	8.3 (Berkeley) 1/3/94
- * $FreeBSD: stable/9/sys/netinet6/in6.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6.h 272662 2014-10-06 17:08:19Z tuexen $
  */
 
 #ifndef __KAME_NETINET_IN_H_INCLUDED_
@@ -362,11 +362,11 @@
 
 #define IFA6_IS_DEPRECATED(a) \
 	((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
-	 (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+	 (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
 	 (a)->ia6_lifetime.ia6t_pltime)
 #define IFA6_IS_INVALID(a) \
 	((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
-	 (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+	 (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
 	 (a)->ia6_lifetime.ia6t_vltime)
 #endif /* _KERNEL */
 
@@ -623,13 +623,18 @@
 #endif /* __BSD_VISIBLE */
 
 /*
- * Redefinition of mbuf flags
+ * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/,
+ * the protocol specific mbuf flags are shared between them.
  */
-#define	M_AUTHIPHDR	M_PROTO2
-#define	M_DECRYPTED	M_PROTO3
-#define	M_LOOP		M_PROTO4
-#define	M_AUTHIPDGM	M_PROTO5
-#define	M_RTALERT_MLD	M_PROTO6
+#define	M_FASTFWD_OURS		M_PROTO1	/* changed dst to local */
+#define	M_IP6_NEXTHOP		M_PROTO2	/* explicit ip nexthop */
+#define	M_IP_NEXTHOP		M_PROTO2	/* explicit ip nexthop */
+#define	M_SKIP_FIREWALL		M_PROTO3	/* skip firewall processing */
+#define	M_AUTHIPHDR		M_PROTO4
+#define	M_DECRYPTED		M_PROTO5
+#define	M_LOOP			M_PROTO6
+#define	M_AUTHIPDGM		M_PROTO7
+#define	M_RTALERT_MLD		M_PROTO8
 
 #ifdef _KERNEL
 struct cmsghdr;
@@ -637,6 +642,8 @@
 
 int	in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
 int	in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
+int	in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t,
+			  u_int32_t);
 int	in6_localaddr(struct in6_addr *);
 int	in6_localip(struct in6_addr *);
 int	in6_addrscope(struct in6_addr *);

Modified: trunk/sys/netinet6/in6_cksum.c
===================================================================
--- trunk/sys/netinet6/in6_cksum.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_cksum.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_cksum.c 238227 2012-07-08 10:29:01Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_cksum.c 272662 2014-10-06 17:08:19Z tuexen $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -146,9 +146,11 @@
  * off is an offset where TCP/UDP/ICMP6 header starts.
  * len is a total length of a transport segment.
  * (e.g. TCP header + TCP payload)
+ * cov is the number of bytes to be taken into account for the checksum
  */
 int
-in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+in6_cksum_partial(struct mbuf *m, u_int8_t nxt, u_int32_t off,
+    u_int32_t len, u_int32_t cov)
 {
 	struct ip6_hdr *ip6;
 	u_int16_t *w, scope;
@@ -216,9 +218,9 @@
 	}
 	w = (u_int16_t *)(mtod(m, u_char *) + off);
 	mlen = m->m_len - off;
-	if (len < mlen)
-		mlen = len;
-	len -= mlen;
+	if (cov < mlen)
+		mlen = cov;
+	cov -= mlen;
 	/*
 	 * Force to even boundary.
 	 */
@@ -274,7 +276,7 @@
 	 * Lastly calculate a summary of the rest of mbufs.
 	 */
 
-	for (;m && len; m = m->m_next) {
+	for (;m && cov; m = m->m_next) {
 		if (m->m_len == 0)
 			continue;
 		w = mtod(m, u_int16_t *);
@@ -291,12 +293,12 @@
 			sum += s_util.s;
 			w = (u_int16_t *)((char *)w + 1);
 			mlen = m->m_len - 1;
-			len--;
+			cov--;
 		} else
 			mlen = m->m_len;
-		if (len < mlen)
-			mlen = len;
-		len -= mlen;
+		if (cov < mlen)
+			mlen = cov;
+		cov -= mlen;
 		/*
 		 * Force to even boundary.
 		 */
@@ -344,7 +346,7 @@
 		} else if (mlen == -1)
 			s_util.c[0] = *(char *)w;
 	}
-	if (len)
+	if (cov)
 		panic("in6_cksum: out of data");
 	if (mlen == -1) {
 		/* The last mbuf has odd # of bytes. Follow the
@@ -356,3 +358,9 @@
 	REDUCE;
 	return (~sum & 0xffff);
 }
+
+int
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+{
+	return (in6_cksum_partial(m, nxt, off, len, len));
+}

Modified: trunk/sys/netinet6/in6_gif.c
===================================================================
--- trunk/sys/netinet6/in6_gif.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_gif.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,14 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_gif.c 232292 2012-02-29 09:47:26Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_gif.c 284072 2015-06-06 13:26:13Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -50,7 +52,9 @@
 #include <sys/malloc.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/route.h>
+#include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -61,7 +65,6 @@
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
-#include <netinet6/in6_gif.h>
 #include <netinet6/in6_var.h>
 #endif
 #include <netinet6/ip6protosw.h>
@@ -72,7 +75,8 @@
 
 #include <net/if_gif.h>
 
-VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+#define GIF_HLIM	30
+static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
 #define	V_ip6_gif_hlim			VNET(ip6_gif_hlim)
 
 SYSCTL_DECL(_net_inet6_ip6);
@@ -81,9 +85,10 @@
 
 static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
 			 struct ifnet *);
+static int in6_gif_input(struct mbuf **, int *, int);
 
 extern  struct domain inet6domain;
-struct ip6protosw in6_gif_protosw = {
+static struct ip6protosw in6_gif_protosw = {
 	.pr_type =	SOCK_RAW,
 	.pr_domain =	&inet6domain,
 	.pr_protocol =	0,			/* IPPROTO_IPV[46] */
@@ -95,112 +100,24 @@
 };
 
 int
-in6_gif_output(struct ifnet *ifp,
-    int family,			/* family of the packet to be encapsulate */
-    struct mbuf *m)
+in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
 {
+	GIF_RLOCK_TRACKER;
 	struct gif_softc *sc = ifp->if_softc;
-	struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
-	struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc;
-	struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst;
 	struct ip6_hdr *ip6;
-	struct etherip_header eiphdr;
-	int error, len, proto;
-	u_int8_t itos, otos;
+	int len;
 
-	GIF_LOCK_ASSERT(sc);
-
-	if (sin6_src == NULL || sin6_dst == NULL ||
-	    sin6_src->sin6_family != AF_INET6 ||
-	    sin6_dst->sin6_family != AF_INET6) {
-		m_freem(m);
-		return EAFNOSUPPORT;
-	}
-
-	switch (family) {
-#ifdef INET
-	case AF_INET:
-	    {
-		struct ip *ip;
-
-		proto = IPPROTO_IPV4;
-		if (m->m_len < sizeof(*ip)) {
-			m = m_pullup(m, sizeof(*ip));
-			if (!m)
-				return ENOBUFS;
-		}
-		ip = mtod(m, struct ip *);
-		itos = ip->ip_tos;
-		break;
-	    }
-#endif
-#ifdef INET6
-	case AF_INET6:
-	    {
-		struct ip6_hdr *ip6;
-		proto = IPPROTO_IPV6;
-		if (m->m_len < sizeof(*ip6)) {
-			m = m_pullup(m, sizeof(*ip6));
-			if (!m)
-				return ENOBUFS;
-		}
-		ip6 = mtod(m, struct ip6_hdr *);
-		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
-		break;
-	    }
-#endif
-	case AF_LINK:
-		proto = IPPROTO_ETHERIP;
-
-		/*
-		 * GIF_SEND_REVETHIP (disabled by default) intentionally
-		 * sends an EtherIP packet with revered version field in
-		 * the header.  This is a knob for backward compatibility
-		 * with FreeBSD 7.2R or prior.
-		 */
-		if ((sc->gif_options & GIF_SEND_REVETHIP)) {
-			eiphdr.eip_ver = 0;
-			eiphdr.eip_resvl = ETHERIP_VERSION;
-			eiphdr.eip_resvh = 0;
-		} else {
-			eiphdr.eip_ver = ETHERIP_VERSION;
-			eiphdr.eip_resvl = 0;
-			eiphdr.eip_resvh = 0;
-		}
-		/* prepend Ethernet-in-IP header */
-		M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
-		if (m && m->m_len < sizeof(struct etherip_header))
-			m = m_pullup(m, sizeof(struct etherip_header));
-		if (m == NULL)
-			return ENOBUFS;
-		bcopy(&eiphdr, mtod(m, struct etherip_header *),
-		    sizeof(struct etherip_header));
-		break;
-
-	default:
-#ifdef DEBUG
-		printf("in6_gif_output: warning: unknown family %d passed\n",
-			family);
-#endif
-		m_freem(m);
-		return EAFNOSUPPORT;
-	}
-
 	/* prepend new IP header */
 	len = sizeof(struct ip6_hdr);
 #ifndef __NO_STRICT_ALIGNMENT
-	if (family == AF_LINK)
+	if (proto == IPPROTO_ETHERIP)
 		len += ETHERIP_ALIGN;
 #endif
-	M_PREPEND(m, len, M_DONTWAIT);
-	if (m != NULL && m->m_len < len)
-		m = m_pullup(m, len);
-	if (m == NULL) {
-		printf("ENOBUFS in in6_gif_output %d\n", __LINE__);
-		return ENOBUFS;
-	}
+	M_PREPEND(m, len, M_NOWAIT);
+	if (m == NULL)
+		return (ENOBUFS);
 #ifndef __NO_STRICT_ALIGNMENT
-	if (family == AF_LINK) {
+	if (proto == IPPROTO_ETHERIP) {
 		len = mtod(m, vm_offset_t) & 3;
 		KASSERT(len == 0 || len == ETHERIP_ALIGN,
 		    ("in6_gif_output: unexpected misalignment"));
@@ -210,167 +127,52 @@
 #endif
 
 	ip6 = mtod(m, struct ip6_hdr *);
-	ip6->ip6_flow	= 0;
-	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
-	ip6->ip6_vfc	|= IPV6_VERSION;
-	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len);
-	ip6->ip6_nxt	= proto;
-	ip6->ip6_hlim	= V_ip6_gif_hlim;
-	ip6->ip6_src	= sin6_src->sin6_addr;
-	/* bidirectional configured tunnel mode */
-	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr))
-		ip6->ip6_dst = sin6_dst->sin6_addr;
-	else  {
+	GIF_RLOCK(sc);
+	if (sc->gif_family != AF_INET6) {
 		m_freem(m);
-		return ENETUNREACH;
+		GIF_RUNLOCK(sc);
+		return (ENETDOWN);
 	}
-	ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
-		       &otos, &itos);
-	ip6->ip6_flow &= ~htonl(0xff << 20);
-	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+	bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
+	GIF_RUNLOCK(sc);
 
-	M_SETFIB(m, sc->gif_fibnum);
-
-	if (dst->sin6_family != sin6_dst->sin6_family ||
-	     !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) {
-		/* cache route doesn't match */
-		bzero(dst, sizeof(*dst));
-		dst->sin6_family = sin6_dst->sin6_family;
-		dst->sin6_len = sizeof(struct sockaddr_in6);
-		dst->sin6_addr = sin6_dst->sin6_addr;
-		if (sc->gif_ro6.ro_rt) {
-			RTFREE(sc->gif_ro6.ro_rt);
-			sc->gif_ro6.ro_rt = NULL;
-		}
-#if 0
-		GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
-	}
-
-	if (sc->gif_ro6.ro_rt == NULL) {
-		in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum);
-		if (sc->gif_ro6.ro_rt == NULL) {
-			m_freem(m);
-			return ENETUNREACH;
-		}
-
-		/* if it constitutes infinite encapsulation, punt. */
-		if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
-			m_freem(m);
-			return ENETUNREACH;	/*XXX*/
-		}
-#if 0
-		ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu
-			- sizeof(struct ip6_hdr);
-#endif
-	}
-
-	m_addr_changed(m);
-
-#ifdef IPV6_MINMTU
+	ip6->ip6_flow  |= htonl((uint32_t)ecn << 20);
+	ip6->ip6_nxt	= proto;
+	ip6->ip6_hlim	= V_ip6_gif_hlim;
 	/*
 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
 	 * it is too painful to ask for resend of inner packet, to achieve
 	 * path MTU discovery for encapsulated packets.
 	 */
-	error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL);
-#else
-	error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL);
-#endif
-
-	if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
-	    sc->gif_ro6.ro_rt != NULL) {
-		RTFREE(sc->gif_ro6.ro_rt);
-		sc->gif_ro6.ro_rt = NULL;
-	}
-
-	return (error);
+	return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
 }
 
-int
+static int
 in6_gif_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
-	struct ifnet *gifp = NULL;
+	struct ifnet *gifp;
 	struct gif_softc *sc;
 	struct ip6_hdr *ip6;
-	int af = 0;
-	u_int32_t otos;
+	uint8_t ecn;
 
-	ip6 = mtod(m, struct ip6_hdr *);
-
-	sc = (struct gif_softc *)encap_getarg(m);
+	sc = encap_getarg(m);
 	if (sc == NULL) {
 		m_freem(m);
 		IP6STAT_INC(ip6s_nogif);
-		return IPPROTO_DONE;
+		return (IPPROTO_DONE);
 	}
-
 	gifp = GIF2IFP(sc);
-	if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+	if ((gifp->if_flags & IFF_UP) != 0) {
+		ip6 = mtod(m, struct ip6_hdr *);
+		ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+		m_adj(m, *offp);
+		gif_input(m, gifp, proto, ecn);
+	} else {
 		m_freem(m);
 		IP6STAT_INC(ip6s_nogif);
-		return IPPROTO_DONE;
 	}
-
-	otos = ip6->ip6_flow;
-	m_adj(m, *offp);
-
-	switch (proto) {
-#ifdef INET
-	case IPPROTO_IPV4:
-	    {
-		struct ip *ip;
-		u_int8_t otos8;
-		af = AF_INET;
-		otos8 = (ntohl(otos) >> 20) & 0xff;
-		if (m->m_len < sizeof(*ip)) {
-			m = m_pullup(m, sizeof(*ip));
-			if (!m)
-				return IPPROTO_DONE;
-		}
-		ip = mtod(m, struct ip *);
-		if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
-				  ECN_ALLOWED : ECN_NOCARE,
-				  &otos8, &ip->ip_tos) == 0) {
-			m_freem(m);
-			return IPPROTO_DONE;
-		}
-		break;
-	    }
-#endif /* INET */
-#ifdef INET6
-	case IPPROTO_IPV6:
-	    {
-		struct ip6_hdr *ip6;
-		af = AF_INET6;
-		if (m->m_len < sizeof(*ip6)) {
-			m = m_pullup(m, sizeof(*ip6));
-			if (!m)
-				return IPPROTO_DONE;
-		}
-		ip6 = mtod(m, struct ip6_hdr *);
-		if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ?
-				   ECN_ALLOWED : ECN_NOCARE,
-				   &otos, &ip6->ip6_flow) == 0) {
-			m_freem(m);
-			return IPPROTO_DONE;
-		}
-		break;
-	    }
-#endif
-	case IPPROTO_ETHERIP:
-		af = AF_LINK;
-		break;
-
-	default:
-		IP6STAT_INC(ip6s_nogif);
-		m_freem(m);
-		return IPPROTO_DONE;
-	}
-
-	gif_input(m, af, gifp);
-	return IPPROTO_DONE;
+	return (IPPROTO_DONE);
 }
 
 /*
@@ -380,19 +182,22 @@
 gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
     struct ifnet *ifp)
 {
-	struct sockaddr_in6 *src, *dst;
+	int ret;
 
-	src = (struct sockaddr_in6 *)sc->gif_psrc;
-	dst = (struct sockaddr_in6 *)sc->gif_pdst;
-
+	GIF_RLOCK_ASSERT(sc);
 	/*
 	 * Check for address match.  Note that the check is for an incoming
 	 * packet.  We should compare the *source* address in our configuration
 	 * and the *destination* address of the packet, and vice versa.
 	 */
-	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
-	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
-		return 0;
+	if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+		return (0);
+	ret = 128;
+	if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
+		if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+			return (0);
+	} else
+		ret += 128;
 
 	/* martian filters on outer source - done in ip6_input */
 
@@ -410,29 +215,22 @@
 		rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL,
 		    sc->gif_fibnum);
 		if (!rt || rt->rt_ifp != ifp) {
-#if 0
-			char ip6buf[INET6_ADDRSTRLEN];
-			log(LOG_WARNING, "%s: packet from %s dropped "
-			    "due to ingress filter\n", if_name(GIF2IFP(sc)),
-			    ip6_sprintf(ip6buf, &sin6.sin6_addr));
-#endif
 			if (rt)
 				RTFREE_LOCKED(rt);
-			return 0;
+			return (0);
 		}
 		RTFREE_LOCKED(rt);
 	}
 
-	return 128 * 2;
+	return (ret);
 }
 
 /*
  * we know that we are in IFF_UP, outer address available, and outer family
  * matched the physical addr family.  see gif_encapcheck().
- * sanity check for arg should have been done in the caller.
  */
 int
-gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
+in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 	struct ip6_hdr ip6;
 	struct gif_softc *sc;
@@ -440,31 +238,21 @@
 
 	/* sanity check done in caller */
 	sc = (struct gif_softc *)arg;
+	GIF_RLOCK_ASSERT(sc);
 
-	/* LINTED const cast */
 	m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6);
 	ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
-	return gif_validate6(&ip6, sc, ifp);
+	return (gif_validate6(&ip6, sc, ifp));
 }
 
 int
 in6_gif_attach(struct gif_softc *sc)
 {
-	sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
+
+	KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+	sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
 	    (void *)&in6_gif_protosw, sc);
-	if (sc->encap_cookie6 == NULL)
-		return EEXIST;
-	return 0;
+	if (sc->gif_ecookie == NULL)
+		return (EEXIST);
+	return (0);
 }
-
-int
-in6_gif_detach(struct gif_softc *sc)
-{
-	int error;
-
-	error = encap_detach(sc->encap_cookie6);
-	if (error == 0)
-		sc->encap_cookie6 = NULL;
-	return error;
-}

Modified: trunk/sys/netinet6/in6_ifattach.c
===================================================================
--- trunk/sys/netinet6/in6_ifattach.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_ifattach.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_ifattach.c 233200 2012-03-19 20:49:16Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_ifattach.c 287734 2015-09-13 02:09:06Z hrs $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -267,6 +267,7 @@
 
 	/* get EUI64 */
 	switch (ifp->if_type) {
+	case IFT_BRIDGE:
 	case IFT_ETHER:
 	case IFT_L2VLAN:
 	case IFT_FDDI:
@@ -595,12 +596,6 @@
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
-	/* we don't need to perform DAD on loopback interfaces. */
-	ifra.ifra_flags |= IN6_IFF_NODAD;
-
-	/* skip registration to the prefix list. XXX should be temporary. */
-	ifra.ifra_flags |= IN6_IFF_NOPFX;
-
 	/*
 	 * We are sure that this is a newly assigned address, so we can set
 	 * NULL to the 3rd arg.
@@ -617,13 +612,16 @@
 
 /*
  * compute NI group address, based on the current hostname setting.
- * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later).
+ * see RFC 4620.
  *
  * when ifp == NULL, the caller is responsible for filling scopeid.
+ *
+ * If oldmcprefix == 1, FF02:0:0:0:0:2::/96 is used for NI group address
+ * while it is FF02:0:0:0:0:2:FF00::/104 in RFC 4620. 
  */
-int
-in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
-    struct in6_addr *in6)
+static int
+in6_nigroup0(struct ifnet *ifp, const char *name, int namelen,
+    struct in6_addr *in6, int oldmcprefix)
 {
 	struct prison *pr;
 	const char *p;
@@ -668,7 +666,7 @@
 			*q = *q - 'A' + 'a';
 	}
 
-	/* generate 8 bytes of pseudo-random value. */
+	/* generate 16 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, &l, sizeof(l));
@@ -678,7 +676,14 @@
 	bzero(in6, sizeof(*in6));
 	in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
 	in6->s6_addr8[11] = 2;
-	bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+	if (oldmcprefix == 0) {
+		in6->s6_addr8[12] = 0xff;
+	 	/* Copy the first 24 bits of 128-bit hash into the address. */
+		bcopy(digest, &in6->s6_addr8[13], 3);
+	} else {
+	 	/* Copy the first 32 bits of 128-bit hash into the address. */
+		bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+	}
 	if (in6_setscope(in6, ifp, NULL))
 		return (-1); /* XXX: should not fail */
 
@@ -685,6 +690,22 @@
 	return 0;
 }
 
+int
+in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
+    struct in6_addr *in6)
+{
+
+	return (in6_nigroup0(ifp, name, namelen, in6, 0));
+}
+
+int
+in6_nigroup_oldmcprefix(struct ifnet *ifp, const char *name, int namelen,
+    struct in6_addr *in6)
+{
+
+	return (in6_nigroup0(ifp, name, namelen, in6, 1));
+}
+
 /*
  * XXX multiple loopback interface needs more care.  for instance,
  * nodelocal address needs to be configured onto only one of them.
@@ -698,19 +719,12 @@
 	struct in6_ifaddr *ia;
 	struct in6_addr in6;
 
-	/* some of the interfaces are inherently not IPv6 capable */
-	switch (ifp->if_type) {
-	case IFT_PFLOG:
-	case IFT_PFSYNC:
-	case IFT_CARP:
+	if (ifp->if_afdata[AF_INET6] == NULL)
 		return;
-	}
-
 	/*
 	 * quirks based on interface type
 	 */
 	switch (ifp->if_type) {
-#ifdef IFT_STF
 	case IFT_STF:
 		/*
 		 * 6to4 interface is a very special kind of beast.
@@ -718,8 +732,8 @@
 		 * linklocals for 6to4 interface, but there's no use and
 		 * it is rather harmful to have one.
 		 */
-		goto statinit;
-#endif
+		ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
+		break;
 	default:
 		break;
 	}
@@ -753,8 +767,7 @@
 	/*
 	 * assign a link-local address, if there's none.
 	 */
-	if (ifp->if_type != IFT_BRIDGE &&
-	    !(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+	if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
 	    ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) {
 		int error;
 
@@ -771,10 +784,6 @@
 			ifa_free(&ia->ia_ifa);
 	}
 
-#ifdef IFT_STF			/* XXX */
-statinit:
-#endif
-
 	/* update dynamically. */
 	if (V_in6_maxmtu < ifp->if_mtu)
 		V_in6_maxmtu = ifp->if_mtu;
@@ -795,6 +804,9 @@
 	struct sockaddr_in6 sin6;
 	struct in6_multi_mship *imm;
 
+	if (ifp->if_afdata[AF_INET6] == NULL)
+		return;
+
 	/* remove neighbor management table */
 	nd6_purge(ifp);
 
@@ -839,6 +851,7 @@
 	}
 
 	in6_pcbpurgeif0(&V_udbinfo, ifp);
+	in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in6_pcbpurgeif0(&V_ripcbinfo, ifp);
 	/* leave from all multicast groups joined */
 	in6_purgemaddrs(ifp);
@@ -918,6 +931,8 @@
 
 	bzero(nullbuf, sizeof(nullbuf));
 	TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+		if (ifp->if_afdata[AF_INET6] == NULL)
+			continue;
 		ndi = ND_IFINFO(ifp);
 		if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
 			/*

Modified: trunk/sys/netinet6/in6_ifattach.h
===================================================================
--- trunk/sys/netinet6/in6_ifattach.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_ifattach.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: in6_ifattach.h,v 1.14 2001/02/08 12:48:39 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/in6_ifattach.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_ifattach.h 250251 2013-05-04 19:16:26Z hrs $
  */
 
 #ifndef _NETINET6_IN6_IFATTACH_H_
@@ -41,6 +41,7 @@
 void in6_tmpaddrtimer(void *);
 int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
 int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
+int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *);
 #endif /* _KERNEL */
 
 #endif /* _NETINET6_IN6_IFATTACH_H_ */

Modified: trunk/sys/netinet6/in6_mcast.c
===================================================================
--- trunk/sys/netinet6/in6_mcast.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_mcast.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_mcast.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_mcast.c 321134 2017-07-18 16:58:52Z ngie $");
 
 #include "opt_inet6.h"
 
@@ -132,7 +132,9 @@
 static int	in6m_get_source(struct in6_multi *inm,
 		    const struct in6_addr *addr, const int noalloc,
 		    struct ip6_msource **pims);
+#ifdef KTR
 static int	in6m_is_ifp_detached(const struct in6_multi *);
+#endif
 static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
 static void	in6m_purge(struct in6_multi *);
 static void	in6m_reap(struct in6_multi *);
@@ -176,6 +178,7 @@
     CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
     "Per-interface stack-wide source filters");
 
+#ifdef KTR
 /*
  * Inline function which wraps assertions for a valid ifp.
  * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -198,6 +201,7 @@
 
 	return (ifp == NULL);
 }
+#endif
 
 /*
  * Initialize an in6_mfilter structure to a known state at t0, t1
@@ -997,9 +1001,10 @@
 	/* Decrement ASM listener count on transition out of ASM mode. */
 	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
 		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
-		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
 			CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__);
 			--inm->in6m_st[1].iss_asm;
+		}
 	}
 
 	/* Increment ASM listener count on transition to ASM mode. */
@@ -1072,6 +1077,8 @@
 		free(ims, M_IP6MSOURCE);
 		inm->in6m_nsrc--;
 	}
+	/* Free state-change requests that might be queued. */
+	_IF_DRAIN(&inm->in6m_scq);
 }
 
 /*
@@ -1446,16 +1453,15 @@
 
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
-	if (error) {
+	if (error)
 		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
-		goto out_im6f_rollback;
+	else {
+		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+		error = mld_change_state(inm, 0);
+		if (error)
+			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
 	}
 
-	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
-	error = mld_change_state(inm, 0);
-	if (error)
-		CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
-
 	IN6_MULTI_UNLOCK();
 
 out_im6f_rollback:
@@ -1626,6 +1632,8 @@
 	 * has asked for, but we always tell userland how big the
 	 * buffer really needs to be.
 	 */
+	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
+		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
 	tss = NULL;
 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
@@ -1775,8 +1783,6 @@
 	    ("%s: not INP_IPV6 inpcb", __func__));
 	KASSERT(gsin6->sin6_family == AF_INET6,
 	    ("%s: not AF_INET6 group", __func__));
-	KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr),
-	    ("%s: not multicast", __func__));
 
 	ifp = NULL;
 	memset(&ro6, 0, sizeof(struct route_in6));
@@ -1845,8 +1851,7 @@
 		if (mreq.ipv6mr_interface == 0) {
 			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
 		} else {
-			if (mreq.ipv6mr_interface < 0 ||
-			    V_if_index < mreq.ipv6mr_interface)
+			if (V_if_index < mreq.ipv6mr_interface)
 				return (EADDRNOTAVAIL);
 			ifp = ifnet_byindex(mreq.ipv6mr_interface);
 		}
@@ -2041,29 +2046,27 @@
 	if (is_new) {
 		error = in6_mc_join_locked(ifp, &gsa->sin6.sin6_addr, imf,
 		    &inm, 0);
-		if (error)
+		if (error) {
+			IN6_MULTI_UNLOCK();
 			goto out_im6o_free;
+		}
 		imo->im6o_membership[idx] = inm;
 	} else {
 		CTR1(KTR_MLD, "%s: merge inm state", __func__);
 		error = in6m_merge(inm, imf);
-		if (error) {
+		if (error)
 			CTR1(KTR_MLD, "%s: failed to merge inm state",
 			    __func__);
-			goto out_im6f_rollback;
+		else {
+			CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+			error = mld_change_state(inm, 0);
+			if (error)
+				CTR1(KTR_MLD, "%s: failed mld downcall",
+				    __func__);
 		}
-		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
-		error = mld_change_state(inm, 0);
-		if (error) {
-			CTR1(KTR_MLD, "%s: failed mld downcall",
-			    __func__);
-			goto out_im6f_rollback;
-		}
 	}
 
 	IN6_MULTI_UNLOCK();
-
-out_im6f_rollback:
 	INP_WLOCK_ASSERT(inp);
 	if (error) {
 		im6f_rollback(imf);
@@ -2192,7 +2195,7 @@
 	 * XXX SCOPE6 lock potentially taken here.
 	 */
 	if (ifindex != 0) {
-		if (ifindex < 0 || V_if_index < ifindex)
+		if (V_if_index < ifindex)
 			return (EADDRNOTAVAIL);
 		ifp = ifnet_byindex(ifindex);
 		if (ifp == NULL)
@@ -2290,23 +2293,20 @@
 	} else {
 		CTR1(KTR_MLD, "%s: merge inm state", __func__);
 		error = in6m_merge(inm, imf);
-		if (error) {
+		if (error)
 			CTR1(KTR_MLD, "%s: failed to merge inm state",
 			    __func__);
-			goto out_im6f_rollback;
+		else {
+			CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+			error = mld_change_state(inm, 0);
+			if (error)
+				CTR1(KTR_MLD, "%s: failed mld downcall",
+				    __func__);
 		}
-
-		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
-		error = mld_change_state(inm, 0);
-		if (error) {
-			CTR1(KTR_MLD, "%s: failed mld downcall",
-			    __func__);
-		}
 	}
 
 	IN6_MULTI_UNLOCK();
 
-out_im6f_rollback:
 	if (error)
 		im6f_rollback(imf);
 	else
@@ -2350,13 +2350,17 @@
 	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
 	if (error)
 		return (error);
-	if (ifindex < 0 || V_if_index < ifindex)
+	if (V_if_index < ifindex)
 		return (EINVAL);
-
-	ifp = ifnet_byindex(ifindex);
-	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
-		return (EADDRNOTAVAIL);
-
+	if (ifindex == 0)
+		ifp = NULL;
+	else {
+		ifp = ifnet_byindex(ifindex);
+		if (ifp == NULL)
+			return (EINVAL);
+		if ((ifp->if_flags & IFF_MULTICAST) == 0)
+			return (EADDRNOTAVAIL);
+	}
 	imo = in6p_findmoptions(inp);
 	imo->im6o_multicast_ifp = ifp;
 	INP_WUNLOCK(inp);
@@ -2515,16 +2519,15 @@
 	 */
 	CTR1(KTR_MLD, "%s: merge inm state", __func__);
 	error = in6m_merge(inm, imf);
-	if (error) {
+	if (error)
 		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
-		goto out_im6f_rollback;
+	else {
+		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+		error = mld_change_state(inm, 0);
+		if (error)
+			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
 	}
 
-	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
-	error = mld_change_state(inm, 0);
-	if (error)
-		CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
-
 	IN6_MULTI_UNLOCK();
 
 out_im6f_rollback:

Modified: trunk/sys/netinet6/in6_pcb.c
===================================================================
--- trunk/sys/netinet6/in6_pcb.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcb.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_pcb.c 234279 2012-04-14 10:36:43Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_pcb.c 309108 2016-11-24 14:48:46Z jch $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -107,8 +107,6 @@
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 
-struct	in6_addr zeroin6_addr;
-
 int
 in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
     struct ucred *cred)
@@ -159,7 +157,7 @@
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
-			if (so->so_options & SO_REUSEADDR)
+			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			struct ifaddr *ifa;
@@ -246,8 +244,7 @@
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
-			} else if (t && (reuseport == 0 ||
-			    (t->inp_flags2 & INP_REUSEPORT) == 0)) {
+			} else if (t && (reuseport & inp_so_options(t)) == 0) {
 				return (EADDRINUSE);
 			}
 #ifdef INET
@@ -268,8 +265,8 @@
 					     INP_IPV6PROTO) ==
 					     (t->inp_vflag & INP_IPV6PROTO))))
 						return (EADDRINUSE);
-				} else if (t && (reuseport == 0 ||
-				    (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+				} else if (t &&
+				    (reuseport & inp_so_options(t)) == 0 &&
 				    (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				    (t->inp_vflag & INP_IPV6PROTO) != 0))
 					return (EADDRINUSE);
@@ -626,18 +623,12 @@
 		/*
 		 * If the error designates a new path MTU for a destination
 		 * and the application (associated with this socket) wanted to
-		 * know the value, notify. Note that we notify for all
-		 * disconnected sockets if the corresponding application
-		 * wanted. This is because some UDP applications keep sending
-		 * sockets disconnected.
+		 * know the value, notify.
 		 * XXX: should we avoid to notify the value to TCP sockets?
 		 */
-		if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
-		    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
-		     IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
+		if (cmd == PRC_MSGSIZE && cmdarg != NULL)
 			ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
-					(u_int32_t *)cmdarg);
-		}
+					*(u_int32_t *)cmdarg);
 
 		/*
 		 * Detect if we should notify the error. If no source and
@@ -778,7 +769,7 @@
 	struct ip6_moptions *im6o;
 	int i, gap;
 
-	INP_INFO_RLOCK(pcbinfo);
+	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(in6p);
 		im6o = in6p->in6p_moptions;
@@ -809,7 +800,7 @@
 		}
 		INP_WUNLOCK(in6p);
 	}
-	INP_INFO_RUNLOCK(pcbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*

Modified: trunk/sys/netinet6/in6_pcb.h
===================================================================
--- trunk/sys/netinet6/in6_pcb.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcb.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/in6_pcb.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_pcb.h 241916 2012-10-22 21:49:56Z delphij $
  */
 
 #ifndef _NETINET6_IN6_PCB_H_

Modified: trunk/sys/netinet6/in6_pcbgroup.c
===================================================================
--- trunk/sys/netinet6/in6_pcbgroup.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcbgroup.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
 
 #include <sys/cdefs.h>
 
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
 
 #include "opt_inet6.h"
 

Modified: trunk/sys/netinet6/in6_proto.c
===================================================================
--- trunk/sys/netinet6/in6_proto.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_proto.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_proto.c 239936 2012-08-31 06:38:43Z maxim $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_proto.c 284066 2015-06-06 12:44:42Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -127,10 +127,6 @@
 
 #include <netinet6/ip6protosw.h>
 
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-#endif
-
 /*
  * TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
  */
@@ -212,7 +208,7 @@
 	.pr_protocol =		IPPROTO_SCTP,
 	.pr_flags =		PR_WANTRCVD,
 	.pr_input =		sctp6_input,
-	.pr_ctlinput =	sctp6_ctlinput,
+	.pr_ctlinput =		sctp6_ctlinput,
 	.pr_ctloutput =		sctp_ctloutput,
 	.pr_drain =		sctp_drain,
 	.pr_usrreqs =		&sctp6_usrreqs
@@ -219,6 +215,19 @@
 },
 #endif /* SCTP */
 {
+	.pr_type =		SOCK_DGRAM,
+	.pr_domain =		&inet6domain,
+	.pr_protocol =		IPPROTO_UDPLITE,
+	.pr_flags =		PR_ATOMIC|PR_ADDR,
+	.pr_input =		udp6_input,
+	.pr_ctlinput =		udplite6_ctlinput,
+	.pr_ctloutput =		udp_ctloutput,
+#ifndef INET	/* Do not call initialization twice. */
+	.pr_init =		udplite_init,
+#endif
+	.pr_usrreqs =		&udp6_usrreqs,
+},
+{
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_RAW,
@@ -323,6 +332,17 @@
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inet6domain,
+	.pr_protocol =		IPPROTO_GRE,
+	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+	.pr_input =		encap6_input,
+	.pr_output =		rip6_output,
+	.pr_ctloutput =		rip6_ctloutput,
+	.pr_init =		encap_init,
+	.pr_usrreqs =		&rip6_usrreqs
+},
+{
+	.pr_type =		SOCK_RAW,
+	.pr_domain =		&inet6domain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		encap6_input,
@@ -432,16 +452,6 @@
 VNET_DEFINE(int, pmtu_expire) = 60*10;
 VNET_DEFINE(int, pmtu_probe) = 60*2;
 
-/* raw IP6 parameters */
-/*
- * Nominal space allocated to a raw ip socket.
- */
-#define	RIPV6SNDQ	8192
-#define	RIPV6RCVQ	8192
-
-VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ;
-VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ;
-
 /* ICMPV6 parameters */
 VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
 VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60;	/* 10 minutes */
@@ -449,12 +459,8 @@
 /* control how to respond to NI queries */
 VNET_DEFINE(int, icmp6_nodeinfo) =
     (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
+VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
 
-/* UDP on IP6 parameters */
-VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */
-VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6));
-					/* 40 1K datagrams */
-
 /*
  * sysctl related items.
  */
@@ -480,8 +486,6 @@
 	int error = 0;
 	int old;
 
-	VNET_SYSCTL_ARG(req, arg1);
-
 	error = SYSCTL_OUT(req, arg1, sizeof(int));
 	if (error || !req->newptr)
 		return (error);
@@ -501,8 +505,6 @@
 	int error = 0;
 	int old;
 
-	VNET_SYSCTL_ARG(req, arg1);
-
 	error = SYSCTL_OUT(req, arg1, sizeof(int));
 	if (error || !req->newptr)
 		return (error);
@@ -521,8 +523,8 @@
 	&VNET_NAME(ip6_sendredirects), 0, "");
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW,
 	&VNET_NAME(ip6_defhlim), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW,
-	&VNET_NAME(ip6stat), ip6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
+    ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
 	CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
@@ -574,8 +576,9 @@
 	CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
 	"Default value of per-interface flag for automatically adding an IPv6"
 	" link-local address to interfaces when attached");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW,
-	&VNET_NAME(rip6stat), rip6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
+    struct rip6stat, rip6stat,
+    "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
 	CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
 SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
@@ -589,23 +592,14 @@
 	&VNET_NAME(ip6stealth), 0, "");
 #endif
 
-#ifdef FLOWTABLE
-VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip6_ft);
-#define	V_ip6_output_flowtable_size	VNET(ip6_output_flowtable_size)
-
-SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
-    &VNET_NAME(ip6_output_flowtable_size), 2048,
-    "number of entries in the per-cpu output flow caches");
-#endif
-
 /* net.inet6.icmp6 */
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
 	CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
 	CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW,
-	&VNET_NAME(icmp6stat), icmp6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
+    struct icmp6stat, icmp6stat,
+    "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW,
 	&VNET_NAME(nd6_prune), 0, "");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW,
@@ -618,6 +612,11 @@
 	CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW,
 	&VNET_NAME(icmp6_nodeinfo), 0, "");
+SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
+	nodeinfo_oldmcprefix, CTLFLAG_RW,
+	&VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0, 
+	"Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
+	" for compatibility with KAME implememtation.");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
 	CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,

Modified: trunk/sys/netinet6/in6_rmx.c
===================================================================
--- trunk/sys/netinet6/in6_rmx.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_rmx.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_rmx.c 242646 2012-11-06 01:18:53Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_rmx.c 314667 2017-03-04 13:03:31Z avg $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -136,8 +136,8 @@
 		}
 	}
 
-	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
-		rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
+	if (!rt->rt_mtu && rt->rt_ifp)
+		rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL) {
@@ -207,12 +207,11 @@
 	if (!rt)
 		panic("rt == NULL in in6_mtuexpire");
 
-	if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
-		if (rt->rt_rmx.rmx_expire <= time_uptime) {
+	if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
+		if (rt->rt_expire <= time_uptime) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
-			ap->nextstop = lmin(ap->nextstop,
-					rt->rt_rmx.rmx_expire);
+			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
 		}
 	}
 
@@ -278,7 +277,7 @@
 	rnh->rnh_addaddr = in6_addroute;
 
 	if (V__in6_rt_was_here == 0) {
-		callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
+		callout_init(&V_rtq_mtutimer, 1);
 		in6_mtutimo(curvnet);	/* kick off timeout first time */
 		V__in6_rt_was_here = 1;
 	}
@@ -292,7 +291,7 @@
 {
 
 	callout_drain(&V_rtq_mtutimer);
-	return (1);
+	return (rn_detachhead(head));
 }
 #endif
 

Modified: trunk/sys/netinet6/in6_src.c
===================================================================
--- trunk/sys/netinet6/in6_src.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_src.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_src.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_src.c 297445 2016-03-31 09:55:21Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -141,7 +141,7 @@
 static int add_addrsel_policyent(struct in6_addrpolicy *);
 static int delete_addrsel_policyent(struct in6_addrpolicy *);
 static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
-				    void *);
+	void *);
 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
 
@@ -250,19 +250,27 @@
 		    (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 
-		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
-		    (struct sockaddr *)&srcsock);
-		if (ia6 == NULL ||
-		    (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
-			if (ia6 != NULL)
-				ifa_free(&ia6->ia_ifa);
-			return (EADDRNOTAVAIL);
-		}
+		/*
+		 * If IPV6_BINDANY socket option is set, we allow to specify
+		 * non local addresses as source address in IPV6_PKTINFO
+		 * ancillary data.
+		 */
+		if ((inp->inp_flags & INP_BINDANY) == 0) {
+			ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
+			    (struct sockaddr *)&srcsock);
+			if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST |
+			    IN6_IFF_NOTREADY))) {
+				if (ia6 != NULL)
+					ifa_free(&ia6->ia_ifa);
+				return (EADDRNOTAVAIL);
+			}
+			bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
+			ifa_free(&ia6->ia_ifa);
+		} else
+			bcopy(&srcsock.sin6_addr, srcp, sizeof(*srcp));
 		pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
 		if (ifpp)
 			*ifpp = ifp;
-		bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
-		ifa_free(&ia6->ia_ifa);
 		return (0);
 	}
 
@@ -444,6 +452,24 @@
 			REPLACE(8);
 
 		/*
+		 * Rule 9: prefer address with better virtual status.
+		 */
+		if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
+			REPLACE(9);
+		if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
+			NEXT(9);
+
+		/*
+		 * Rule 10: prefer address with `prefer_source' flag.
+		 */
+		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
+		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0)
+			REPLACE(10);
+		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 &&
+		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0)
+			NEXT(10);
+
+		/*
 		 * Rule 14: Use longest matching prefix.
 		 * Note: in the address selection draft, this rule is
 		 * documented as "Rule 8".  However, since it is also
@@ -608,6 +634,7 @@
 		if (ron->ro_rt == NULL) {
 			in6_rtalloc(ron, fibnum); /* multi path case? */
 			if (ron->ro_rt == NULL) {
+				/* XXX-BZ WT.? */
 				if (ron->ro_rt) {
 					RTFREE(ron->ro_rt);
 					ron->ro_rt = NULL;
@@ -1114,8 +1141,7 @@
 }
 
 static int
-walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
-    void *w)
+walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
 {
 	struct addrsel_policyent *pol;
 	int error = 0;

Modified: trunk/sys/netinet6/in6_var.h
===================================================================
--- trunk/sys/netinet6/in6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)in_var.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/in6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_var.h 287734 2015-09-13 02:09:06Z hrs $
  */
 
 #ifndef _NETINET6_IN6_VAR_H_
@@ -68,6 +68,7 @@
 #include <sys/tree.h>
 
 #ifdef _KERNEL
+#include <sys/fnv_hash.h>
 #include <sys/libkern.h>
 #endif
 
@@ -98,15 +99,29 @@
 struct lltable;
 struct mld_ifinfo;
 
+#ifdef _KERNEL
+#include <sys/counter.h>
+
 struct in6_ifextra {
-	struct in6_ifstat *in6_ifstat;
-	struct icmp6_ifstat *icmp6_ifstat;
+	counter_u64_t *in6_ifstat;
+	counter_u64_t *icmp6_ifstat;
 	struct nd_ifinfo *nd_ifinfo;
 	struct scope6_id *scope6_id;
 	struct lltable *lltable;
 	struct mld_ifinfo *mld_ifinfo;
 };
+#else
 
+struct in6_ifextra {
+	void *in6_ifstat;
+	void *icmp6_ifstat;
+	struct nd_ifinfo *nd_ifinfo;
+	struct scope6_id *scope6_id;
+	struct lltable *lltable;
+	struct mld_ifinfo *mld_ifinfo;
+};
+#endif /* !_KERNEL */
+
 #define	LLTABLE6(ifp)	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)
 
 struct	in6_ifaddr {
@@ -132,10 +147,13 @@
 
 	/* multicast addresses joined from the kernel */
 	LIST_HEAD(, in6_multi_mship) ia6_memberships;
+	/* entry in bucket of inet6 addresses */
+	LIST_ENTRY(in6_ifaddr) ia6_hash;
 };
 
 /* List of in6_ifaddr's. */
 TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr);
+LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr);
 
 /* control structure to manage address selection policy */
 struct in6_addrpolicy {
@@ -150,37 +168,37 @@
  * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
  */
 struct in6_ifstat {
-	u_quad_t ifs6_in_receive;	/* # of total input datagram */
-	u_quad_t ifs6_in_hdrerr;	/* # of datagrams with invalid hdr */
-	u_quad_t ifs6_in_toobig;	/* # of datagrams exceeded MTU */
-	u_quad_t ifs6_in_noroute;	/* # of datagrams with no route */
-	u_quad_t ifs6_in_addrerr;	/* # of datagrams with invalid dst */
-	u_quad_t ifs6_in_protounknown;	/* # of datagrams with unknown proto */
+	uint64_t ifs6_in_receive;	/* # of total input datagram */
+	uint64_t ifs6_in_hdrerr;	/* # of datagrams with invalid hdr */
+	uint64_t ifs6_in_toobig;	/* # of datagrams exceeded MTU */
+	uint64_t ifs6_in_noroute;	/* # of datagrams with no route */
+	uint64_t ifs6_in_addrerr;	/* # of datagrams with invalid dst */
+	uint64_t ifs6_in_protounknown;	/* # of datagrams with unknown proto */
 					/* NOTE: increment on final dst if */
-	u_quad_t ifs6_in_truncated;	/* # of truncated datagrams */
-	u_quad_t ifs6_in_discard;	/* # of discarded datagrams */
+	uint64_t ifs6_in_truncated;	/* # of truncated datagrams */
+	uint64_t ifs6_in_discard;	/* # of discarded datagrams */
 					/* NOTE: fragment timeout is not here */
-	u_quad_t ifs6_in_deliver;	/* # of datagrams delivered to ULP */
+	uint64_t ifs6_in_deliver;	/* # of datagrams delivered to ULP */
 					/* NOTE: increment on final dst if */
-	u_quad_t ifs6_out_forward;	/* # of datagrams forwarded */
+	uint64_t ifs6_out_forward;	/* # of datagrams forwarded */
 					/* NOTE: increment on outgoing if */
-	u_quad_t ifs6_out_request;	/* # of outgoing datagrams from ULP */
+	uint64_t ifs6_out_request;	/* # of outgoing datagrams from ULP */
 					/* NOTE: does not include forwrads */
-	u_quad_t ifs6_out_discard;	/* # of discarded datagrams */
-	u_quad_t ifs6_out_fragok;	/* # of datagrams fragmented */
-	u_quad_t ifs6_out_fragfail;	/* # of datagrams failed on fragment */
-	u_quad_t ifs6_out_fragcreat;	/* # of fragment datagrams */
+	uint64_t ifs6_out_discard;	/* # of discarded datagrams */
+	uint64_t ifs6_out_fragok;	/* # of datagrams fragmented */
+	uint64_t ifs6_out_fragfail;	/* # of datagrams failed on fragment */
+	uint64_t ifs6_out_fragcreat;	/* # of fragment datagrams */
 					/* NOTE: this is # after fragment */
-	u_quad_t ifs6_reass_reqd;	/* # of incoming fragmented packets */
+	uint64_t ifs6_reass_reqd;	/* # of incoming fragmented packets */
 					/* NOTE: increment on final dst if */
-	u_quad_t ifs6_reass_ok;		/* # of reassembled packets */
+	uint64_t ifs6_reass_ok;		/* # of reassembled packets */
 					/* NOTE: this is # after reass */
 					/* NOTE: increment on final dst if */
-	u_quad_t ifs6_reass_fail;	/* # of reass failures */
+	uint64_t ifs6_reass_fail;	/* # of reass failures */
 					/* NOTE: may not be packet count */
 					/* NOTE: increment on final dst if */
-	u_quad_t ifs6_in_mcast;		/* # of inbound multicast datagrams */
-	u_quad_t ifs6_out_mcast;	/* # of outbound multicast datagrams */
+	uint64_t ifs6_in_mcast;		/* # of inbound multicast datagrams */
+	uint64_t ifs6_out_mcast;	/* # of outbound multicast datagrams */
 };
 
 /*
@@ -192,77 +210,77 @@
 	 * Input statistics
 	 */
 	/* ipv6IfIcmpInMsgs, total # of input messages */
-	u_quad_t ifs6_in_msg;
+	uint64_t ifs6_in_msg;
 	/* ipv6IfIcmpInErrors, # of input error messages */
-	u_quad_t ifs6_in_error;
+	uint64_t ifs6_in_error;
 	/* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
-	u_quad_t ifs6_in_dstunreach;
+	uint64_t ifs6_in_dstunreach;
 	/* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
-	u_quad_t ifs6_in_adminprohib;
+	uint64_t ifs6_in_adminprohib;
 	/* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
-	u_quad_t ifs6_in_timeexceed;
+	uint64_t ifs6_in_timeexceed;
 	/* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
-	u_quad_t ifs6_in_paramprob;
+	uint64_t ifs6_in_paramprob;
 	/* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
-	u_quad_t ifs6_in_pkttoobig;
+	uint64_t ifs6_in_pkttoobig;
 	/* ipv6IfIcmpInEchos, # of input echo requests */
-	u_quad_t ifs6_in_echo;
+	uint64_t ifs6_in_echo;
 	/* ipv6IfIcmpInEchoReplies, # of input echo replies */
-	u_quad_t ifs6_in_echoreply;
+	uint64_t ifs6_in_echoreply;
 	/* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
-	u_quad_t ifs6_in_routersolicit;
+	uint64_t ifs6_in_routersolicit;
 	/* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
-	u_quad_t ifs6_in_routeradvert;
+	uint64_t ifs6_in_routeradvert;
 	/* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
-	u_quad_t ifs6_in_neighborsolicit;
+	uint64_t ifs6_in_neighborsolicit;
 	/* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
-	u_quad_t ifs6_in_neighboradvert;
+	uint64_t ifs6_in_neighboradvert;
 	/* ipv6IfIcmpInRedirects, # of input redirects */
-	u_quad_t ifs6_in_redirect;
+	uint64_t ifs6_in_redirect;
 	/* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
-	u_quad_t ifs6_in_mldquery;
+	uint64_t ifs6_in_mldquery;
 	/* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
-	u_quad_t ifs6_in_mldreport;
+	uint64_t ifs6_in_mldreport;
 	/* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
-	u_quad_t ifs6_in_mlddone;
+	uint64_t ifs6_in_mlddone;
 
 	/*
 	 * Output statistics. We should solve unresolved routing problem...
 	 */
 	/* ipv6IfIcmpOutMsgs, total # of output messages */
-	u_quad_t ifs6_out_msg;
+	uint64_t ifs6_out_msg;
 	/* ipv6IfIcmpOutErrors, # of output error messages */
-	u_quad_t ifs6_out_error;
+	uint64_t ifs6_out_error;
 	/* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
-	u_quad_t ifs6_out_dstunreach;
+	uint64_t ifs6_out_dstunreach;
 	/* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
-	u_quad_t ifs6_out_adminprohib;
+	uint64_t ifs6_out_adminprohib;
 	/* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
-	u_quad_t ifs6_out_timeexceed;
+	uint64_t ifs6_out_timeexceed;
 	/* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
-	u_quad_t ifs6_out_paramprob;
+	uint64_t ifs6_out_paramprob;
 	/* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
-	u_quad_t ifs6_out_pkttoobig;
+	uint64_t ifs6_out_pkttoobig;
 	/* ipv6IfIcmpOutEchos, # of output echo requests */
-	u_quad_t ifs6_out_echo;
+	uint64_t ifs6_out_echo;
 	/* ipv6IfIcmpOutEchoReplies, # of output echo replies */
-	u_quad_t ifs6_out_echoreply;
+	uint64_t ifs6_out_echoreply;
 	/* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
-	u_quad_t ifs6_out_routersolicit;
+	uint64_t ifs6_out_routersolicit;
 	/* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
-	u_quad_t ifs6_out_routeradvert;
+	uint64_t ifs6_out_routeradvert;
 	/* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
-	u_quad_t ifs6_out_neighborsolicit;
+	uint64_t ifs6_out_neighborsolicit;
 	/* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
-	u_quad_t ifs6_out_neighboradvert;
+	uint64_t ifs6_out_neighboradvert;
 	/* ipv6IfIcmpOutRedirects, # of output redirects */
-	u_quad_t ifs6_out_redirect;
+	uint64_t ifs6_out_redirect;
 	/* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
-	u_quad_t ifs6_out_mldquery;
+	uint64_t ifs6_out_mldquery;
 	/* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
-	u_quad_t ifs6_out_mldreport;
+	uint64_t ifs6_out_mldreport;
 	/* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
-	u_quad_t ifs6_out_mlddone;
+	uint64_t ifs6_out_mlddone;
 };
 
 struct	in6_ifreq {
@@ -288,8 +306,19 @@
 	struct	sockaddr_in6 ifra_prefixmask;
 	int	ifra_flags;
 	struct in6_addrlifetime ifra_lifetime;
+	int	ifra_vhid;
 };
 
+/* pre-10.x compat */
+struct	oin6_aliasreq {
+	char	ifra_name[IFNAMSIZ];
+	struct	sockaddr_in6 ifra_addr;
+	struct	sockaddr_in6 ifra_dstaddr;
+	struct	sockaddr_in6 ifra_prefixmask;
+	int	ifra_flags;
+	struct in6_addrlifetime ifra_lifetime;
+};
+
 /* prefix type macro */
 #define IN6_PREFIX_ND	1
 #define IN6_PREFIX_RR	2
@@ -410,7 +439,8 @@
 #define SIOCGIFNETMASK_IN6	_IOWR('i', 37, struct in6_ifreq)
 
 #define SIOCDIFADDR_IN6		 _IOW('i', 25, struct in6_ifreq)
-#define SIOCAIFADDR_IN6		 _IOW('i', 26, struct in6_aliasreq)
+#define OSIOCAIFADDR_IN6	 _IOW('i', 26, struct oin6_aliasreq)
+#define SIOCAIFADDR_IN6		 _IOW('i', 27, struct in6_aliasreq)
 
 #define SIOCSIFPHYADDR_IN6       _IOW('i', 70, struct in6_aliasreq)
 #define	SIOCGIFPSRCADDR_IN6	_IOWR('i', 71, struct in6_ifreq)
@@ -418,12 +448,7 @@
 
 #define SIOCGIFAFLAG_IN6	_IOWR('i', 73, struct in6_ifreq)
 
-#define SIOCGDRLST_IN6		_IOWR('i', 74, struct in6_drlist)
 #ifdef _KERNEL
-/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */
-#define SIOCGPRLST_IN6		_IOWR('i', 75, struct in6_oprlist)
-#endif
-#ifdef _KERNEL
 #define OSIOCGIFINFO_IN6	_IOWR('i', 76, struct in6_ondireq)
 #endif
 #define SIOCGIFINFO_IN6		_IOWR('i', 108, struct in6_ndireq)
@@ -470,13 +495,11 @@
 #define IN6_IFF_DETACHED	0x08	/* may be detached from the link */
 #define IN6_IFF_DEPRECATED	0x10	/* deprecated address */
 #define IN6_IFF_NODAD		0x20	/* don't perform DAD on this address
-					 * (used only at first SIOC* call)
+					 * (obsolete)
 					 */
 #define IN6_IFF_AUTOCONF	0x40	/* autoconfigurable address. */
 #define IN6_IFF_TEMPORARY	0x80	/* temporary (anonymous) address. */
-#define IN6_IFF_NOPFX		0x8000	/* skip kernel prefix management.
-					 * XXX: this should be temporary.
-					 */
+#define	IN6_IFF_PREFER_SOURCE	0x0100	/* preferred address for SAS */
 
 /* do not input/output */
 #define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)
@@ -488,8 +511,28 @@
 
 #ifdef _KERNEL
 VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DECLARE(u_long, in6_ifaddrhmask);
 #define	V_in6_ifaddrhead		VNET(in6_ifaddrhead)
+#define	V_in6_ifaddrhashtbl		VNET(in6_ifaddrhashtbl)
+#define	V_in6_ifaddrhmask		VNET(in6_ifaddrhmask)
 
+#define	IN6ADDR_NHASH_LOG2		8
+#define	IN6ADDR_NHASH			(1 << IN6ADDR_NHASH_LOG2)
+#define	IN6ADDR_HASHVAL(x)		(in6_addrhash(x))
+#define	IN6ADDR_HASH(x) \
+    (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask])
+
+static __inline uint32_t
+in6_addrhash(struct in6_addr *in6)
+{
+	uint32_t x;
+
+	x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^
+	    in6->s6_addr32[3];
+	return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT));
+}
+
 extern struct rwlock in6_ifaddr_lock;
 #define	IN6_IFADDR_LOCK_ASSERT(	)	rw_assert(&in6_ifaddr_lock, RA_LOCKED)
 #define	IN6_IFADDR_RLOCK()		rw_rlock(&in6_ifaddr_lock)
@@ -499,15 +542,14 @@
 #define	IN6_IFADDR_WLOCK_ASSERT()	rw_assert(&in6_ifaddr_lock, RA_WLOCKED)
 #define	IN6_IFADDR_WUNLOCK()		rw_wunlock(&in6_ifaddr_lock)
 
-VNET_DECLARE(struct icmp6stat, icmp6stat);
-#define	V_icmp6stat			VNET(icmp6stat)
 #define in6_ifstat_inc(ifp, tag) \
 do {								\
 	if (ifp)						\
-		((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \
+		counter_u64_add(((struct in6_ifextra *)		\
+		    ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[	\
+		    offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\
 } while (/*CONSTCOND*/ 0)
 
-extern struct in6_addr zeroin6_addr;
 extern u_char inet6ctlerrmap[];
 VNET_DECLARE(unsigned long, in6_maxmtu);
 #define	V_in6_maxmtu			VNET(in6_maxmtu)
@@ -766,6 +808,7 @@
 int	in6_if2idlen(struct ifnet *);
 struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
 struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *);
 char	*ip6_sprintf(char *, const struct in6_addr *);
 int	in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *);
 int	in6_matchlen(struct in6_addr *, struct in6_addr *);

Modified: trunk/sys/netinet6/ip6.h
===================================================================
--- trunk/sys/netinet6/ip6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
 /* $MidnightBSD$ */
-/*	$FreeBSD: stable/9/sys/netinet6/ip6.h 62587 2000-07-04 16:35:15Z itojun $	*/
+/*	$FreeBSD: stable/10/sys/netinet6/ip6.h 62587 2000-07-04 16:35:15Z itojun $	*/
 /*	$KAME: ip6.h,v 1.7 2000/03/25 07:23:36 sumikawa Exp $	*/
 
 #error "netinet6/ip6.h is obsolete.  use netinet/ip6.h"

Modified: trunk/sys/netinet6/ip6_ecn.h
===================================================================
--- trunk/sys/netinet6/ip6_ecn.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ecn.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: ip_ecn.h,v 1.5 2000/03/27 04:58:38 sumikawa Exp $
- * $FreeBSD: stable/9/sys/netinet6/ip6_ecn.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_ecn.h 174510 2007-12-10 16:03:40Z obrien $
  */
 
 /*

Modified: trunk/sys/netinet6/ip6_forward.c
===================================================================
--- trunk/sys/netinet6/ip6_forward.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_forward.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_forward.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_forward.c 284576 2015-06-18 20:57:21Z kp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -121,7 +121,7 @@
 	 * before forwarding packet actually.
 	 */
 	if (ipsec6_in_reject(m, NULL)) {
-		V_ipsec6stat.in_polvio++;
+		IPSEC6STAT_INC(ips_in_polvio);
 		m_freem(m);
 		return;
 	}
@@ -138,8 +138,8 @@
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		IP6STAT_INC(ip6s_cantforward);
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
-		if (V_ip6_log_time + V_ip6_log_interval < time_second) {
-			V_ip6_log_time = time_second;
+		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
@@ -183,7 +183,7 @@
 	sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
 	    IP_FORWARDING, &error);
 	if (sp == NULL) {
-		V_ipsec6stat.out_inval++;
+		IPSEC6STAT_INC(ips_out_inval);
 		IP6STAT_INC(ip6s_cantforward);
 		if (mcopy) {
 #if 0
@@ -204,7 +204,7 @@
 		/*
 		 * This packet is just discarded.
 		 */
-		V_ipsec6stat.out_polvio++;
+		IPSEC6STAT_INC(ips_out_polvio);
 		IP6STAT_INC(ip6s_cantforward);
 		KEY_FREESP(&sp);
 		if (mcopy) {
@@ -252,7 +252,6 @@
 
     {
 	struct ipsecrequest *isr = NULL;
-	struct ipsec_output_state state;
 
 	/*
 	 * when the kernel forwards a packet, it is not proper to apply
@@ -285,18 +284,27 @@
 	 *
 	 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 	 */
-	bzero(&state, sizeof(state));
-	state.m = m;
-	state.ro = NULL;	/* update at ipsec6_output_tunnel() */
-	state.dst = NULL;	/* update at ipsec6_output_tunnel() */
 
-	error = ipsec6_output_tunnel(&state, sp, 0);
+	/*
+	 * If we need to encapsulate the packet, do it here
+	 * ipsec6_proces_packet will send the packet using ip6_output
+	 */
+	error = ipsec6_process_packet(m, sp->req);
 
-	m = state.m;
 	KEY_FREESP(&sp);
 
+	if (error == EJUSTRETURN) {
+		/*
+		 * We had a SP with a level of 'use' and no SA. We
+		 * will just continue to process the packet without
+		 * IPsec processing.
+		 */
+		error = 0;
+		goto skip_ipsec;
+	}
+
 	if (error) {
-		/* mbuf is already reclaimed in ipsec6_output_tunnel. */
+		/* mbuf is already reclaimed in ipsec6_process_packet. */
 		switch (error) {
 		case EHOSTUNREACH:
 		case ENETUNREACH:
@@ -319,7 +327,6 @@
 			m_freem(mcopy);
 #endif
 		}
-		m_freem(m);
 		return;
 	} else {
 		/*
@@ -331,25 +338,7 @@
 		m = NULL;
 		goto freecopy;
 	}
-
-	if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
-		/*
-		 * now tunnel mode headers are added.  we are originating
-		 * packet instead of forwarding the packet.
-		 */
-		ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL,
-		    NULL);
-		goto freecopy;
-	}
-
-	/* adjust pointer */
-	dst = (struct sockaddr_in6 *)state.dst;
-	rt = state.ro ? state.ro->ro_rt : NULL;
-	if (dst != NULL && rt != NULL)
-		ipsecrt = 1;
     }
-	if (ipsecrt)
-		goto skip_routing;
 skip_ipsec:
 #endif
 again:
@@ -372,9 +361,6 @@
 		goto bad;
 	}
 	rt = rin6.ro_rt;
-#ifdef IPSEC
-skip_routing:
-#endif
 
 	/*
 	 * Source scope check: if a packet can't be delivered to its
@@ -406,8 +392,8 @@
 		IP6STAT_INC(ip6s_badscope);
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
 
-		if (V_ip6_log_time + V_ip6_log_interval < time_second) {
-			V_ip6_log_time = time_second;
+		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
@@ -438,46 +424,6 @@
 		goto bad;
 	}
 
-	if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
-		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
-		if (mcopy) {
-			u_long mtu;
-#ifdef IPSEC
-			struct secpolicy *sp;
-			int ipsecerror;
-			size_t ipsechdrsiz;
-#endif /* IPSEC */
-
-			mtu = IN6_LINKMTU(rt->rt_ifp);
-#ifdef IPSEC
-			/*
-			 * When we do IPsec tunnel ingress, we need to play
-			 * with the link value (decrement IPsec header size
-			 * from mtu value).  The code is much simpler than v4
-			 * case, as we have the outgoing interface for
-			 * encapsulated packet as "rt->rt_ifp".
-			 */
-			sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
-				IP_FORWARDING, &ipsecerror);
-			if (sp) {
-				ipsechdrsiz = ipsec_hdrsiz(mcopy,
-					IPSEC_DIR_OUTBOUND, NULL);
-				if (ipsechdrsiz < mtu)
-					mtu -= ipsechdrsiz;
-			}
-
-			/*
-			 * if mtu becomes less than minimum MTU,
-			 * tell minimum MTU (and I'll need to fragment it).
-			 */
-			if (mtu < IPV6_MMTU)
-				mtu = IPV6_MMTU;
-#endif /* IPSEC */
-			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
-		}
-		goto bad;
-	}
-
 	if (rt->rt_flags & RTF_GATEWAY)
 		dst = (struct sockaddr_in6 *)rt->rt_gateway;
 
@@ -564,10 +510,8 @@
 	odst = ip6->ip6_dst;
 	/* Run through list of hooks for output packets. */
 	error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL);
-	if (error != 0)
-		goto senderr;
-	if (m == NULL)
-		goto freecopy;
+	if (error != 0 || m == NULL)
+		goto freecopy;		/* consumed by filter */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* See if destination IP address was changed by packet filter. */
@@ -574,22 +518,9 @@
 	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip6_input(). */
-		if (in6_localip(&ip6->ip6_dst)) {
+		if (in6_localip(&ip6->ip6_dst))
 			m->m_flags |= M_FASTFWD_OURS;
-			if (m->m_pkthdr.rcvif == NULL)
-				m->m_pkthdr.rcvif = V_loif;
-			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
-				m->m_pkthdr.csum_flags |=
-				    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
-				m->m_pkthdr.csum_data = 0xffff;
-			}
-#ifdef SCTP
-			if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
-				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
-			error = netisr_queue(NETISR_IPV6, m);
-			goto out;
-		} else
+		else
 			goto again;	/* Redo the routing table lookup. */
 	}
 
@@ -621,6 +552,47 @@
 	}
 
 pass:
+	/* See if the size was changed by the packet filter. */
+	if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
+		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
+		if (mcopy) {
+			u_long mtu;
+#ifdef IPSEC
+			struct secpolicy *sp;
+			int ipsecerror;
+			size_t ipsechdrsiz;
+#endif /* IPSEC */
+
+			mtu = IN6_LINKMTU(rt->rt_ifp);
+#ifdef IPSEC
+			/*
+			 * When we do IPsec tunnel ingress, we need to play
+			 * with the link value (decrement IPsec header size
+			 * from mtu value).  The code is much simpler than v4
+			 * case, as we have the outgoing interface for
+			 * encapsulated packet as "rt->rt_ifp".
+			 */
+			sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
+				IP_FORWARDING, &ipsecerror);
+			if (sp) {
+				ipsechdrsiz = ipsec_hdrsiz(mcopy,
+					IPSEC_DIR_OUTBOUND, NULL);
+				if (ipsechdrsiz < mtu)
+					mtu -= ipsechdrsiz;
+			}
+
+			/*
+			 * if mtu becomes less than minimum MTU,
+			 * tell minimum MTU (and I'll need to fragment it).
+			 */
+			if (mtu < IPV6_MMTU)
+				mtu = IPV6_MMTU;
+#endif /* IPSEC */
+			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
+		}
+		goto bad;
+	}
+
 	error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
 	if (error) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
@@ -636,7 +608,6 @@
 		}
 	}
 
-senderr:
 	if (mcopy == NULL)
 		goto out;
 	switch (error) {

Added: trunk/sys/netinet6/ip6_gre.c
===================================================================
--- trunk/sys/netinet6/ip6_gre.c	                        (rev 0)
+++ trunk/sys/netinet6/ip6_gre.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,148 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_gre.c 284072 2015-06-06 13:26:13Z ae $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/protosw.h>
+#include <sys/malloc.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#ifdef INET
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6protosw.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <net/if_gre.h>
+
+extern  struct domain inet6domain;
+struct ip6protosw in6_gre_protosw = {
+	.pr_type =	SOCK_RAW,
+	.pr_domain =	&inet6domain,
+	.pr_protocol =	IPPROTO_GRE,
+	.pr_flags =	PR_ATOMIC|PR_ADDR,
+	.pr_input =	gre_input,
+	.pr_output =	rip6_output,
+	.pr_ctloutput =	rip6_ctloutput,
+	.pr_usrreqs =	&rip6_usrreqs
+};
+
+VNET_DEFINE(int, ip6_gre_hlim) = IPV6_DEFHLIM;
+#define	V_ip6_gre_hlim		VNET(ip6_gre_hlim)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets");
+
+static int
+in6_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+	GRE_RLOCK_TRACKER;
+	struct gre_softc *sc;
+	struct ip6_hdr *ip6;
+
+	sc = (struct gre_softc *)arg;
+	if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+		return (0);
+
+	M_ASSERTPKTHDR(m);
+	/*
+	 * We expect that payload contains at least IPv4
+	 * or IPv6 packet.
+	 */
+	if (m->m_pkthdr.len < sizeof(struct greip6) +
+#ifdef INET
+	    sizeof(struct ip))
+#else
+	    sizeof(struct ip6_hdr))
+#endif
+		return (0);
+
+	GRE_RLOCK(sc);
+	if (sc->gre_family == 0)
+		goto bad;
+
+	KASSERT(sc->gre_family == AF_INET6,
+	    ("wrong gre_family: %d", sc->gre_family));
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	if (!IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, &ip6->ip6_dst) ||
+	    !IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, &ip6->ip6_src))
+		goto bad;
+
+	GRE_RUNLOCK(sc);
+	return (128 * 2);
+bad:
+	GRE_RUNLOCK(sc);
+	return (0);
+}
+
+int
+in6_gre_output(struct mbuf *m, int af, int hlen)
+{
+	struct greip6 *gi6;
+
+	gi6 = mtod(m, struct greip6 *);
+	gi6->gi6_ip6.ip6_hlim = V_ip6_gre_hlim;
+	return (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, NULL));
+}
+
+int
+in6_gre_attach(struct gre_softc *sc)
+{
+
+	KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
+	sc->gre_ecookie = encap_attach_func(AF_INET6, IPPROTO_GRE,
+	    in6_gre_encapcheck, (void *)&in6_gre_protosw, sc);
+	if (sc->gre_ecookie == NULL)
+		return (EEXIST);
+	return (0);
+}


Property changes on: trunk/sys/netinet6/ip6_gre.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet6/ip6_id.c
===================================================================
--- trunk/sys/netinet6/ip6_id.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_id.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -64,11 +64,11 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $
+ * $OpenBSD: ip6_id.c,v 1.2 2003/12/10 07:21:01 itojun Exp $
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_id.c 174510 2007-12-10 16:03:40Z obrien $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_id.c 327550 2018-01-04 15:57:49Z pfg $");
 
 /*
  * seed = random (bits - 1) bit
@@ -222,7 +222,7 @@
 	p->ru_g = pmod(p->ru_gen, j, p->ru_n);
 	p->ru_counter = 0;
 
-	p->ru_reseed = time_second + p->ru_out;
+	p->ru_reseed = time_uptime + p->ru_out;
 	p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1));
 }
 
@@ -230,15 +230,12 @@
 randomid(struct randomtab *p)
 {
 	int i, n;
-	u_int32_t tmp;
 
-	if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed)
+	if (p->ru_counter >= p->ru_max || time_uptime > p->ru_reseed)
 		initid(p);
 
-	tmp = arc4random();
-
 	/* Skip a random number of ids */
-	n = tmp & 0x3; tmp = tmp >> 2;
+	n = arc4random() & 0x3;
 	if (p->ru_counter + n >= p->ru_max)
 		initid(p);
 
@@ -249,7 +246,7 @@
 
 	p->ru_counter += i;
 
-	return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) |
+	return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 + p->ru_x, p->ru_n)) |
 	    p->ru_msb;
 }
 

Modified: trunk/sys/netinet6/ip6_input.c
===================================================================
--- trunk/sys/netinet6/ip6_input.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_input.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,12 +62,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_input.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_input.c 329158 2018-02-12 13:52:58Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
@@ -77,6 +78,7 @@
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
@@ -93,6 +95,7 @@
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_systm.h>
 #include <net/if_llatbl.h>
@@ -117,16 +120,12 @@
 
 #include <netinet6/ip6protosw.h>
 
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-VNET_DECLARE(int, ip6_output_flowtable_size);
-#define	V_ip6_output_flowtable_size	VNET(ip6_output_flowtable_size)
-#endif
-
 extern struct domain inet6domain;
 
 u_char ip6_protox[IPPROTO_MAX];
 VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DEFINE(u_long, in6_ifaddrhmask);
 
 static struct netisr_handler ip6_nh = {
 	.nh_name = "ip6",
@@ -140,7 +139,11 @@
 
 VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
 
-VNET_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_SYSINIT(ip6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ip6stat);
+#endif /* VIMAGE */
 
 struct rwlock in6_ifaddr_lock;
 RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
@@ -167,8 +170,12 @@
 
 	TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
 	    &V_ip6_auto_linklocal);
+	TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv);
+	TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr);
 
 	TAILQ_INIT(&V_in6_ifaddrhead);
+	V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR,
+	    &V_in6_ifaddrhmask);
 
 	/* Initialize packet filter hooks. */
 	V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
@@ -182,24 +189,6 @@
 	nd6_init();
 	frag6_init();
 
-#ifdef FLOWTABLE
-	if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
-		&V_ip6_output_flowtable_size)) {
-		if (V_ip6_output_flowtable_size < 256)
-			V_ip6_output_flowtable_size = 256;
-		if (!powerof2(V_ip6_output_flowtable_size)) {
-			printf("flowtable must be power of 2 size\n");
-			V_ip6_output_flowtable_size = 2048;
-		}
-	} else {
-		/*
-		 * round up to the next power of 2
-		 */
-		V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
-	}
-	V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
-#endif	
-	
 	V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
 
 	/* Skip global initialization stuff for non-default instances. */
@@ -295,7 +284,12 @@
 void
 ip6_destroy()
 {
+	int i;
 
+	if ((i = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
+		printf("%s: WARNING: unable to unregister pfil hook, "
+		    "error %d\n", __func__, i);
+	hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
 	nd6_destroy();
 	callout_drain(&V_in6_tmpaddrtimer_ch);
 }
@@ -490,21 +484,16 @@
 	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
 		struct mbuf *n;
 
-		MGETHDR(n, M_DONTWAIT, MT_HEADER);
-		if (n)
-			M_MOVE_PKTHDR(n, m);
-		if (n && n->m_pkthdr.len > MHLEN) {
-			MCLGET(n, M_DONTWAIT);
-			if ((n->m_flags & M_EXT) == 0) {
-				m_freem(n);
-				n = NULL;
-			}
-		}
+		if (m->m_pkthdr.len > MHLEN)
+			n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+		else
+			n = m_gethdr(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			m_freem(m);
 			return;	/* ENOBUFS */
 		}
 
+		m_move_pkthdr(n, m);
 		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
 		n->m_len = n->m_pkthdr.len;
 		m_freem(m);
@@ -533,6 +522,8 @@
 
 	IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]);
 
+	IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6);
+
 	/*
 	 * Check against address spoofing/corruption.
 	 */
@@ -557,7 +548,18 @@
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
-
+	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
+	    IPV6_ADDR_MC_SCOPE(&ip6->ip6_dst) == 0) {
+		/*
+		 * RFC4291 2.7:
+		 * Nodes must not originate a packet to a multicast address
+		 * whose scop field contains the reserved value 0; if such
+		 * a packet is received, it must be silently dropped.
+		 */
+		IP6STAT_INC(ip6s_badscope);
+		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
+		goto bad;
+	}
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
 		/* packet is dropped by traffic conditioner */
@@ -1073,7 +1075,6 @@
 	struct mbuf *m = *mp;
 	int off = *offp, hbhlen;
 	struct ip6_hbh *hbh;
-	u_int8_t *opt;
 
 	/* validation of the length of the header */
 #ifndef PULLDOWN_TEST
@@ -1100,8 +1101,6 @@
 #endif
 	off += hbhlen;
 	hbhlen -= sizeof(struct ip6_hbh);
-	opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
-
 	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
 				hbhlen, rtalertp, plenp) < 0)
 		return (-1);
@@ -1594,24 +1593,28 @@
 #undef IS2292
 
 void
-ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
+ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu)
 {
 	struct socket *so;
 	struct mbuf *m_mtu;
 	struct ip6_mtuinfo mtuctl;
 
-	so =  in6p->inp_socket;
-
-	if (mtu == NULL)
+	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+	/*
+	 * Notify the error by sending IPV6_PATHMTU ancillary data if
+	 * application wanted to know the MTU value.
+	 * NOTE: we notify disconnected sockets, because some udp
+	 * applications keep sending sockets disconnected.
+	 * NOTE: our implementation doesn't notify connected sockets that has
+	 * foreign address that is different than given destination addresses
+	 * (this is permitted by RFC 3542).
+	 */
+	if ((inp->inp_flags & IN6P_MTU) == 0 || (
+	    !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+	    !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &dst->sin6_addr)))
 		return;
 
-#ifdef DIAGNOSTIC
-	if (so == NULL)		/* I believe this is impossible */
-		panic("ip6_notify_pmtu: socket is NULL");
-#endif
-
-	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
-	mtuctl.ip6m_mtu = *mtu;
+	mtuctl.ip6m_mtu = mtu;
 	mtuctl.ip6m_addr = *dst;
 	if (sa6_recoverscope(&mtuctl.ip6m_addr))
 		return;
@@ -1620,6 +1623,7 @@
 	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
 		return;
 
+	so =  inp->inp_socket;
 	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
 	    == 0) {
 		m_freem(m_mtu);
@@ -1626,8 +1630,6 @@
 		/* XXX: should count statistics */
 	} else
 		sorwakeup(so);
-
-	return;
 }
 
 #ifdef PULLDOWN_TEST
@@ -1660,23 +1662,13 @@
 	else
 		elen = (ip6e.ip6e_len + 1) << 3;
 
-	MGET(n, M_DONTWAIT, MT_DATA);
-	if (n && elen >= MLEN) {
-		MCLGET(n, M_DONTWAIT);
-		if ((n->m_flags & M_EXT) == 0) {
-			m_free(n);
-			n = NULL;
-		}
-	}
-	if (!n)
+	if (elen > MLEN)
+		n = m_getcl(M_NOWAIT, MT_DATA, 0);
+	else
+		n = m_get(M_NOWAIT, MT_DATA);
+	if (n == NULL)
 		return NULL;
 
-	n->m_len = 0;
-	if (elen >= M_TRAILINGSPACE(n)) {
-		m_free(n);
-		return NULL;
-	}
-
 	m_copydata(m, off, elen, mtod(n, caddr_t));
 	n->m_len = elen;
 	return n;
@@ -1686,49 +1678,39 @@
 /*
  * Get pointer to the previous header followed by the header
  * currently processed.
- * XXX: This function supposes that
- *	M includes all headers,
- *	the next header field and the header length field of each header
- *	are valid, and
- *	the sum of each header length equals to OFF.
- * Because of these assumptions, this function must be called very
- * carefully. Moreover, it will not be used in the near future when
- * we develop `neater' mechanism to process extension headers.
  */
-char *
+int
 ip6_get_prevhdr(struct mbuf *m, int off)
 {
-	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+	struct ip6_ext ip6e;
+	struct ip6_hdr *ip6;
+	int len, nlen, nxt;
 
 	if (off == sizeof(struct ip6_hdr))
-		return (&ip6->ip6_nxt);
-	else {
-		int len, nxt;
-		struct ip6_ext *ip6e = NULL;
+		return (offsetof(struct ip6_hdr, ip6_nxt));
+	if (off < sizeof(struct ip6_hdr))
+		panic("%s: off < sizeof(struct ip6_hdr)", __func__);
 
-		nxt = ip6->ip6_nxt;
-		len = sizeof(struct ip6_hdr);
-		while (len < off) {
-			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
-
-			switch (nxt) {
-			case IPPROTO_FRAGMENT:
-				len += sizeof(struct ip6_frag);
-				break;
-			case IPPROTO_AH:
-				len += (ip6e->ip6e_len + 2) << 2;
-				break;
-			default:
-				len += (ip6e->ip6e_len + 1) << 3;
-				break;
-			}
-			nxt = ip6e->ip6e_nxt;
+	ip6 = mtod(m, struct ip6_hdr *);
+	nxt = ip6->ip6_nxt;
+	len = sizeof(struct ip6_hdr);
+	nlen = 0;
+	while (len < off) {
+		m_copydata(m, len, sizeof(ip6e), (caddr_t)&ip6e);
+		switch (nxt) {
+		case IPPROTO_FRAGMENT:
+			nlen = sizeof(struct ip6_frag);
+			break;
+		case IPPROTO_AH:
+			nlen = (ip6e.ip6e_len + 2) << 2;
+			break;
+		default:
+			nlen = (ip6e.ip6e_len + 1) << 3;
 		}
-		if (ip6e)
-			return (&ip6e->ip6e_nxt);
-		else
-			return NULL;
+		len += nlen;
+		nxt = ip6e.ip6e_nxt;
 	}
+	return (len - nlen);
 }
 
 /*

Modified: trunk/sys/netinet6/ip6_ipsec.c
===================================================================
--- trunk/sys/netinet6/ip6_ipsec.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ipsec.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_ipsec.c 239831 2012-08-29 13:14:39Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_ipsec.c 283901 2015-06-02 03:14:42Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -48,7 +48,6 @@
 #include <sys/syslog.h>
 
 #include <net/if.h>
-#include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
@@ -104,7 +103,7 @@
 int
 ip6_ipsec_filtertunnel(struct mbuf *m)
 {
-#if defined(IPSEC)
+#ifdef IPSEC
 
 	/*
 	 * Bypass packet filtering for packets previously handled by IPsec.
@@ -129,9 +128,8 @@
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secpolicy *sp;
-	int s, error;
+	int error;
 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
-	s = splnet();
 	if (mtag != NULL) {
 		tdbi = (struct tdb_ident *)(mtag + 1);
 		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -140,7 +138,6 @@
 					   IP_FORWARDING, &error);
 	}
 	if (sp == NULL) {	/* NB: can happen if error */
-		splx(s);
 		/*XXX error stat???*/
 		DPRINTF(("%s: no SP for forwarding\n", __func__));	/*XXX*/
 		return 1;
@@ -151,7 +148,6 @@
 	 */
 	error = ipsec_in_reject(sp, m);
 	KEY_FREESP(&sp);
-	splx(s);
 	if (error) {
 		IP6STAT_INC(ip6s_cantforward);
 		return 1;
@@ -174,7 +170,7 @@
 	struct m_tag *mtag;
 	struct tdb_ident *tdbi;
 	struct secpolicy *sp;
-	int s, error;
+	int error;
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
@@ -190,7 +186,6 @@
 		 * packet is returned to the ip input queue for delivery.
 		 */
 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
-		s = splnet();
 		if (mtag != NULL) {
 			tdbi = (struct tdb_ident *)(mtag + 1);
 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -210,7 +205,6 @@
 			DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/
 			return 1;
 		}
-		splx(s);
 		if (error)
 			return 1;
 	}
@@ -226,23 +220,22 @@
 
 int
 ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
-    struct ifnet **ifp, struct secpolicy **sp)
+    struct ifnet **ifp)
 {
 #ifdef IPSEC
+	struct secpolicy *sp = NULL;
 	struct tdb_ident *tdbi;
 	struct m_tag *mtag;
 	/* XXX int s; */
-	if (sp == NULL)
-		return 1;
 	mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
 	if (mtag != NULL) {
 		tdbi = (struct tdb_ident *)(mtag + 1);
-		*sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
-		if (*sp == NULL)
+		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
+		if (sp == NULL)
 			*error = -EINVAL;	/* force silent drop */
 		m_tag_delete(*m, mtag);
 	} else {
-		*sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
+		sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
 					error, inp);
 	}
 
@@ -253,9 +246,9 @@
 	 *    sp == NULL, error == -EINVAL  discard packet w/o error
 	 *    sp == NULL, error != 0	    discard packet, report error
 	 */
-	if (*sp != NULL) {
+	if (sp != NULL) {
 		/* Loop detection, check if ipsec processing already done */
-		KASSERT((*sp)->req != NULL, ("ip_output: no ipsec request"));
+		KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
 		for (mtag = m_tag_first(*m); mtag != NULL;
 		     mtag = m_tag_next(*m, mtag)) {
 			if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
@@ -269,22 +262,17 @@
 			 * an SA; e.g. on first reference.  If it occurs,
 			 * then we let ipsec4_process_packet do its thing.
 			 */
-			if ((*sp)->req->sav == NULL)
+			if (sp->req->sav == NULL)
 				break;
 			tdbi = (struct tdb_ident *)(mtag + 1);
-			if (tdbi->spi == (*sp)->req->sav->spi &&
-			    tdbi->proto == (*sp)->req->sav->sah->saidx.proto &&
-			    bcmp(&tdbi->dst, &(*sp)->req->sav->sah->saidx.dst,
+			if (tdbi->spi == sp->req->sav->spi &&
+			    tdbi->proto == sp->req->sav->sah->saidx.proto &&
+			    bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
 				 sizeof (union sockaddr_union)) == 0) {
 				/*
 				 * No IPsec processing is needed, free
 				 * reference to SP.
-				 *
-				 * NB: null pointer to avoid free at
-				 *     done: below.
 				 */
-				KEY_FREESP(sp), *sp = NULL;
-				/* XXX splx(s); */
 				goto done;
 			}
 		}
@@ -292,17 +280,38 @@
 		/*
 		 * Do delayed checksums now because we send before
 		 * this is done in the normal processing path.
-		 * For IPv6 we do delayed checksums in ip6_output.c.
 		 */
 #ifdef INET
 		if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-			ipseclog((LOG_DEBUG,
-			    "%s: we do not support IPv4 over IPv6", __func__));
 			in_delayed_cksum(*m);
 			(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		}
 #endif
+		if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+			in6_delayed_cksum(*m, (*m)->m_pkthdr.len - sizeof(struct ip6_hdr),
+							sizeof(struct ip6_hdr));
+			(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+		}
+#ifdef SCTP
+		if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+			sctp_delayed_cksum(*m, sizeof(struct ip6_hdr));
+			(*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+		}
+#endif
 
+		/* NB: callee frees mbuf */
+		*error = ipsec6_process_packet(*m, sp->req);
+
+		if (*error == EJUSTRETURN) {
+			/*
+			 * We had a SP with a level of 'use' and no SA. We
+			 * will just continue to process the packet without
+			 * IPsec processing.
+			 */
+			*error = 0;
+			goto done;
+		}
+
 		/*
 		 * Preserve KAME behaviour: ENOENT can be returned
 		 * when an SA acquire is in progress.  Don't propagate
@@ -312,7 +321,7 @@
 		 */
 		if (*error == ENOENT)
 			*error = 0;
-		goto do_ipsec;
+		goto reinjected;
 	} else {	/* sp == NULL */
 		if (*error != 0) {
 			/*
@@ -329,10 +338,16 @@
 		}
 	}
 done:
+	if (sp != NULL)
+		KEY_FREESP(&sp);
 	return 0;
-do_ipsec:
+reinjected:
+	if (sp != NULL)
+		KEY_FREESP(&sp);
 	return -1;
 bad:
+	if (sp != NULL)
+		KEY_FREESP(&sp);
 	return 1;
 #endif /* IPSEC */
 	return 0;
@@ -376,9 +391,7 @@
 		    sp->req->sav->sah != NULL) {
 			ro = &sp->req->sav->sah->route_cache.sa_route;
 			if (ro->ro_rt && ro->ro_rt->rt_ifp) {
-				mtu =
-				    ro->ro_rt->rt_rmx.rmx_mtu ?
-				    ro->ro_rt->rt_rmx.rmx_mtu :
+				mtu = ro->ro_rt->rt_mtu ? ro->ro_rt->rt_mtu :
 				    ro->ro_rt->rt_ifp->if_mtu;
 				mtu -= ipsechdr;
 			}

Modified: trunk/sys/netinet6/ip6_ipsec.h
===================================================================
--- trunk/sys/netinet6/ip6_ipsec.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ipsec.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet6/ip6_ipsec.h 177166 2008-03-14 11:44:30Z bz $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_ipsec.h 274132 2014-11-05 09:23:29Z ae $
  */
 
 #ifndef _NETINET_IP6_IPSEC_H_
@@ -37,7 +37,7 @@
 int	ip6_ipsec_fwd(struct mbuf *);
 int	ip6_ipsec_input(struct mbuf *, int);
 int	ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
-	    struct ifnet **, struct secpolicy **sp);
+	    struct ifnet **);
 #if 0
 int	ip6_ipsec_mtu(struct mbuf *);
 #endif

Modified: trunk/sys/netinet6/ip6_mroute.c
===================================================================
--- trunk/sys/netinet6/ip6_mroute.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_mroute.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -80,9 +80,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_mroute.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_mroute.c 328878 2018-02-05 09:25:53Z ae $");
 
 #include "opt_inet6.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/callout.h>
@@ -94,6 +95,7 @@
 #include <sys/module.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -115,6 +117,7 @@
 #include <netinet/ip_encap.h>
 
 #include <netinet/ip6.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
@@ -163,6 +166,7 @@
     &mrt6stat, mrt6stat,
     "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
 
+#define	MRT6STAT_INC(name)	mrt6stat.name += 1
 #define NO_RTE_FOUND	0x1
 #define RTE_FOUND	0x2
 
@@ -218,6 +222,14 @@
 #define DEBUG_XMIT	0x10
 #define DEBUG_REG	0x20
 #define DEBUG_PIM	0x40
+#define	DEBUG_ERR	0x80
+#define	DEBUG_ANY	0x7f
+#define	MRT6_DLOG(m, fmt, ...)	\
+	if (V_mrt6debug & (m))	\
+		log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \
+		    "%s: " fmt "\n", __func__, ##__VA_ARGS__)
+#else
+#define	MRT6_DLOG(m, fmt, ...)
 #endif
 
 static void	expire_upcalls(void *);
@@ -252,8 +264,9 @@
 static struct pim6stat pim6stat;
 SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
     &pim6stat, pim6stat,
-    "PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
+    "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
 
+#define	PIM6STAT_INC(name)	pim6stat.name += 1
 static VNET_DEFINE(int, pim6);
 #define	V_pim6		VNET(pim6)
 
@@ -271,7 +284,6 @@
 #define MF6CFIND(o, g, rt) do { \
 	struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
 	rt = NULL; \
-	mrt6stat.mrt6s_mfc_lookups++; \
 	while (_rt) { \
 		if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
 		    IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
@@ -282,7 +294,7 @@
 		_rt = _rt->mf6c_next; \
 	} \
 	if (rt == NULL) { \
-		mrt6stat.mrt6s_mfc_misses++; \
+		MRT6STAT_INC(mrt6s_mfc_misses); \
 	} \
 } while (/*CONSTCOND*/ 0)
 
@@ -522,12 +534,8 @@
 ip6_mrouter_init(struct socket *so, int v, int cmd)
 {
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug)
-		log(LOG_DEBUG,
-		    "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n",
-		    so->so_type, so->so_proto->pr_protocol);
-#endif
+	MRT6_DLOG(DEBUG_ANY, "so_type = %d, pr_protocol = %d",
+	    so->so_type, so->so_proto->pr_protocol);
 
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_ICMPV6)
@@ -556,12 +564,8 @@
 	    expire_upcalls, NULL);
 
 	MROUTER6_UNLOCK();
+	MRT6_DLOG(DEBUG_ANY, "finished");
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug)
-		log(LOG_DEBUG, "ip6_mrouter_init\n");
-#endif
-
 	return (0);
 }
 
@@ -572,7 +576,7 @@
 X_ip6_mrouter_done(void)
 {
 	mifi_t mifi;
-	int i;
+	u_long i;
 	struct mf6c *rt;
 	struct rtdetq *rte;
 
@@ -612,7 +616,7 @@
 			for (rte = rt->mf6c_stall; rte != NULL; ) {
 				struct rtdetq *n = rte->next;
 
-				m_free(rte->m);
+				m_freem(rte->m);
 				free(rte, M_MRTABLE6);
 				rte = n;
 			}
@@ -638,12 +642,8 @@
 	V_ip6_mrouter_ver = 0;
 
 	MROUTER6_UNLOCK();
+	MRT6_DLOG(DEBUG_ANY, "finished");
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug)
-		log(LOG_DEBUG, "ip6_mrouter_done\n");
-#endif
-
 	return (0);
 }
 
@@ -723,15 +723,9 @@
 		nummifs = mifcp->mif6c_mifi + 1;
 
 	MIF6_UNLOCK();
+	MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi,
+	    if_name(ifp));
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug)
-		log(LOG_DEBUG,
-		    "add_mif #%d, phyint %s\n",
-		    mifcp->mif6c_mifi,
-		    ifp->if_xname);
-#endif
-
 	return (0);
 }
 
@@ -773,12 +767,8 @@
 		if (mif6table[mifi - 1].m6_ifp)
 			break;
 	nummifs = mifi;
+	MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", *mifip, nummifs);
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug)
-		log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs);
-#endif
-
 	return (0);
 }
 
@@ -813,15 +803,10 @@
 
 	/* If an entry already exists, just update the fields */
 	if (rt) {
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_MFC) {
-		    log(LOG_DEBUG,
-			"add_m6fc no upcall h %d o %s g %s p %x\n",
-			ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
-			ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
-			mfccp->mf6cc_parent);
-		}
-#endif
+		MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x",
+		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+		    mfccp->mf6cc_parent);
 
 		rt->mf6c_parent = mfccp->mf6cc_parent;
 		rt->mf6c_ifset = mfccp->mf6cc_ifset;
@@ -852,16 +837,12 @@
 					    &mfccp->mf6cc_mcastgrp.sin6_addr),
 				    mfccp->mf6cc_parent, rt->mf6c_stall);
 
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_MFC)
-				log(LOG_DEBUG,
-				    "add_m6fc o %s g %s p %x dbg %x\n",
-				    ip6_sprintf(ip6bufo,
-					    &mfccp->mf6cc_origin.sin6_addr),
-				    ip6_sprintf(ip6bufg,
-					    &mfccp->mf6cc_mcastgrp.sin6_addr),
-				    mfccp->mf6cc_parent, rt->mf6c_stall);
-#endif
+			MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p",
+			    ip6_sprintf(ip6bufo,
+			    &mfccp->mf6cc_origin.sin6_addr),
+			    ip6_sprintf(ip6bufg,
+				&mfccp->mf6cc_mcastgrp.sin6_addr),
+			    mfccp->mf6cc_parent, rt->mf6c_stall);
 
 			rt->mf6c_origin     = mfccp->mf6cc_origin;
 			rt->mf6c_mcastgrp   = mfccp->mf6cc_mcastgrp;
@@ -894,15 +875,10 @@
 	 * It is possible that an entry is being inserted without an upcall
 	 */
 	if (nstl == 0) {
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_MFC)
-		    log(LOG_DEBUG,
-			"add_mfc no upcall h %d o %s g %s p %x\n",
-			hash,
-			ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
-			ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
-			mfccp->mf6cc_parent);
-#endif
+		MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash,
+		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+		    mfccp->mf6cc_parent);
 
 		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
 
@@ -988,6 +964,9 @@
 static int
 del_m6fc(struct mf6cctl *mfccp)
 {
+#ifdef MRT6DEBUG
+	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
+#endif
 	struct sockaddr_in6	origin;
 	struct sockaddr_in6	mcastgrp;
 	struct mf6c		*rt;
@@ -998,14 +977,9 @@
 	mcastgrp = mfccp->mf6cc_mcastgrp;
 	hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug & DEBUG_MFC) {
-		char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
-		log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
-		    ip6_sprintf(ip6bufo, &origin.sin6_addr),
-		    ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
-	}
-#endif
+	MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s",
+	    ip6_sprintf(ip6bufo, &origin.sin6_addr),
+	    ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
 
 	MFC6_LOCK();
 
@@ -1070,20 +1044,24 @@
 int
 X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
 {
+	struct rtdetq *rte;
+	struct mbuf *mb0;
 	struct mf6c *rt;
 	struct mif6 *mifp;
 	struct mbuf *mm;
+	u_long hash;
 	mifi_t mifi;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#ifdef UPCALL_TIMING
+	struct timeval tp;
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug & DEBUG_FORWARD)
-		log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
-		    ip6_sprintf(ip6bufs, &ip6->ip6_src),
-		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
-		    ifp->if_index);
-#endif
+	GET_TIME(tp);
+#endif /* UPCALL_TIMING */
 
+	MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d",
+	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
+	    ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index);
+
 	/*
 	 * Don't forward a packet with Hop limit of zero or one,
 	 * or a packet destined to a local-only group.
@@ -1102,8 +1080,8 @@
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 		IP6STAT_INC(ip6s_cantforward);
-		if (V_ip6_log_time + V_ip6_log_interval < time_second) {
-			V_ip6_log_time = time_second;
+		if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+			V_ip6_log_time = time_uptime;
 			log(LOG_DEBUG,
 			    "cannot forward "
 			    "from %s to %s nxt %d received on %s\n",
@@ -1121,211 +1099,184 @@
 	 * Determine forwarding mifs from the forwarding cache table
 	 */
 	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
+	MRT6STAT_INC(mrt6s_mfc_lookups);
 
 	/* Entry exists, so forward if necessary */
 	if (rt) {
 		MFC6_UNLOCK();
 		return (ip6_mdq(m, ifp, rt));
-	} else {
-		/*
-		 * If we don't have a route for packet's origin,
-		 * Make a copy of the packet &
-		 * send message to routing daemon
-		 */
+	}
 
-		struct mbuf *mb0;
-		struct rtdetq *rte;
-		u_long hash;
-/*		int i, npkts;*/
-#ifdef UPCALL_TIMING
-		struct timeval tp;
+	/*
+	 * If we don't have a route for packet's origin,
+	 * Make a copy of the packet & send message to routing daemon.
+	 */
+	MRT6STAT_INC(mrt6s_no_route);
+	MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s",
+	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
+	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
 
-		GET_TIME(tp);
-#endif /* UPCALL_TIMING */
+	/*
+	 * Allocate mbufs early so that we don't do extra work if we
+	 * are just going to fail anyway.
+	 */
+	rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT);
+	if (rte == NULL) {
+		MFC6_UNLOCK();
+		return (ENOBUFS);
+	}
+	mb0 = m_copy(m, 0, M_COPYALL);
+	/*
+	 * Pullup packet header if needed before storing it,
+	 * as other references may modify it in the meantime.
+	 */
+	if (mb0 && (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
+		mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
+	if (mb0 == NULL) {
+		free(rte, M_MRTABLE6);
+		MFC6_UNLOCK();
+		return (ENOBUFS);
+	}
 
-		mrt6stat.mrt6s_no_route++;
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
-			log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
-			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
-			    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+	/* is there an upcall waiting for this packet? */
+	hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
+	for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
+		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
+		    &rt->mf6c_origin.sin6_addr) &&
+		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
+		    &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL))
+			break;
+	}
+
+	if (rt == NULL) {
+		struct mrt6msg *im;
+#ifdef MRT6_OINIT
+		struct omrt6msg *oim;
 #endif
-
-		/*
-		 * Allocate mbufs early so that we don't do extra work if we
-		 * are just going to fail anyway.
-		 */
-		rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6,
-					      M_NOWAIT);
-		if (rte == NULL) {
+		/* no upcall, so make a new entry */
+		rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT);
+		if (rt == NULL) {
+			free(rte, M_MRTABLE6);
+			m_freem(mb0);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
-		mb0 = m_copy(m, 0, M_COPYALL);
 		/*
-		 * Pullup packet header if needed before storing it,
-		 * as other references may modify it in the meantime.
+		 * Make a copy of the header to send to the user
+		 * level process
 		 */
-		if (mb0 &&
-		    (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
-			mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
-		if (mb0 == NULL) {
+		mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));
+		if (mm == NULL) {
 			free(rte, M_MRTABLE6);
+			m_freem(mb0);
+			free(rt, M_MRTABLE6);
 			MFC6_UNLOCK();
 			return (ENOBUFS);
 		}
 
-		/* is there an upcall waiting for this packet? */
-		hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
-		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
-			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
-					       &rt->mf6c_origin.sin6_addr) &&
-			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
-					       &rt->mf6c_mcastgrp.sin6_addr) &&
-			    (rt->mf6c_stall != NULL))
-				break;
-		}
-
-		if (rt == NULL) {
-			struct mrt6msg *im;
+		/*
+		 * Send message to routing daemon
+		 */
+		sin6.sin6_addr = ip6->ip6_src;
+		im = NULL;
 #ifdef MRT6_OINIT
-			struct omrt6msg *oim;
+		oim = NULL;
 #endif
-
-			/* no upcall, so make a new entry */
-			rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
-						  M_NOWAIT);
-			if (rt == NULL) {
-				free(rte, M_MRTABLE6);
-				m_freem(mb0);
-				MFC6_UNLOCK();
-				return (ENOBUFS);
-			}
-			/*
-			 * Make a copy of the header to send to the user
-			 * level process
-			 */
-			mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));
-
-			if (mm == NULL) {
-				free(rte, M_MRTABLE6);
-				m_freem(mb0);
-				free(rt, M_MRTABLE6);
-				MFC6_UNLOCK();
-				return (ENOBUFS);
-			}
-
-			/*
-			 * Send message to routing daemon
-			 */
-			sin6.sin6_addr = ip6->ip6_src;
-
-			im = NULL;
+		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
-			oim = NULL;
+		case MRT6_OINIT:
+			oim = mtod(mm, struct omrt6msg *);
+			oim->im6_msgtype = MRT6MSG_NOCACHE;
+			oim->im6_mbz = 0;
+			break;
 #endif
-			switch (V_ip6_mrouter_ver) {
-#ifdef MRT6_OINIT
-			case MRT6_OINIT:
-				oim = mtod(mm, struct omrt6msg *);
-				oim->im6_msgtype = MRT6MSG_NOCACHE;
-				oim->im6_mbz = 0;
-				break;
-#endif
-			case MRT6_INIT:
-				im = mtod(mm, struct mrt6msg *);
-				im->im6_msgtype = MRT6MSG_NOCACHE;
-				im->im6_mbz = 0;
-				break;
-			default:
-				free(rte, M_MRTABLE6);
-				m_freem(mb0);
-				free(rt, M_MRTABLE6);
-				MFC6_UNLOCK();
-				return (EINVAL);
-			}
+		case MRT6_INIT:
+			im = mtod(mm, struct mrt6msg *);
+			im->im6_msgtype = MRT6MSG_NOCACHE;
+			im->im6_mbz = 0;
+			break;
+		default:
+			free(rte, M_MRTABLE6);
+			m_freem(mb0);
+			free(rt, M_MRTABLE6);
+			MFC6_UNLOCK();
+			return (EINVAL);
+		}
 
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_FORWARD)
-				log(LOG_DEBUG,
-				    "getting the iif info in the kernel\n");
-#endif
-
-			for (mifp = mif6table, mifi = 0;
-			     mifi < nummifs && mifp->m6_ifp != ifp;
-			     mifp++, mifi++)
+		MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel");
+		for (mifp = mif6table, mifi = 0;
+		    mifi < nummifs && mifp->m6_ifp != ifp; mifp++, mifi++)
 				;
 
-			switch (V_ip6_mrouter_ver) {
+		switch (V_ip6_mrouter_ver) {
 #ifdef MRT6_OINIT
-			case MRT6_OINIT:
-				oim->im6_mif = mifi;
-				break;
+		case MRT6_OINIT:
+			oim->im6_mif = mifi;
+			break;
 #endif
-			case MRT6_INIT:
-				im->im6_mif = mifi;
-				break;
-			}
+		case MRT6_INIT:
+			im->im6_mif = mifi;
+			break;
+		}
 
-			if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
-				log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
-				    "socket queue full\n");
-				mrt6stat.mrt6s_upq_sockfull++;
+		if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
+			log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
+			    "socket queue full\n");
+			MRT6STAT_INC(mrt6s_upq_sockfull);
+			free(rte, M_MRTABLE6);
+			m_freem(mb0);
+			free(rt, M_MRTABLE6);
+			MFC6_UNLOCK();
+			return (ENOBUFS);
+		}
+
+		MRT6STAT_INC(mrt6s_upcalls);
+
+		/* insert new entry at head of hash chain */
+		bzero(rt, sizeof(*rt));
+		rt->mf6c_origin.sin6_family = AF_INET6;
+		rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
+		rt->mf6c_origin.sin6_addr = ip6->ip6_src;
+		rt->mf6c_mcastgrp.sin6_family = AF_INET6;
+		rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
+		rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
+		rt->mf6c_expire = UPCALL_EXPIRE;
+		n6expire[hash]++;
+		rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
+
+		/* link into table */
+		rt->mf6c_next  = mf6ctable[hash];
+		mf6ctable[hash] = rt;
+		/* Add this entry to the end of the queue */
+		rt->mf6c_stall = rte;
+	} else {
+		/* determine if q has overflowed */
+		struct rtdetq **p;
+		int npkts = 0;
+
+		for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
+			if (++npkts > MAX_UPQ6) {
+				MRT6STAT_INC(mrt6s_upq_ovflw);
 				free(rte, M_MRTABLE6);
 				m_freem(mb0);
-				free(rt, M_MRTABLE6);
 				MFC6_UNLOCK();
-				return (ENOBUFS);
+				return (0);
 			}
 
-			mrt6stat.mrt6s_upcalls++;
+		/* Add this entry to the end of the queue */
+		*p = rte;
+	}
 
-			/* insert new entry at head of hash chain */
-			bzero(rt, sizeof(*rt));
-			rt->mf6c_origin.sin6_family = AF_INET6;
-			rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
-			rt->mf6c_origin.sin6_addr = ip6->ip6_src;
-			rt->mf6c_mcastgrp.sin6_family = AF_INET6;
-			rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
-			rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
-			rt->mf6c_expire = UPCALL_EXPIRE;
-			n6expire[hash]++;
-			rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
-
-			/* link into table */
-			rt->mf6c_next  = mf6ctable[hash];
-			mf6ctable[hash] = rt;
-			/* Add this entry to the end of the queue */
-			rt->mf6c_stall = rte;
-		} else {
-			/* determine if q has overflowed */
-			struct rtdetq **p;
-			int npkts = 0;
-
-			for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
-				if (++npkts > MAX_UPQ6) {
-					mrt6stat.mrt6s_upq_ovflw++;
-					free(rte, M_MRTABLE6);
-					m_freem(mb0);
-					MFC6_UNLOCK();
-					return (0);
-				}
-
-			/* Add this entry to the end of the queue */
-			*p = rte;
-		}
-
-		rte->next = NULL;
-		rte->m = mb0;
-		rte->ifp = ifp;
+	rte->next = NULL;
+	rte->m = mb0;
+	rte->ifp = ifp;
 #ifdef UPCALL_TIMING
-		rte->t = tp;
+	rte->t = tp;
 #endif /* UPCALL_TIMING */
 
-		MFC6_UNLOCK();
+	MFC6_UNLOCK();
 
-		return (0);
-	}
+	return (0);
 }
 
 /*
@@ -1335,9 +1286,12 @@
 static void
 expire_upcalls(void *unused)
 {
+#ifdef MRT6DEBUG
+	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
+#endif
 	struct rtdetq *rte;
 	struct mf6c *mfc, **nptr;
-	int i;
+	u_long i;
 
 	MFC6_LOCK();
 	for (i = 0; i < MF6CTBLSIZ; i++) {
@@ -1354,15 +1308,9 @@
 			if (rte != NULL &&
 			    mfc->mf6c_expire != 0 &&
 			    --mfc->mf6c_expire == 0) {
-#ifdef MRT6DEBUG
-				if (V_mrt6debug & DEBUG_EXPIRE) {
-					char ip6bufo[INET6_ADDRSTRLEN];
-					char ip6bufg[INET6_ADDRSTRLEN];
-					log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
-					    ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
-					    ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
-				}
-#endif
+				MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)",
+				    ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
+				    ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
 				/*
 				 * drop all the packets
 				 * free the mbuf with the pkt, if, timing info
@@ -1373,7 +1321,7 @@
 					free(rte, M_MRTABLE6);
 					rte = n;
 				} while (rte != NULL);
-				mrt6stat.mrt6s_cache_cleanups++;
+				MRT6STAT_INC(mrt6s_cache_cleanups);
 				n6expire[i]--;
 
 				*nptr = mfc->mf6c_next;
@@ -1422,14 +1370,10 @@
 	mifi = rt->mf6c_parent;
 	if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) {
 		/* came in the wrong interface */
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_FORWARD)
-			log(LOG_DEBUG,
-			    "wrong if: ifid %d mifi %d mififid %x\n",
-			    ifp->if_index, mifi,
-			    mif6table[mifi].m6_ifp->if_index);
-#endif
-		mrt6stat.mrt6s_wrong_if++;
+		MRT6_DLOG(DEBUG_FORWARD,
+		    "wrong if: ifid %d mifi %d mififid %x", ifp->if_index,
+		    mifi, mif6table[mifi].m6_ifp->if_index);
+		MRT6STAT_INC(mrt6s_wrong_if);
 		rt->mf6c_wrong_if++;
 		/*
 		 * If we are doing PIM processing, and we are forwarding
@@ -1502,14 +1446,12 @@
 					break;
 				}
 
-				mrt6stat.mrt6s_upcalls++;
+				MRT6STAT_INC(mrt6s_upcalls);
 
 				if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
-#ifdef MRT6DEBUG
-					if (V_mrt6debug)
-						log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n");
-#endif
-					++mrt6stat.mrt6s_upq_sockfull;
+					MRT6_DLOG(DEBUG_ANY,
+					    "ip6_mrouter socket queue full");
+					MRT6STAT_INC(mrt6s_upq_sockfull);
 					return (ENOBUFS);
 				}	/* if socket Q full */
 			}		/* if PIM */
@@ -1572,6 +1514,9 @@
 static void
 phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
 {
+#ifdef MRT6DEBUG
+	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
 	struct mbuf *mb_copy;
 	struct ifnet *ifp = mifp->m6_ifp;
 	int error = 0;
@@ -1609,11 +1554,8 @@
 		error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o,
 		    NULL, NULL);
 
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_XMIT)
-			log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
-			    mifp - mif6table, error);
-#endif
+		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
+		    (uint16_t)(mifp - mif6table), error);
 		return;
 	}
 
@@ -1643,17 +1585,17 @@
 		dst6.sin6_len = sizeof(struct sockaddr_in6);
 		dst6.sin6_family = AF_INET6;
 		dst6.sin6_addr = ip6->ip6_dst;
+
+		IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
 		/*
 		 * We just call if_output instead of nd6_output here, since
 		 * we need no ND for a multicast forwarded packet...right?
 		 */
+		m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 		error = (*ifp->if_output)(ifp, mb_copy,
 		    (struct sockaddr *)&dst6, NULL);
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_XMIT)
-			log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
-			    mifp - mif6table, error);
-#endif
+		MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
+		    (uint16_t)(mifp - mif6table), error);
 	} else {
 		/*
 		 * pMTU discovery is intentionally disabled by default, since
@@ -1663,19 +1605,11 @@
 		if (V_ip6_mcast_pmtu)
 			icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
 		else {
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_XMIT) {
-				char ip6bufs[INET6_ADDRSTRLEN];
-				char ip6bufd[INET6_ADDRSTRLEN];
-				log(LOG_DEBUG,
-				    "phyint_send: packet too big on %s o %s "
-				    "g %s size %d(discarded)\n",
-				    if_name(ifp),
-				    ip6_sprintf(ip6bufs, &ip6->ip6_src),
-				    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
-				    mb_copy->m_pkthdr.len);
-			}
-#endif /* MRT6DEBUG */
+			MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s "
+			    "g %s size %d (discarded)", if_name(ifp),
+			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
+			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
+			    mb_copy->m_pkthdr.len);
 			m_freem(mb_copy); /* simply discard the packet */
 		}
 	}
@@ -1684,26 +1618,23 @@
 static int
 register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
 {
+#ifdef MRT6DEBUG
+	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
 	struct mbuf *mm;
 	int i, len = m->m_pkthdr.len;
 	static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
 	struct mrt6msg *im6;
 
-#ifdef MRT6DEBUG
-	if (V_mrt6debug) {
-		char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
-		log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n",
-		    ip6_sprintf(ip6bufs, &ip6->ip6_src),
-		    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
-	}
-#endif
-	++pim6stat.pim6s_snd_registers;
+	MRT6_DLOG(DEBUG_ANY, "src %s dst %s",
+	    ip6_sprintf(ip6bufs, &ip6->ip6_src),
+	    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+	PIM6STAT_INC(pim6s_snd_registers);
 
-	/* Make a copy of the packet to send to the user level process */
-	MGETHDR(mm, M_DONTWAIT, MT_HEADER);
+	/* Make a copy of the packet to send to the user level process. */
+	mm = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mm == NULL)
 		return (ENOBUFS);
-	mm->m_pkthdr.rcvif = NULL;
 	mm->m_data += max_linkhdr;
 	mm->m_len = sizeof(struct ip6_hdr);
 
@@ -1732,15 +1663,11 @@
 	im6->im6_mif = mif - mif6table;
 
 	/* iif info is not given for reg. encap.n */
-	mrt6stat.mrt6s_upcalls++;
+	MRT6STAT_INC(mrt6s_upcalls);
 
 	if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
-#ifdef MRT6DEBUG
-		if (V_mrt6debug)
-			log(LOG_WARNING,
-			    "register_send: ip6_mrouter socket queue full\n");
-#endif
-		++mrt6stat.mrt6s_upq_sockfull;
+		MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full");
+		MRT6STAT_INC(mrt6s_upq_sockfull);
 		return (ENOBUFS);
 	}
 	return (0);
@@ -1781,7 +1708,7 @@
 	int minlen;
 	int off = *offp;
 
-	++pim6stat.pim6s_rcv_total;
+	PIM6STAT_INC(pim6s_rcv_total);
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	pimlen = m->m_pkthdr.len - *offp;
@@ -1790,11 +1717,8 @@
 	 * Validate lengths
 	 */
 	if (pimlen < PIM_MINLEN) {
-		++pim6stat.pim6s_rcv_tooshort;
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_PIM)
-			log(LOG_DEBUG,"pim6_input: PIM packet too short\n");
-#endif
+		PIM6STAT_INC(pim6s_rcv_tooshort);
+		MRT6_DLOG(DEBUG_PIM, "PIM packet too short");
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -1823,7 +1747,7 @@
 #else
 	IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
 	if (pim == NULL) {
-		pim6stat.pim6s_rcv_tooshort++;
+		PIM6STAT_INC(pim6s_rcv_tooshort);
 		return (IPPROTO_DONE);
 	}
 #endif
@@ -1843,12 +1767,8 @@
 			cksumlen = pimlen;
 
 		if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
-			++pim6stat.pim6s_rcv_badsum;
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_PIM)
-				log(LOG_DEBUG,
-				    "pim6_input: invalid checksum\n");
-#endif
+			PIM6STAT_INC(pim6s_rcv_badsum);
+			MRT6_DLOG(DEBUG_PIM, "invalid checksum");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
@@ -1857,12 +1777,10 @@
 
 	/* PIM version check */
 	if (pim->pim_ver != PIM_VERSION) {
-		++pim6stat.pim6s_rcv_badversion;
-#ifdef MRT6DEBUG
-		log(LOG_ERR,
-		    "pim6_input: incorrect version %d, expecting %d\n",
+		PIM6STAT_INC(pim6s_rcv_badversion);
+		MRT6_DLOG(DEBUG_ANY | DEBUG_ERR,
+		    "incorrect version %d, expecting %d",
 		    pim->pim_ver, PIM_VERSION);
-#endif
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
@@ -1883,15 +1801,11 @@
 		char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 #endif
 
-		++pim6stat.pim6s_rcv_registers;
+		PIM6STAT_INC(pim6s_rcv_registers);
 
 		if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_PIM)
-				log(LOG_DEBUG,
-				    "pim6_input: register mif not set: %d\n",
-				    reg_mif_num);
-#endif
+			MRT6_DLOG(DEBUG_PIM, "register mif not set: %d",
+			    reg_mif_num);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
@@ -1905,51 +1819,37 @@
 		 * Validate length
 		 */
 		if (pimlen < PIM6_REG_MINLEN) {
-			++pim6stat.pim6s_rcv_tooshort;
-			++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
-			log(LOG_ERR,
-			    "pim6_input: register packet size too "
-			    "small %d from %s\n",
+			PIM6STAT_INC(pim6s_rcv_tooshort);
+			PIM6STAT_INC(pim6s_rcv_badregisters);
+			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet "
+			    "size too small %d from %s",
 			    pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
-#endif
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 
 		eip6 = (struct ip6_hdr *) (reghdr + 1);
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_PIM)
-			log(LOG_DEBUG,
-			    "pim6_input[register], eip6: %s -> %s, "
-			    "eip6 plen %d\n",
-			    ip6_sprintf(ip6bufs, &eip6->ip6_src),
-			    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
-			    ntohs(eip6->ip6_plen));
-#endif
+		MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d",
+		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
+		    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
+		    ntohs(eip6->ip6_plen));
 
 		/* verify the version number of the inner packet */
 		if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
-			++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
-			log(LOG_DEBUG, "pim6_input: invalid IP version (%d) "
-			    "of the inner packet\n",
+			PIM6STAT_INC(pim6s_rcv_badregisters);
+			MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) "
+			    "of the inner packet",
 			    (eip6->ip6_vfc & IPV6_VERSION));
-#endif
 			m_freem(m);
-			return (IPPROTO_NONE);
+			return (IPPROTO_DONE);
 		}
 
 		/* verify the inner packet is destined to a mcast group */
 		if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
-			++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
-			if (V_mrt6debug & DEBUG_PIM)
-				log(LOG_DEBUG,
-				    "pim6_input: inner packet of register "
-				    "is not multicast %s\n",
-				    ip6_sprintf(ip6bufd, &eip6->ip6_dst));
-#endif
+			PIM6STAT_INC(pim6s_rcv_badregisters);
+			MRT6_DLOG(DEBUG_PIM, "inner packet of register "
+			    "is not multicast %s",
+			    ip6_sprintf(ip6bufd, &eip6->ip6_dst));
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
@@ -1959,11 +1859,8 @@
 		 */
 		mcp = m_copy(m, 0, off + PIM6_REG_MINLEN);
 		if (mcp == NULL) {
-#ifdef MRT6DEBUG
-			log(LOG_ERR,
-			    "pim6_input: pim register: "
-			    "could not copy register head\n");
-#endif
+			MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: "
+			    "could not copy register head");
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
@@ -1972,16 +1869,10 @@
 		 * forward the inner ip6 packet; point m_data at the inner ip6.
 		 */
 		m_adj(m, off + PIM_MINLEN);
-#ifdef MRT6DEBUG
-		if (V_mrt6debug & DEBUG_PIM) {
-			log(LOG_DEBUG,
-			    "pim6_input: forwarding decapsulated register: "
-			    "src %s, dst %s, mif %d\n",
-			    ip6_sprintf(ip6bufs, &eip6->ip6_src),
-			    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
-			    reg_mif_num);
-		}
-#endif
+		MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: "
+		    "src %s, dst %s, mif %d",
+		    ip6_sprintf(ip6bufs, &eip6->ip6_src),
+		    ip6_sprintf(ip6bufd, &eip6->ip6_dst), reg_mif_num);
 
 		rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m,
 				dst.sin6_family, 0);

Modified: trunk/sys/netinet6/ip6_mroute.h
===================================================================
--- trunk/sys/netinet6/ip6_mroute.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_mroute.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $
- * $FreeBSD: stable/9/sys/netinet6/ip6_mroute.h 240305 2012-09-10 11:38:02Z glebius $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_mroute.h 261218 2014-01-28 00:30:17Z ae $
  */
 
 /*	BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp	*/
@@ -122,19 +122,19 @@
  * The kernel's multicast routing statistics.
  */
 struct mrt6stat {
-	u_quad_t mrt6s_mfc_lookups;	/* # forw. cache hash table hits   */
-	u_quad_t mrt6s_mfc_misses;	/* # forw. cache hash table misses */
-	u_quad_t mrt6s_upcalls;		/* # calls to multicast routing daemon */
-	u_quad_t mrt6s_no_route;	/* no route for packet's origin    */
-	u_quad_t mrt6s_bad_tunnel;	/* malformed tunnel options        */
-	u_quad_t mrt6s_cant_tunnel;	/* no room for tunnel options      */
-	u_quad_t mrt6s_wrong_if;	/* arrived on wrong interface	   */
-	u_quad_t mrt6s_upq_ovflw;	/* upcall Q overflow		   */
-	u_quad_t mrt6s_cache_cleanups;	/* # entries with no upcalls	   */
-	u_quad_t mrt6s_drop_sel;	/* pkts dropped selectively        */
-	u_quad_t mrt6s_q_overflow;	/* pkts dropped - Q overflow       */
-	u_quad_t mrt6s_pkt2large;	/* pkts dropped - size > BKT SIZE  */
-	u_quad_t mrt6s_upq_sockfull;	/* upcalls dropped - socket full   */
+	uint64_t mrt6s_mfc_lookups;	/* # forw. cache hash table hits   */
+	uint64_t mrt6s_mfc_misses;	/* # forw. cache hash table misses */
+	uint64_t mrt6s_upcalls;		/* # calls to multicast routing daemon */
+	uint64_t mrt6s_no_route;	/* no route for packet's origin    */
+	uint64_t mrt6s_bad_tunnel;	/* malformed tunnel options        */
+	uint64_t mrt6s_cant_tunnel;	/* no room for tunnel options      */
+	uint64_t mrt6s_wrong_if;	/* arrived on wrong interface	   */
+	uint64_t mrt6s_upq_ovflw;	/* upcall Q overflow		   */
+	uint64_t mrt6s_cache_cleanups;	/* # entries with no upcalls	   */
+	uint64_t mrt6s_drop_sel;	/* pkts dropped selectively        */
+	uint64_t mrt6s_q_overflow;	/* pkts dropped - Q overflow       */
+	uint64_t mrt6s_pkt2large;	/* pkts dropped - size > BKT SIZE  */
+	uint64_t mrt6s_upq_sockfull;	/* upcalls dropped - socket full   */
 };
 
 #ifdef MRT6_OINIT
@@ -146,11 +146,6 @@
 struct omrt6msg {
 	u_long	    unused1;
 	u_char	    im6_msgtype;		/* what type of message	    */
-#if 0
-#define MRT6MSG_NOCACHE	1
-#define MRT6MSG_WRONGMIF	2
-#define MRT6MSG_WHOLEPKT	3		/* used for user level encap*/
-#endif
 	u_char	    im6_mbz;			/* must be zero		    */
 	u_char	    im6_mif;			/* mif rec'd on		    */
 	u_char	    unused2;

Modified: trunk/sys/netinet6/ip6_output.c
===================================================================
--- trunk/sys/netinet6/ip6_output.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_output.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_output.c 245067 2013-01-05 20:07:28Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_output.c 317335 2017-04-23 08:59:57Z kp $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -185,7 +185,7 @@
 	}\
     } while (/*CONSTCOND*/ 0)
 
-static void
+void
 in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
 {
 	u_short csum;
@@ -197,8 +197,8 @@
 
 	if (offset + sizeof(u_short) > m->m_len) {
 		printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
-		    "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
-		    m->m_pkthdr.csum_flags);
+		    "csum_flags=%b\n", __func__, m->m_len, plen, offset,
+		    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
 		/*
 		 * XXX this should not happen, but if it does, the correct
 		 * behavior may be to insert the checksum in the appropriate
@@ -209,6 +209,66 @@
 	*(u_short *)(m->m_data + offset) = csum;
 }
 
+int
+ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
+    int mtu, uint32_t id)
+{
+	struct mbuf *m, **mnext, *m_frgpart;
+	struct ip6_hdr *ip6, *mhip6;
+	struct ip6_frag *ip6f;
+	int off;
+	int error;
+	int tlen = m0->m_pkthdr.len;
+
+	KASSERT(( mtu % 8 == 0), ("Fragment length must be a multiple of 8"));
+
+	m = m0;
+	ip6 = mtod(m, struct ip6_hdr *);
+	mnext = &m->m_nextpkt;
+
+	for (off = hlen; off < tlen; off += mtu) {
+		m = m_gethdr(M_NOWAIT, MT_DATA);
+		if (!m) {
+			IP6STAT_INC(ip6s_odropped);
+			return (ENOBUFS);
+		}
+		m->m_flags = m0->m_flags & M_COPYFLAGS;
+		*mnext = m;
+		mnext = &m->m_nextpkt;
+		m->m_data += max_linkhdr;
+		mhip6 = mtod(m, struct ip6_hdr *);
+		*mhip6 = *ip6;
+		m->m_len = sizeof(*mhip6);
+		error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+		if (error) {
+			IP6STAT_INC(ip6s_odropped);
+			return (error);
+		}
+		ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
+		if (off + mtu >= tlen)
+			mtu = tlen - off;
+		else
+			ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+		mhip6->ip6_plen = htons((u_short)(mtu + hlen +
+		    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+		if ((m_frgpart = m_copy(m0, off, mtu)) == 0) {
+			IP6STAT_INC(ip6s_odropped);
+			return (ENOBUFS);
+		}
+		m_cat(m, m_frgpart);
+		m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f);
+		m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
+		m->m_pkthdr.rcvif = NULL;
+		ip6f->ip6f_reserved = 0;
+		ip6f->ip6f_ident = id;
+		ip6f->ip6f_nxt = nextproto;
+		IP6STAT_INC(ip6s_ofragments);
+		in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+	}
+
+	return (0);
+}
+
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
@@ -219,9 +279,9 @@
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
- * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
+ * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and
  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
- * which is rt_rmx.rmx_mtu.
+ * which is rt_mtu.
  *
  * ifpp - XXX: just for statistics
  */
@@ -230,11 +290,11 @@
     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
     struct ifnet **ifpp, struct inpcb *inp)
 {
-	struct ip6_hdr *ip6, *mhip6;
+	struct ip6_hdr *ip6;
 	struct ifnet *ifp, *origifp;
 	struct mbuf *m = m0;
 	struct mbuf *mprev = NULL;
-	int hlen, tlen, len, off;
+	int hlen, tlen, len;
 	struct route_in6 ip6route;
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -249,16 +309,9 @@
 	u_int32_t zone;
 	struct route_in6 *ro_pmtu = NULL;
 	int hdrsplit = 0;
-	int needipsec = 0;
 	int sw_csum, tso;
-#ifdef IPSEC
-	struct ipsec_output_state state;
-	struct ip6_rthdr *rh = NULL;
-	int needipsectun = 0;
-	int segleft_org = 0;
-	struct secpolicy *sp = NULL;
-#endif /* IPSEC */
 	struct m_tag *fwd_tag = NULL;
+	uint32_t id;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (ip6 == NULL) {
@@ -299,26 +352,12 @@
 	 * IPSec checking which handles several cases.
 	 * FAST IPSEC: We re-injected the packet.
 	 */
-	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp))
 	{
 	case 1:                 /* Bad packet */
 		goto freehdrs;
-	case -1:                /* Do IPSec */
-		needipsec = 1;
-		/*
-		 * Do delayed checksums now, as we may send before returning.
-		 */
-		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
-			plen = m->m_pkthdr.len - sizeof(*ip6);
-			in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
-			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
-		}
-#ifdef SCTP
-		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
-			sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
-			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
-		}
-#endif
+	case -1:                /* IPSec done */
+		goto done;
 	case 0:                 /* No IPSec */
 	default:
 		break;
@@ -338,15 +377,15 @@
 		optlen += exthdrs.ip6e_rthdr->m_len;
 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
 
-	/* NOTE: we don't add AH/ESP length here. do that later. */
+	/* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */
 	if (exthdrs.ip6e_dest2)
 		optlen += exthdrs.ip6e_dest2->m_len;
 
 	/*
-	 * If we need IPsec, or there is at least one extension header,
+	 * If there is at least one extension header,
 	 * separate IP6 header from the payload.
 	 */
-	if ((needipsec || optlen) && !hdrsplit) {
+	if (optlen && !hdrsplit) {
 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 			m = NULL;
 			goto freehdrs;
@@ -421,73 +460,7 @@
 	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
 		   IPPROTO_ROUTING);
 
-#ifdef IPSEC
-	if (!needipsec)
-		goto skip_ipsec2;
-
 	/*
-	 * pointers after IPsec headers are not valid any more.
-	 * other pointers need a great care too.
-	 * (IPsec routines should not mangle mbufs prior to AH/ESP)
-	 */
-	exthdrs.ip6e_dest2 = NULL;
-
-	if (exthdrs.ip6e_rthdr) {
-		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
-		segleft_org = rh->ip6r_segleft;
-		rh->ip6r_segleft = 0;
-	}
-
-	bzero(&state, sizeof(state));
-	state.m = m;
-	error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
-				    &needipsectun);
-	m = state.m;
-	if (error == EJUSTRETURN) {
-		/*
-		 * We had a SP with a level of 'use' and no SA. We
-		 * will just continue to process the packet without
-		 * IPsec processing.
-		 */
-		;
-	} else if (error) {
-		/* mbuf is already reclaimed in ipsec6_output_trans. */
-		m = NULL;
-		switch (error) {
-		case EHOSTUNREACH:
-		case ENETUNREACH:
-		case EMSGSIZE:
-		case ENOBUFS:
-		case ENOMEM:
-			break;
-		default:
-			printf("[%s:%d] (ipsec): error code %d\n",
-			    __func__, __LINE__, error);
-			/* FALLTHROUGH */
-		case ENOENT:
-			/* don't show these error codes to the user */
-			error = 0;
-			break;
-		}
-		goto bad;
-	} else if (!needipsectun) {
-		/*
-		 * In the FAST IPSec case we have already
-		 * re-injected the packet and it has been freed
-		 * by the ipsec_done() function.  So, just clean
-		 * up after ourselves.
-		 */
-		m = NULL;
-		goto done;
-	}
-	if (exthdrs.ip6e_rthdr) {
-		/* ah6_output doesn't modify mbuf chain */
-		rh->ip6r_segleft = segleft_org;
-	}
-skip_ipsec2:;
-#endif /* IPSEC */
-
-	/*
 	 * If there is a routing header, discard the packet.
 	 */
 	if (exthdrs.ip6e_rthdr) {
@@ -522,19 +495,8 @@
 		ro = &opt->ip6po_route;
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
 #ifdef FLOWTABLE
-	if (ro->ro_rt == NULL) {
-		struct flentry *fle;
-
-		/*
-		 * The flow table returns route entries valid for up to 30
-		 * seconds; we rely on the remainder of ip_output() taking no
-		 * longer than that long for the stability of ro_rt.  The
-		 * flow ID assignment must have happened before this point.
-		 */
-		fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
-		if (fle != NULL)
-			flow_to_route_in6(fle, ro);
-	}
+	if (ro->ro_rt == NULL)
+		(void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
 #endif
 again:
 	/*
@@ -563,77 +525,6 @@
 			ip6->ip6_hlim = V_ip6_defmcasthlim;
 	}
 
-#ifdef IPSEC
-	/*
-	 * We may re-inject packets into the stack here.
-	 */
-	if (needipsec && needipsectun) {
-		struct ipsec_output_state state;
-
-		/*
-		 * All the extension headers will become inaccessible
-		 * (since they can be encrypted).
-		 * Don't panic, we need no more updates to extension headers
-		 * on inner IPv6 packet (since they are now encapsulated).
-		 *
-		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
-		 */
-		bzero(&exthdrs, sizeof(exthdrs));
-		exthdrs.ip6e_ip6 = m;
-
-		bzero(&state, sizeof(state));
-		state.m = m;
-		state.ro = (struct route *)ro;
-		state.dst = (struct sockaddr *)dst;
-
-		error = ipsec6_output_tunnel(&state, sp, flags);
-
-		m = state.m;
-		ro = (struct route_in6 *)state.ro;
-		dst = (struct sockaddr_in6 *)state.dst;
-		if (error == EJUSTRETURN) {
-			/*
-			 * We had a SP with a level of 'use' and no SA. We
-			 * will just continue to process the packet without
-			 * IPsec processing.
-			 */
-			;
-		} else if (error) {
-			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
-			m0 = m = NULL;
-			m = NULL;
-			switch (error) {
-			case EHOSTUNREACH:
-			case ENETUNREACH:
-			case EMSGSIZE:
-			case ENOBUFS:
-			case ENOMEM:
-				break;
-			default:
-				printf("[%s:%d] (ipsec): error code %d\n",
-				    __func__, __LINE__, error);
-				/* FALLTHROUGH */
-			case ENOENT:
-				/* don't show these error codes to the user */
-				error = 0;
-				break;
-			}
-			goto bad;
-		} else {
-			/*
-			 * In the FAST IPSec case we have already
-			 * re-injected the packet and it has been freed
-			 * by the ipsec_done() function.  So, just clean
-			 * up after ourselves.
-			 */
-			m = NULL;
-			goto done;
-		}
-
-		exthdrs.ip6e_ip6 = m;
-	}
-#endif /* IPSEC */
-
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
@@ -672,7 +563,7 @@
 	}
 	if (rt != NULL) {
 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
-		rt->rt_use++;
+		counter_u64_add(rt->rt_pksent, 1);
 	}
 
 
@@ -775,9 +666,7 @@
 				/*
 				 * XXX: ip6_mforward expects that rcvif is NULL
 				 * when it is called from the originating path.
-				 * However, it is not always the case, since
-				 * some versions of MGETHDR() does not
-				 * initialize the field.
+				 * However, it may not always be the case.
 				 */
 				m->m_pkthdr.rcvif = NULL;
 				if (ip6_mforward(ip6, ifp, m) != 0) {
@@ -997,19 +886,12 @@
 		 * Even if the DONTFRAG option is specified, we cannot send the
 		 * packet when the data length is larger than the MTU of the
 		 * outgoing interface.
-		 * Notify the error by sending IPV6_PATHMTU ancillary data as
-		 * well as returning an error code (the latter is not described
-		 * in the API spec.)
+		 * Notify the error by sending IPV6_PATHMTU ancillary data if
+		 * application wanted to know the MTU value. Also return an
+		 * error code (this is not described in the API spec).
 		 */
-		u_int32_t mtu32;
-		struct ip6ctlparam ip6cp;
-
-		mtu32 = (u_int32_t)mtu;
-		bzero(&ip6cp, sizeof(ip6cp));
-		ip6cp.ip6c_cmdarg = (void *)&mtu32;
-		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
-		    (void *)&ip6cp);
-
+		if (inp != NULL)
+			ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu);
 		error = EMSGSIZE;
 		goto bad;
 	}
@@ -1046,9 +928,6 @@
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else {
-		struct mbuf **mnext, *m_frgpart;
-		struct ip6_frag *ip6f;
-		u_int32_t id = htonl(ip6_randomid());
 		u_char nextproto;
 
 		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
@@ -1096,8 +975,6 @@
 			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
 		}
 #endif
-		mnext = &m->m_nextpkt;
-
 		/*
 		 * Change the next header field of the last header in the
 		 * unfragmentable part.
@@ -1122,47 +999,9 @@
 		 * chain.
 		 */
 		m0 = m;
-		for (off = hlen; off < tlen; off += len) {
-			MGETHDR(m, M_DONTWAIT, MT_HEADER);
-			if (!m) {
-				error = ENOBUFS;
-				IP6STAT_INC(ip6s_odropped);
-				goto sendorfree;
-			}
-			m->m_pkthdr.rcvif = NULL;
-			m->m_flags = m0->m_flags & M_COPYFLAGS;	/* incl. FIB */
-			*mnext = m;
-			mnext = &m->m_nextpkt;
-			m->m_data += max_linkhdr;
-			mhip6 = mtod(m, struct ip6_hdr *);
-			*mhip6 = *ip6;
-			m->m_len = sizeof(*mhip6);
-			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
-			if (error) {
-				IP6STAT_INC(ip6s_odropped);
-				goto sendorfree;
-			}
-			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
-			if (off + len >= tlen)
-				len = tlen - off;
-			else
-				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
-			mhip6->ip6_plen = htons((u_short)(len + hlen +
-			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
-			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
-				error = ENOBUFS;
-				IP6STAT_INC(ip6s_odropped);
-				goto sendorfree;
-			}
-			m_cat(m, m_frgpart);
-			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
-			m->m_pkthdr.rcvif = NULL;
-			ip6f->ip6f_reserved = 0;
-			ip6f->ip6f_ident = id;
-			ip6f->ip6f_nxt = nextproto;
-			IP6STAT_INC(ip6s_ofragments);
-			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
-		}
+		id = htonl(ip6_randomid());
+		if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id)))
+			goto sendorfree;
 
 		in6_ifstat_inc(ifp, ifs6_out_fragok);
 	}
@@ -1196,11 +1035,6 @@
 		RO_RTFREE(ro);
 	if (ro_pmtu == &ip6route)
 		RO_RTFREE(ro_pmtu);
-#ifdef IPSEC
-	if (sp != NULL)
-		KEY_FREESP(&sp);
-#endif
-
 	return (error);
 
 freehdrs:
@@ -1223,17 +1057,12 @@
 	if (hlen > MCLBYTES)
 		return (ENOBUFS); /* XXX */
 
-	MGET(m, M_DONTWAIT, MT_DATA);
-	if (!m)
+	if (hlen > MLEN)
+		m = m_getcl(M_NOWAIT, MT_DATA, 0);
+	else
+		m = m_get(M_NOWAIT, MT_DATA);
+	if (m == NULL)
 		return (ENOBUFS);
-
-	if (hlen > MLEN) {
-		MCLGET(m, M_DONTWAIT);
-		if ((m->m_flags & M_EXT) == 0) {
-			m_free(m);
-			return (ENOBUFS);
-		}
-	}
 	m->m_len = hlen;
 	if (hdr)
 		bcopy(hdr, mtod(m, caddr_t), hlen);
@@ -1261,8 +1090,8 @@
 	 * Otherwise, use it to store the options.
 	 */
 	if (exthdrs->ip6e_hbh == 0) {
-		MGET(mopt, M_DONTWAIT, MT_DATA);
-		if (mopt == 0)
+		mopt = m_get(M_NOWAIT, MT_DATA);
+		if (mopt == NULL)
 			return (ENOBUFS);
 		mopt->m_len = JUMBOOPTLEN;
 		optbuf = mtod(mopt, u_char *);
@@ -1293,15 +1122,8 @@
 			 * As a consequence, we must always prepare a cluster
 			 * at this point.
 			 */
-			MGET(n, M_DONTWAIT, MT_DATA);
-			if (n) {
-				MCLGET(n, M_DONTWAIT);
-				if ((n->m_flags & M_EXT) == 0) {
-					m_freem(n);
-					n = NULL;
-				}
-			}
-			if (!n)
+			n = m_getcl(M_NOWAIT, MT_DATA, 0);
+			if (n == NULL)
 				return (ENOBUFS);
 			n->m_len = oldoptlen + JUMBOOPTLEN;
 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
@@ -1348,7 +1170,7 @@
 
 	if (hlen > sizeof(struct ip6_hdr)) {
 		n = m_copym(m0, sizeof(struct ip6_hdr),
-		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
+		    hlen - sizeof(struct ip6_hdr), M_NOWAIT);
 		if (n == 0)
 			return (ENOBUFS);
 		m->m_next = n;
@@ -1370,8 +1192,8 @@
 		/* allocate a new mbuf for the fragment header */
 		struct mbuf *mfrg;
 
-		MGET(mfrg, M_DONTWAIT, MT_DATA);
-		if (mfrg == 0)
+		mfrg = m_get(M_NOWAIT, MT_DATA);
+		if (mfrg == NULL)
 			return (ENOBUFS);
 		mfrg->m_len = sizeof(struct ip6_frag);
 		*frghdrp = mtod(mfrg, struct ip6_frag *);
@@ -1422,9 +1244,9 @@
 		ifmtu = IN6_LINKMTU(ifp);
 		mtu = tcp_hc_getmtu(&inc);
 		if (mtu)
-			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
+			mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu);
 		else
-			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+			mtu = ro_pmtu->ro_rt->rt_mtu;
 		if (mtu == 0)
 			mtu = ifmtu;
 		else if (mtu < IPV6_MMTU) {
@@ -1448,7 +1270,7 @@
 			 * field isn't locked).
 			 */
 			mtu = ifmtu;
-			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
+			ro_pmtu->ro_rt->rt_mtu = mtu;
 		}
 	} else if (ifp) {
 		mtu = IN6_LINKMTU(ifp);
@@ -1492,13 +1314,10 @@
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(in6p);
-				if (IN_MULTICAST(ntohl(in6p->inp_laddr.s_addr))) {
-					if ((so->so_options &
-					    (SO_REUSEADDR | SO_REUSEPORT)) != 0)
-						in6p->inp_flags2 |= INP_REUSEPORT;
-					else
-						in6p->inp_flags2 &= ~INP_REUSEPORT;
-				}
+				if ((so->so_options & SO_REUSEADDR) != 0)
+					in6p->inp_flags2 |= INP_REUSEADDR;
+				else
+					in6p->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(in6p);
 				error = 0;
 				break;
@@ -3016,14 +2835,6 @@
 		if (copym == NULL)
 			return;
 	}
-
-#ifdef DIAGNOSTIC
-	if (copym->m_len < sizeof(*ip6)) {
-		m_freem(copym);
-		return;
-	}
-#endif
-
 	ip6 = mtod(copym, struct ip6_hdr *);
 	/*
 	 * clear embedded scope identifiers if necessary.
@@ -3031,7 +2842,11 @@
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
-
+	if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+		copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
+		    CSUM_PSEUDO_HDR;
+		copym->m_pkthdr.csum_data = 0xffff;
+	}
 	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
 }
 
@@ -3046,12 +2861,12 @@
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (m->m_len > sizeof(*ip6)) {
-		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
-		if (mh == 0) {
+		mh = m_gethdr(M_NOWAIT, MT_DATA);
+		if (mh == NULL) {
 			m_freem(m);
 			return ENOBUFS;
 		}
-		M_MOVE_PKTHDR(mh, m);
+		m_move_pkthdr(mh, m);
 		MH_ALIGN(mh, sizeof(*ip6));
 		m->m_len -= sizeof(*ip6);
 		m->m_data += sizeof(*ip6);

Modified: trunk/sys/netinet6/ip6_var.h
===================================================================
--- trunk/sys/netinet6/ip6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/ip6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_var.h 329158 2018-02-12 13:52:58Z ae $
  */
 
 #ifndef _NETINET6_IP6_VAR_H_
@@ -182,39 +182,39 @@
  */
 
 struct	ip6stat {
-	u_quad_t ip6s_total;		/* total packets received */
-	u_quad_t ip6s_tooshort;		/* packet too short */
-	u_quad_t ip6s_toosmall;		/* not enough data */
-	u_quad_t ip6s_fragments;	/* fragments received */
-	u_quad_t ip6s_fragdropped;	/* frags dropped(dups, out of space) */
-	u_quad_t ip6s_fragtimeout;	/* fragments timed out */
-	u_quad_t ip6s_fragoverflow;	/* fragments that exceeded limit */
-	u_quad_t ip6s_forward;		/* packets forwarded */
-	u_quad_t ip6s_cantforward;	/* packets rcvd for unreachable dest */
-	u_quad_t ip6s_redirectsent;	/* packets forwarded on same net */
-	u_quad_t ip6s_delivered;	/* datagrams delivered to upper level*/
-	u_quad_t ip6s_localout;		/* total ip packets generated here */
-	u_quad_t ip6s_odropped;		/* lost packets due to nobufs, etc. */
-	u_quad_t ip6s_reassembled;	/* total packets reassembled ok */
-	u_quad_t ip6s_fragmented;	/* datagrams successfully fragmented */
-	u_quad_t ip6s_ofragments;	/* output fragments created */
-	u_quad_t ip6s_cantfrag;		/* don't fragment flag was set, etc. */
-	u_quad_t ip6s_badoptions;	/* error in option processing */
-	u_quad_t ip6s_noroute;		/* packets discarded due to no route */
-	u_quad_t ip6s_badvers;		/* ip6 version != 6 */
-	u_quad_t ip6s_rawout;		/* total raw ip packets generated */
-	u_quad_t ip6s_badscope;		/* scope error */
-	u_quad_t ip6s_notmember;	/* don't join this multicast group */
+	uint64_t ip6s_total;		/* total packets received */
+	uint64_t ip6s_tooshort;		/* packet too short */
+	uint64_t ip6s_toosmall;		/* not enough data */
+	uint64_t ip6s_fragments;	/* fragments received */
+	uint64_t ip6s_fragdropped;	/* frags dropped(dups, out of space) */
+	uint64_t ip6s_fragtimeout;	/* fragments timed out */
+	uint64_t ip6s_fragoverflow;	/* fragments that exceeded limit */
+	uint64_t ip6s_forward;		/* packets forwarded */
+	uint64_t ip6s_cantforward;	/* packets rcvd for unreachable dest */
+	uint64_t ip6s_redirectsent;	/* packets forwarded on same net */
+	uint64_t ip6s_delivered;	/* datagrams delivered to upper level*/
+	uint64_t ip6s_localout;		/* total ip packets generated here */
+	uint64_t ip6s_odropped;		/* lost packets due to nobufs, etc. */
+	uint64_t ip6s_reassembled;	/* total packets reassembled ok */
+	uint64_t ip6s_fragmented;	/* datagrams successfully fragmented */
+	uint64_t ip6s_ofragments;	/* output fragments created */
+	uint64_t ip6s_cantfrag;		/* don't fragment flag was set, etc. */
+	uint64_t ip6s_badoptions;	/* error in option processing */
+	uint64_t ip6s_noroute;		/* packets discarded due to no route */
+	uint64_t ip6s_badvers;		/* ip6 version != 6 */
+	uint64_t ip6s_rawout;		/* total raw ip packets generated */
+	uint64_t ip6s_badscope;		/* scope error */
+	uint64_t ip6s_notmember;	/* don't join this multicast group */
 #define	IP6S_HDRCNT		256	/* headers count */
-	u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
-	u_quad_t ip6s_m1;		/* one mbuf */
+	uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
+	uint64_t ip6s_m1;		/* one mbuf */
 #define	IP6S_M2MMAX		32
-	u_quad_t ip6s_m2m[IP6S_M2MMAX];	/* two or more mbuf */
-	u_quad_t ip6s_mext1;		/* one ext mbuf */
-	u_quad_t ip6s_mext2m;		/* two or more ext mbuf */
-	u_quad_t ip6s_exthdrtoolong;	/* ext hdr are not contiguous */
-	u_quad_t ip6s_nogif;		/* no match gif found */
-	u_quad_t ip6s_toomanyhdr;	/* discarded due to too many headers */
+	uint64_t ip6s_m2m[IP6S_M2MMAX];	/* two or more mbuf */
+	uint64_t ip6s_mext1;		/* one ext mbuf */
+	uint64_t ip6s_mext2m;		/* two or more ext mbuf */
+	uint64_t ip6s_exthdrtoolong;	/* ext hdr are not contiguous */
+	uint64_t ip6s_nogif;		/* no match gif found */
+	uint64_t ip6s_toomanyhdr;	/* discarded due to too many headers */
 
 	/*
 	 * statistics for improvement of the source address selection
@@ -224,31 +224,35 @@
 #define	IP6S_RULESMAX		16
 #define	IP6S_SCOPECNT		16
 	/* number of times that address selection fails */
-	u_quad_t ip6s_sources_none;
+	uint64_t ip6s_sources_none;
 	/* number of times that an address on the outgoing I/F is chosen */
-	u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT];
+	uint64_t ip6s_sources_sameif[IP6S_SCOPECNT];
 	/* number of times that an address on a non-outgoing I/F is chosen */
-	u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT];
+	uint64_t ip6s_sources_otherif[IP6S_SCOPECNT];
 	/*
 	 * number of times that an address that has the same scope
 	 * from the destination is chosen.
 	 */
-	u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT];
+	uint64_t ip6s_sources_samescope[IP6S_SCOPECNT];
 	/*
 	 * number of times that an address that has a different scope
 	 * from the destination is chosen.
 	 */
-	u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT];
+	uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT];
 	/* number of times that a deprecated address is chosen */
-	u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT];
+	uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT];
 
 	/* number of times that each rule of source selection is applied. */
-	u_quad_t ip6s_sources_rule[IP6S_RULESMAX];
+	uint64_t ip6s_sources_rule[IP6S_RULESMAX];
 };
 
 #ifdef _KERNEL
-#define	IP6STAT_ADD(name, val)	V_ip6stat.name += (val)
-#define	IP6STAT_SUB(name, val)	V_ip6stat.name -= (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat);
+#define	IP6STAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val))
+#define	IP6STAT_SUB(name, val)	IP6STAT_ADD(name, -(val))
 #define	IP6STAT_INC(name)	IP6STAT_ADD(name, 1)
 #define	IP6STAT_DEC(name)	IP6STAT_SUB(name, 1)
 #endif
@@ -290,8 +294,6 @@
 #define	IPV6_FORWARDING		0x02	/* most of IPv6 header exists */
 #define	IPV6_MINMTU		0x04	/* use minimum MTU (IPV6_USE_MIN_MTU) */
 
-#define	M_IP6_NEXTHOP		M_PROTO7	/* explicit ip nexthop */
-
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP6_HDR_ALIGNED_P(ip)	1
 #else
@@ -298,7 +300,6 @@
 #define IP6_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
 #endif
 
-VNET_DECLARE(struct ip6stat, ip6stat);	/* statistics */
 VNET_DECLARE(int, ip6_defhlim);		/* default hop limit */
 VNET_DECLARE(int, ip6_defmcasthlim);	/* default multicast hop limit */
 VNET_DECLARE(int, ip6_forwarding);	/* act as router? */
@@ -307,7 +308,6 @@
 					 * walk list every 5 sec.    */
 VNET_DECLARE(int, ip6_mcast_pmtu);	/* enable pMTU discovery for multicast? */
 VNET_DECLARE(int, ip6_v6only);
-#define	V_ip6stat			VNET(ip6stat)
 #define	V_ip6_defhlim			VNET(ip6_defhlim)
 #define	V_ip6_defmcasthlim		VNET(ip6_defmcasthlim)
 #define	V_ip6_forwarding		VNET(ip6_forwarding)
@@ -391,7 +391,7 @@
 void	ip6_freepcbopts(struct ip6_pktopts *);
 
 int	ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
-char *	ip6_get_prevhdr(struct mbuf *, int);
+int	ip6_get_prevhdr(struct mbuf *, int);
 int	ip6_nexthdr(struct mbuf *, int, int, int *);
 int	ip6_lasthdr(struct mbuf *, int, int, int *);
 
@@ -407,8 +407,7 @@
 struct mbuf	**ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
 	    struct mbuf **, int *);
 void	ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
-void	ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *,
-			     u_int32_t *);
+void	ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t);
 int	ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 
 void	ip6_forward(struct mbuf *, int);
@@ -427,6 +426,9 @@
 void	ip6_clearpktopts(struct ip6_pktopts *, int);
 struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
 int	ip6_optlen(struct inpcb *);
+int	ip6_deletefraghdr(struct mbuf *, int, int);
+int	ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
+			uint32_t);
 
 int	route6_input(struct mbuf **, int *, int);
 
@@ -457,6 +459,7 @@
 	    struct rtentry **, u_int);
 u_int32_t ip6_randomid(void);
 u_int32_t ip6_randomflowlabel(void);
+void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);
 #endif /* _KERNEL */
 
 #endif /* !_NETINET6_IP6_VAR_H_ */

Modified: trunk/sys/netinet6/ip6protosw.h
===================================================================
--- trunk/sys/netinet6/ip6protosw.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6protosw.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -60,7 +60,7 @@
  *
  *	@(#)protosw.h	8.1 (Berkeley) 6/2/93
  *	BSDI protosw.h,v 2.3 1996/10/11 16:02:40 pjd Exp
- * $FreeBSD: stable/9/sys/netinet6/ip6protosw.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/ip6protosw.h 241916 2012-10-22 21:49:56Z delphij $
  */
 
 #ifndef _NETINET6_IP6PROTOSW_H_

Modified: trunk/sys/netinet6/mld6.c
===================================================================
--- trunk/sys/netinet6/mld6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -65,7 +65,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/mld6.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/mld6.c 291987 2015-12-08 07:31:26Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -276,7 +276,7 @@
 {
 
 #ifdef VIMAGE
-	m->m_pkthdr.header = ifp->if_vnet;
+	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
 #endif /* VIMAGE */
 	m->m_pkthdr.flowid = ifp->if_index;
 }
@@ -285,7 +285,7 @@
 mld_scrub_context(struct mbuf *m)
 {
 
-	m->m_pkthdr.header = NULL;
+	m->m_pkthdr.PH_loc.ptr = NULL;
 	m->m_pkthdr.flowid = 0;
 }
 
@@ -301,7 +301,7 @@
 {
 
 #if defined(VIMAGE) && defined(INVARIANTS)
-	KASSERT(curvnet == m->m_pkthdr.header,
+	KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
 	    ("%s: called when curvnet was not restored", __func__));
 #endif
 	return (m->m_pkthdr.flowid);
@@ -1800,13 +1800,13 @@
 	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
 	/* ia may be NULL if link-local address is tentative. */
 
-	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	mh = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mh == NULL) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return (ENOMEM);
 	}
-	MGET(md, M_DONTWAIT, MT_DATA);
+	md = m_get(M_NOWAIT, MT_DATA);
 	if (md == NULL) {
 		m_free(mh);
 		if (ia != NULL)
@@ -2448,9 +2448,9 @@
 		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
 		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
 		if (!is_state_change && !is_group_query)
-			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m == NULL)
-			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return (-ENOMEM);
 
@@ -2573,9 +2573,9 @@
 			CTR1(KTR_MLD, "%s: outbound queue full", __func__);
 			return (-ENOMEM);
 		}
-		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m == NULL)
-			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return (-ENOMEM);
 		mld_save_context(m, ifp);
@@ -2727,9 +2727,9 @@
 				CTR1(KTR_MLD,
 				    "%s: use previous packet", __func__);
 			} else {
-				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 				if (m == NULL)
-					m = m_gethdr(M_DONTWAIT, MT_DATA);
+					m = m_gethdr(M_NOWAIT, MT_DATA);
 				if (m == NULL) {
 					CTR1(KTR_MLD,
 					    "%s: m_get*() failed", __func__);
@@ -2990,6 +2990,15 @@
 	KASSERT(mli->mli_version == MLD_VERSION_2,
 	    ("%s: called when version %d", __func__, mli->mli_version));
 
+	/*
+	 * Check that there are some packets queued. If so, send them first.
+	 * For large number of groups the reply to general query can take
+	 * many packets, we should finish sending them before starting of
+	 * queuing the new reply.
+	 */
+	if (mli->mli_gq.ifq_head != NULL)
+		goto send;
+
 	ifp = mli->mli_ifp;
 
 	IF_ADDR_RLOCK(ifp);
@@ -3025,6 +3034,7 @@
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
+send:
 	mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
 
 	/*
@@ -3099,7 +3109,7 @@
 	}
 
 	mld_scrub_context(m0);
-	m->m_flags &= ~(M_PROTOFLAGS);
+	m_clrprotoflags(m);
 	m0->m_pkthdr.rcvif = V_loif;
 
 	ip6 = mtod(m0, struct ip6_hdr *);
@@ -3174,7 +3184,7 @@
 	if (ia == NULL)
 		CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
 
-	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	mh = m_gethdr(M_NOWAIT, MT_DATA);
 	if (mh == NULL) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);

Modified: trunk/sys/netinet6/mld6.h
===================================================================
--- trunk/sys/netinet6/mld6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet6/mld6.h 191666 2009-04-29 11:31:23Z bms $
+ * $FreeBSD: stable/10/sys/netinet6/mld6.h 191666 2009-04-29 11:31:23Z bms $
  */
 
 #ifndef _NETINET6_MLD6_H_

Modified: trunk/sys/netinet6/mld6_var.h
===================================================================
--- trunk/sys/netinet6/mld6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet6/mld6_var.h 200871 2009-12-22 20:40:22Z bms $
+ * $FreeBSD: stable/10/sys/netinet6/mld6_var.h 200871 2009-12-22 20:40:22Z bms $
  */
 #ifndef _NETINET6_MLD6_VAR_H_
 #define _NETINET6_MLD6_VAR_H_

Modified: trunk/sys/netinet6/nd6.c
===================================================================
--- trunk/sys/netinet6/nd6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,10 +31,11 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6.c 303458 2016-07-28 20:08:01Z sbruno $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -51,6 +52,7 @@
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/queue.h>
+#include <sys/sdt.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
@@ -63,6 +65,7 @@
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <net/if_llatbl.h>
 #define	L3_ADDR_SIN6(le)	((struct sockaddr_in6 *) L3_ADDR(le))
 #include <netinet/if_ether.h>
@@ -82,7 +85,7 @@
 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
-#define SIN6(s) ((struct sockaddr_in6 *)s)
+#define SIN6(s) ((const struct sockaddr_in6 *)(s))
 
 /* timer values */
 VNET_DEFINE(int, nd6_prune)	= 1;	/* walk list every 1 seconds */
@@ -110,19 +113,13 @@
 VNET_DEFINE(int, nd6_debug) = 0;
 #endif
 
-/* for debugging? */
-#if 0
-static int nd6_inuse, nd6_allocated;
-#endif
-
 VNET_DEFINE(struct nd_drhead, nd_defrouter);
 VNET_DEFINE(struct nd_prhead, nd_prefix);
+VNET_DEFINE(struct rwlock, nd6_lock);
 
 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
 
-static struct sockaddr_in6 all1_sa;
-
 int	(*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
 
 static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
@@ -130,9 +127,13 @@
 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
 static void nd6_slowtimo(void *);
 static int regen_tmpaddr(struct in6_ifaddr *);
-static struct llentry *nd6_free(struct llentry *, int);
+static struct llentry *nd6_free(struct llentry **, int);
 static void nd6_llinfo_timer(void *);
 static void clear_llinfo_pqueue(struct llentry *);
+static int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
+	struct sockaddr_in6 *);
+static int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
+    struct sockaddr_in6 *);
 
 static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
 #define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)
@@ -142,15 +143,11 @@
 void
 nd6_init(void)
 {
-	int i;
 
+	rw_init(&V_nd6_lock, "nd6");
+
 	LIST_INIT(&V_nd_prefix);
 
-	all1_sa.sin6_family = AF_INET6;
-	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
-	for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
-		all1_sa.sin6_addr.s6_addr[i] = 0xff;
-
 	/* initialization of the default router list */
 	TAILQ_INIT(&V_nd_defrouter);
 
@@ -158,6 +155,8 @@
 	callout_init(&V_nd6_slowtimo_ch, 0);
 	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, curvnet);
+
+	nd6_dad_init();
 }
 
 #ifdef VIMAGE
@@ -167,6 +166,7 @@
 
 	callout_drain(&V_nd6_slowtimo_ch);
 	callout_drain(&V_nd6_timer_ch);
+	rw_destroy(&V_nd6_lock);
 }
 #endif
 
@@ -175,7 +175,7 @@
 {
 	struct nd_ifinfo *nd;
 
-	nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
+	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
 	nd->initialized = 1;
 
 	nd->chlim = IPV6_DEFHLIM;
@@ -185,13 +185,25 @@
 
 	nd->flags = ND6_IFF_PERFORMNUD;
 
-	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. */
-	if (V_ip6_auto_linklocal || (ifp->if_flags & IFF_LOOPBACK))
+	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
+	 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
+	 * default regardless of the V_ip6_auto_linklocal configuration to
+	 * give a reasonable default behavior.
+	 */
+	if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
+	    (ifp->if_flags & IFF_LOOPBACK))
 		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
-
-	/* A loopback interface does not need to accept RTADV. */
-	if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK))
-		nd->flags |= ND6_IFF_ACCEPT_RTADV;
+	/*
+	 * A loopback interface does not need to accept RTADV.
+	 * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
+	 * default regardless of the V_ip6_accept_rtadv configuration to
+	 * prevent the interface from accepting RA messages arrived
+	 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
+	 */
+	if (V_ip6_accept_rtadv &&
+	    !(ifp->if_flags & IFF_LOOPBACK) &&
+	    (ifp->if_type != IFT_BRIDGE))
+			nd->flags |= ND6_IFF_ACCEPT_RTADV;
 	if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
 		nd->flags |= ND6_IFF_NO_RADR;
 
@@ -215,6 +227,8 @@
 void
 nd6_setmtu(struct ifnet *ifp)
 {
+	if (ifp->if_afdata[AF_INET6] == NULL)
+		return;
 
 	nd6_setmtu0(ifp, ND_IFINFO(ifp));
 }
@@ -359,6 +373,7 @@
 		case ND_OPT_TARGET_LINKADDR:
 		case ND_OPT_MTU:
 		case ND_OPT_REDIRECTED_HEADER:
+		case ND_OPT_NONCE:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 				nd6log((LOG_INFO,
 				    "duplicated ND6 option found (type=%d)\n",
@@ -425,7 +440,7 @@
 		ln->ln_ntick = 0;
 		canceled = callout_stop(&ln->ln_timer_ch);
 	} else {
-		ln->la_expire = time_second + tick / hz;
+		ln->la_expire = time_uptime + tick / hz;
 		LLE_ADDREF(ln);
 		if (tick > INT_MAX) {
 			ln->ln_ntick = tick - INT_MAX;
@@ -456,15 +471,38 @@
 	struct llentry *ln;
 	struct in6_addr *dst;
 	struct ifnet *ifp;
-	struct nd_ifinfo *ndi = NULL;
+	struct nd_ifinfo *ndi;
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	ln = (struct llentry *)arg;
-	LLE_WLOCK_ASSERT(ln);
 	ifp = ln->lle_tbl->llt_ifp;
-
 	CURVNET_SET(ifp->if_vnet);
 
+	ND6_RLOCK();
+	LLE_WLOCK(ln);
+	if (callout_pending(&ln->la_timer)) {
+		/*
+		 * Here we are a bit odd here in the treatment of 
+		 * active/pending. If the pending bit is set, it got
+		 * rescheduled before I ran. The active
+		 * bit we ignore, since if it was stopped
+		 * in ll_tablefree() and was currently running
+		 * it would have return 0 so the code would
+		 * not have deleted it since the callout could
+		 * not be stopped so we want to go through
+		 * with the delete here now. If the callout
+		 * was restarted, the pending bit will be back on and
+		 * we just want to bail since the callout_reset would
+		 * return 1 and our reference would have been removed
+		 * by nd6_llinfo_settimer_locked above since canceled
+		 * would have been 1.
+		 */
+		LLE_WUNLOCK(ln);
+		ND6_RUNLOCK();
+		CURVNET_RESTORE();
+		return;
+	}
+
 	if (ln->ln_ntick > 0) {
 		if (ln->ln_ntick > INT_MAX) {
 			ln->ln_ntick -= INT_MAX;
@@ -483,8 +521,7 @@
 	}
 
 	if (ln->la_flags & LLE_DELETED) {
-		(void)nd6_free(ln, 0);
-		ln = NULL;
+		(void)nd6_free(&ln, 0);
 		goto done;
 	}
 
@@ -494,7 +531,7 @@
 			ln->la_asked++;
 			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
 			LLE_WUNLOCK(ln);
-			nd6_ns_output(ifp, NULL, dst, ln, 0);
+			nd6_ns_output(ifp, NULL, dst, ln, NULL);
 			LLE_WLOCK(ln);
 		} else {
 			struct mbuf *m = ln->la_hold;
@@ -510,8 +547,7 @@
 				ln->la_hold = m0;
 				clear_llinfo_pqueue(ln);
 			}
-			(void)nd6_free(ln, 0);
-			ln = NULL;
+			(void)nd6_free(&ln, 0);
 			if (m != NULL)
 				icmp6_error2(m, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
@@ -527,7 +563,8 @@
 	case ND6_LLINFO_STALE:
 		/* Garbage Collection(RFC 2461 5.3) */
 		if (!ND6_LLINFO_PERMANENT(ln)) {
-			(void)nd6_free(ln, 1);
+			EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
+			(void)nd6_free(&ln, 1);
 			ln = NULL;
 		}
 		break;
@@ -539,7 +576,7 @@
 			ln->ln_state = ND6_LLINFO_PROBE;
 			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
 			LLE_WUNLOCK(ln);
-			nd6_ns_output(ifp, dst, dst, ln, 0);
+			nd6_ns_output(ifp, dst, dst, ln, NULL);
 			LLE_WLOCK(ln);
 		} else {
 			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
@@ -551,11 +588,10 @@
 			ln->la_asked++;
 			nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
 			LLE_WUNLOCK(ln);
-			nd6_ns_output(ifp, dst, dst, ln, 0);
+			nd6_ns_output(ifp, dst, dst, ln, NULL);
 			LLE_WLOCK(ln);
 		} else {
-			(void)nd6_free(ln, 0);
-			ln = NULL;
+			(void)nd6_free(&ln, 0);
 		}
 		break;
 	default:
@@ -563,8 +599,10 @@
 		    __func__, ln->ln_state);
 	}
 done:
-	if (ln != NULL)
+	if (ln != NULL) {
+		ND6_RUNLOCK();
 		LLE_FREE_LOCKED(ln);
+	}
 	CURVNET_RESTORE();
 }
 
@@ -576,7 +614,7 @@
 nd6_timer(void *arg)
 {
 	CURVNET_SET((struct vnet *) arg);
-	int s;
+	struct nd_drhead drq;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 	struct in6_ifaddr *ia6, *nia6;
@@ -584,11 +622,18 @@
 	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
 	    nd6_timer, curvnet);
 
+	TAILQ_INIT(&drq);
+
 	/* expire default router list */
-	s = splnet();
-	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
-		if (dr->expire && dr->expire < time_second)
-			defrtrlist_del(dr);
+	ND6_WLOCK();
+	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
+		if (dr->expire && dr->expire < time_uptime)
+			defrouter_unlink(dr, &drq);
+	ND6_WUNLOCK();
+
+	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+		TAILQ_REMOVE(&drq, dr, dr_entry);
+		defrouter_del(dr);
 	}
 
 	/*
@@ -654,8 +699,31 @@
 					goto addrloop;
 				}
 			}
+		} else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
+			/*
+			 * Schedule DAD for a tentative address.  This happens
+			 * if the interface was down or not running
+			 * when the address was configured.
+			 */
+			int delay;
+
+			delay = arc4random() %
+			    (MAX_RTR_SOLICITATION_DELAY * hz);
+			nd6_dad_start((struct ifaddr *)ia6, delay);
 		} else {
 			/*
+			 * Check status of the interface.  If it is down,
+			 * mark the address as tentative for future DAD.
+			 */
+			if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
+			    (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
+				== 0 ||
+			    (ND_IFINFO(ia6->ia_ifp)->flags &
+				ND6_IFF_IFDISABLED) != 0) {
+				ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
+				ia6->ia6_flags |= IN6_IFF_TENTATIVE;
+			}
+			/*
 			 * A new RA might have made a deprecated address
 			 * preferred.
 			 */
@@ -671,7 +739,7 @@
 		 * prefix is not necessary.
 		 */
 		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
-		    time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+		    time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
 
 			/*
 			 * address expiration and prefix expiration are
@@ -680,7 +748,6 @@
 			prelist_remove(pr);
 		}
 	}
-	splx(s);
 	CURVNET_RESTORE();
 }
 
@@ -732,11 +799,10 @@
 		 * address with the prefix.
 		 */
 		if (!IFA6_IS_DEPRECATED(it6))
-		    public_ifa6 = it6;
-
-		if (public_ifa6 != NULL)
-			ifa_ref(&public_ifa6->ia_ifa);
+			public_ifa6 = it6;
 	}
+	if (public_ifa6 != NULL)
+		ifa_ref(&public_ifa6->ia_ifa);
 	IF_ADDR_RUNLOCK(ifp);
 
 	if (public_ifa6 != NULL) {
@@ -762,9 +828,12 @@
 void
 nd6_purge(struct ifnet *ifp)
 {
+	struct nd_drhead drq;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 
+	TAILQ_INIT(&drq);
+
 	/*
 	 * Nuke default router list entries toward ifp.
 	 * We defer removal of default router list entries that is installed
@@ -771,22 +840,27 @@
 	 * in the routing table, in order to keep additional side effects as
 	 * small as possible.
 	 */
+	ND6_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (dr->installed)
 			continue;
-
 		if (dr->ifp == ifp)
-			defrtrlist_del(dr);
+			defrouter_unlink(dr, &drq);
 	}
 
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (!dr->installed)
 			continue;
-
 		if (dr->ifp == ifp)
-			defrtrlist_del(dr);
+			defrouter_unlink(dr, &drq);
 	}
+	ND6_WUNLOCK();
 
+	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+		TAILQ_REMOVE(&drq, dr, dr_entry);
+		defrouter_del(dr);
+	}
+
 	/* Nuke prefix list entries toward ifp */
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		if (pr->ndpr_ifp == ifp) {
@@ -995,14 +1069,28 @@
  * that the change is safe.
  */
 static struct llentry *
-nd6_free(struct llentry *ln, int gc)
+nd6_free(struct llentry **lnp, int gc)
 {
-        struct llentry *next;
+	struct ifnet *ifp;
+	struct llentry *ln, *next;
 	struct nd_defrouter *dr;
-	struct ifnet *ifp;
 
+	ln = *lnp;
+	*lnp = NULL;
+
 	LLE_WLOCK_ASSERT(ln);
+	ND6_RLOCK_ASSERT();
 
+	ifp = ln->lle_tbl->llt_ifp;
+	if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
+		dr = defrouter_lookup_locked(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
+	else
+		dr = NULL;
+	ND6_RUNLOCK();
+
+	if ((ln->la_flags & LLE_DELETED) == 0)
+		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
+
 	/*
 	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
 	 * even though it is not harmful, it was not really necessary.
@@ -1011,11 +1099,7 @@
 	/* cancel timer */
 	nd6_llinfo_settimer_locked(ln, -1);
 
-	ifp = ln->lle_tbl->llt_ifp;
-
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
-		dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
-
 		if (dr != NULL && dr->expire &&
 		    ln->ln_state == ND6_LLINFO_STALE && gc) {
 			/*
@@ -1030,9 +1114,9 @@
 			 * XXX: the check for ln_state would be redundant,
 			 *      but we intentionally keep it just in case.
 			 */
-			if (dr->expire > time_second)
+			if (dr->expire > time_uptime)
 				nd6_llinfo_settimer_locked(ln,
-				    (dr->expire - time_second) * hz);
+				    (dr->expire - time_uptime) * hz);
 			else
 				nd6_llinfo_settimer_locked(ln,
 				    (long)V_nd6_gctimer * hz);
@@ -1040,6 +1124,7 @@
 			next = LIST_NEXT(ln, lle_next);
 			LLE_REMREF(ln);
 			LLE_WUNLOCK(ln);
+			defrouter_rele(dr);
 			return (next);
 		}
 
@@ -1122,6 +1207,8 @@
 
 	IF_AFDATA_UNLOCK(ifp);
 
+	if (dr != NULL)
+		defrouter_rele(dr);
 	return (next);
 }
 
@@ -1140,9 +1227,9 @@
 		return;
 
 	ifp = rt->rt_ifp;
-	IF_AFDATA_LOCK(ifp);
+	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
-	IF_AFDATA_UNLOCK(ifp);
+	IF_AFDATA_RUNLOCK(ifp);
 	if (ln == NULL)
 		return;
 
@@ -1200,12 +1287,13 @@
 		/*
 		 * check for default route
 		 */
-		if (IN6_ARE_ADDR_EQUAL(&in6addr_any, 
-				       &SIN6(rt_key(rt))->sin6_addr)) {
-
+		if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
+		    &SIN6(rt_key(rt))->sin6_addr)) {
 			dr = defrouter_lookup(&gateway->sin6_addr, ifp);
-			if (dr != NULL)
+			if (dr != NULL) {
 				dr->installed = 0;
+				defrouter_rele(dr);
+			}
 		}
 		break;
 	}
@@ -1215,100 +1303,14 @@
 int
 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 {
-	struct in6_drlist *drl = (struct in6_drlist *)data;
-	struct in6_oprlist *oprl = (struct in6_oprlist *)data;
 	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
-	struct nd_defrouter *dr;
-	struct nd_prefix *pr;
-	int i = 0, error = 0;
-	int s;
+	int error = 0;
 
+	if (ifp->if_afdata[AF_INET6] == NULL)
+		return (EPFNOSUPPORT);
 	switch (cmd) {
-	case SIOCGDRLST_IN6:
-		/*
-		 * obsolete API, use sysctl under net.inet6.icmp6
-		 */
-		bzero(drl, sizeof(*drl));
-		s = splnet();
-		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
-			if (i >= DRLSTSIZ)
-				break;
-			drl->defrouter[i].rtaddr = dr->rtaddr;
-			in6_clearscope(&drl->defrouter[i].rtaddr);
-
-			drl->defrouter[i].flags = dr->flags;
-			drl->defrouter[i].rtlifetime = dr->rtlifetime;
-			drl->defrouter[i].expire = dr->expire;
-			drl->defrouter[i].if_index = dr->ifp->if_index;
-			i++;
-		}
-		splx(s);
-		break;
-	case SIOCGPRLST_IN6:
-		/*
-		 * obsolete API, use sysctl under net.inet6.icmp6
-		 *
-		 * XXX the structure in6_prlist was changed in backward-
-		 * incompatible manner.  in6_oprlist is used for SIOCGPRLST_IN6,
-		 * in6_prlist is used for nd6_sysctl() - fill_prlist().
-		 */
-		/*
-		 * XXX meaning of fields, especialy "raflags", is very
-		 * differnet between RA prefix list and RR/static prefix list.
-		 * how about separating ioctls into two?
-		 */
-		bzero(oprl, sizeof(*oprl));
-		s = splnet();
-		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
-			struct nd_pfxrouter *pfr;
-			int j;
-
-			if (i >= PRLSTSIZ)
-				break;
-			oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
-			oprl->prefix[i].raflags = pr->ndpr_raf;
-			oprl->prefix[i].prefixlen = pr->ndpr_plen;
-			oprl->prefix[i].vltime = pr->ndpr_vltime;
-			oprl->prefix[i].pltime = pr->ndpr_pltime;
-			oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
-			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
-				oprl->prefix[i].expire = 0;
-			else {
-				time_t maxexpire;
-
-				/* XXX: we assume time_t is signed. */
-				maxexpire = (-1) &
-				    ~((time_t)1 <<
-				    ((sizeof(maxexpire) * 8) - 1));
-				if (pr->ndpr_vltime <
-				    maxexpire - pr->ndpr_lastupdate) {
-					oprl->prefix[i].expire =
-					    pr->ndpr_lastupdate +
-					    pr->ndpr_vltime;
-				} else
-					oprl->prefix[i].expire = maxexpire;
-			}
-
-			j = 0;
-			LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
-				if (j < DRLSTSIZ) {
-#define RTRADDR oprl->prefix[i].advrtr[j]
-					RTRADDR = pfr->router->rtaddr;
-					in6_clearscope(&RTRADDR);
-#undef RTRADDR
-				}
-				j++;
-			}
-			oprl->prefix[i].advrtrs = j;
-			oprl->prefix[i].origin = PR_ORIG_RA;
-
-			i++;
-		}
-		splx(s);
-
-		break;
 	case OSIOCGIFINFO_IN6:
 #define ND	ndi->ndi
 		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
@@ -1358,17 +1360,7 @@
 		struct ifaddr *ifa;
 		struct in6_ifaddr *ia;
 
-		/*
-		 * Try to clear ifdisabled flag when enabling
-		 * accept_rtadv or auto_linklocal.
-		 */
 		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
-		    !(ND.flags & ND6_IFF_IFDISABLED) &&
-		    (ND.flags & (ND6_IFF_ACCEPT_RTADV |
-		    ND6_IFF_AUTO_LINKLOCAL)))
-			ND.flags &= ~ND6_IFF_IFDISABLED;
-
-		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
 		    !(ND.flags & ND6_IFF_IFDISABLED)) {
 			/* ifdisabled 1->0 transision */
 
@@ -1378,8 +1370,6 @@
 			 * do not clear ND6_IFF_IFDISABLED.
 			 * See RFC 4862, Section 5.4.5.
 			 */
-			int duplicated_linklocal = 0;
-
 			IF_ADDR_RLOCK(ifp);
 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 				if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -1386,14 +1376,13 @@
 					continue;
 				ia = (struct in6_ifaddr *)ifa;
 				if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
-				    IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
-					duplicated_linklocal = 1;
+				    IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
 					break;
-				}
 			}
 			IF_ADDR_RUNLOCK(ifp);
 
-			if (duplicated_linklocal) {
+			if (ifa != NULL) {
+				/* LLA is duplicated. */
 				ND.flags |= ND6_IFF_IFDISABLED;
 				log(LOG_ERR, "Cannot enable an interface"
 				    " with a link-local address marked"
@@ -1409,14 +1398,19 @@
 			/* Mark all IPv6 address as tentative. */
 
 			ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
-			IF_ADDR_RLOCK(ifp);
-			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-				if (ifa->ifa_addr->sa_family != AF_INET6)
-					continue;
-				ia = (struct in6_ifaddr *)ifa;
-				ia->ia6_flags |= IN6_IFF_TENTATIVE;
+			if (V_ip6_dad_count > 0 &&
+			    (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
+				IF_ADDR_RLOCK(ifp);
+				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+				    ifa_link) {
+					if (ifa->ifa_addr->sa_family !=
+					    AF_INET6)
+						continue;
+					ia = (struct in6_ifaddr *)ifa;
+					ia->ia6_flags |= IN6_IFF_TENTATIVE;
+				}
+				IF_ADDR_RUNLOCK(ifp);
 			}
-			IF_ADDR_RUNLOCK(ifp);
 		}
 
 		if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
@@ -1434,20 +1428,19 @@
 				 * address is assigned, and IFF_UP, try to
 				 * assign one.
 				 */
-				int haslinklocal = 0;
-			
 				IF_ADDR_RLOCK(ifp);
-				TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-					if (ifa->ifa_addr->sa_family != AF_INET6)
+				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+				    ifa_link) {
+					if (ifa->ifa_addr->sa_family !=
+					    AF_INET6)
 						continue;
 					ia = (struct in6_ifaddr *)ifa;
-					if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
-						haslinklocal = 1;
+					if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
 						break;
-					}
 				}
 				IF_ADDR_RUNLOCK(ifp);
-				if (!haslinklocal)
+				if (ifa != NULL)
+					/* No LLA is configured. */
 					in6_ifattach(ifp, NULL);
 			}
 		}
@@ -1465,7 +1458,6 @@
 		/* flush all the prefix advertised by routers */
 		struct nd_prefix *pr, *next;
 
-		s = splnet();
 		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
 			struct in6_ifaddr *ia, *ia_next;
 
@@ -1484,21 +1476,28 @@
 			}
 			prelist_remove(pr);
 		}
-		splx(s);
 		break;
 	}
 	case SIOCSRTRFLUSH_IN6:
 	{
 		/* flush all the default routers */
-		struct nd_defrouter *dr, *next;
+		struct nd_drhead drq;
+		struct nd_defrouter *dr;
 
-		s = splnet();
+		TAILQ_INIT(&drq);
+
 		defrouter_reset();
-		TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
-			defrtrlist_del(dr);
+
+		ND6_WLOCK();
+		while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
+			defrouter_unlink(dr, &drq);
+		ND6_WUNLOCK();
+		while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+			TAILQ_REMOVE(&drq, dr, dr_entry);
+			defrouter_del(dr);
 		}
+
 		defrouter_select();
-		splx(s);
 		break;
 	}
 	case SIOCGNBRINFO_IN6:
@@ -1520,7 +1519,11 @@
 		nbi->state = ln->ln_state;
 		nbi->asked = ln->la_asked;
 		nbi->isrouter = ln->ln_router;
-		nbi->expire = ln->la_expire;
+		if (ln->la_expire == 0)
+			nbi->expire = 0;
+		else
+			nbi->expire = ln->la_expire +
+			    (time_second - time_uptime);
 		LLE_RUNLOCK(ln);
 		break;
 	}
@@ -1579,16 +1582,16 @@
 	 * description on it in NS section (RFC 2461 7.2.3).
 	 */
 	flags = lladdr ? ND6_EXCLUSIVE : 0;
-	IF_AFDATA_LOCK(ifp);
+	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(from, flags, ifp);
-
+	IF_AFDATA_RUNLOCK(ifp);
 	if (ln == NULL) {
 		flags |= ND6_EXCLUSIVE;
+		IF_AFDATA_LOCK(ifp);
 		ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
 		IF_AFDATA_UNLOCK(ifp);
 		is_newentry = 1;
 	} else {
-		IF_AFDATA_UNLOCK(ifp);		
 		/* do nothing if static ndp is set */
 		if (ln->la_flags & LLE_STATIC) {
 			static_route = 1;
@@ -1624,6 +1627,7 @@
 		 */
 		bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 		ln->la_flags |= LLE_VALID;
+		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 	}
 
 	if (!is_newentry) {
@@ -1648,42 +1652,8 @@
 		ln->ln_state = newstate;
 
 		if (ln->ln_state == ND6_LLINFO_STALE) {
-			/*
-			 * XXX: since nd6_output() below will cause
-			 * state tansition to DELAY and reset the timer,
-			 * we must set the timer now, although it is actually
-			 * meaningless.
-			 */
-			nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
-
-			if (ln->la_hold) {
-				struct mbuf *m_hold, *m_hold_next;
-
-				/*
-				 * reset the la_hold in advance, to explicitly
-				 * prevent a la_hold lookup in nd6_output()
-				 * (wouldn't happen, though...)
-				 */
-				for (m_hold = ln->la_hold, ln->la_hold = NULL;
-				    m_hold; m_hold = m_hold_next) {
-					m_hold_next = m_hold->m_nextpkt;
-					m_hold->m_nextpkt = NULL;
-
-					/*
-					 * we assume ifp is not a p2p here, so
-					 * just set the 2nd argument as the
-					 * 1st one.
-					 */
-					nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
-				}
-				/*
-				 * If we have mbufs in the chain we need to do
-				 * deferred transmit. Copy the address from the
-				 * llentry before dropping the lock down below.
-				 */
-				if (chain != NULL)
-					memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
-			}
+			if (ln->la_hold != NULL)
+				nd6_grab_holdchain(ln, &chain, &sin6);
 		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 			/* probe right away */
 			nd6_llinfo_settimer_locked((void *)ln, 0);
@@ -1766,8 +1736,8 @@
 		if (static_route)
 			ln = NULL;
 	}
-	if (chain)
-		nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
+	if (chain != NULL)
+		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
 	
 	/*
 	 * When the link-layer address of a router changes, select the
@@ -1816,6 +1786,8 @@
 	    nd6_slowtimo, curvnet);
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+		if (ifp->if_afdata[AF_INET6] == NULL)
+			continue;
 		nd6if = ND_IFINFO(ifp);
 		if (nd6if->basereachable && /* already initialized */
 		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
@@ -1833,46 +1805,154 @@
 	CURVNET_RESTORE();
 }
 
+void
+nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
+    struct sockaddr_in6 *sin6)
+{
+
+	LLE_WLOCK_ASSERT(ln);
+
+	*chain = ln->la_hold;
+	ln->la_hold = NULL;
+	memcpy(sin6, L3_ADDR_SIN6(ln), sizeof(*sin6));
+
+	if (ln->ln_state == ND6_LLINFO_STALE) {
+
+		/*
+		 * The first time we send a packet to a
+		 * neighbor whose entry is STALE, we have
+		 * to change the state to DELAY and a sets
+		 * a timer to expire in DELAY_FIRST_PROBE_TIME
+		 * seconds to ensure do neighbor unreachability
+		 * detection on expiration.
+		 * (RFC 2461 7.3.3)
+		 */
+		ln->la_asked = 0;
+		ln->ln_state = ND6_LLINFO_DELAY;
+		nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
+	}
+}
+
+static int
+nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+    struct sockaddr_in6 *dst)
+{
+	int error;
+	int ip6len;
+	struct ip6_hdr *ip6;
+	struct m_tag *mtag;
+
+#ifdef MAC
+	mac_netinet6_nd6_send(ifp, m);
+#endif
+
+	/*
+	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
+	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
+	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
+	 * to be diverted to user space.  When re-injected into the kernel,
+	 * send_output() will directly dispatch them to the outgoing interface.
+	 */
+	if (send_sendso_input_hook != NULL) {
+		mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
+		if (mtag != NULL) {
+			ip6 = mtod(m, struct ip6_hdr *);
+			ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+			/* Use the SEND socket */
+			error = send_sendso_input_hook(m, ifp, SND_OUT,
+			    ip6len);
+			/* -1 == no app on SEND socket */
+			if (error == 0 || error != -1)
+			    return (error);
+		}
+	}
+
+	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
+	IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
+	    mtod(m, struct ip6_hdr *));
+
+	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
+		origifp = ifp;
+
+	error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL);
+	return (error);
+}
+
+/*
+ * IPv6 packet output - light version.
+ * Checks if destination LLE exists and is in proper state
+ * (e.g no modification required). If not true, fall back to
+ * "heavy" version.
+ */
 int
-nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
+nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
     struct sockaddr_in6 *dst, struct rtentry *rt0)
 {
+	struct llentry *ln = NULL;
 
-	return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
+	/* discard the packet if IPv6 operation is disabled on the interface */
+	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+		m_freem(m);
+		return (ENETDOWN); /* better error? */
+	}
+
+	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
+		goto sendpkt;
+
+	if (nd6_need_cache(ifp) == 0)
+		goto sendpkt;
+
+	IF_AFDATA_RLOCK(ifp);
+	ln = nd6_lookup(&dst->sin6_addr, 0, ifp);
+	IF_AFDATA_RUNLOCK(ifp);
+
+	/*
+	 * Perform fast path for the following cases:
+	 * 1) lle state is REACHABLE
+	 * 2) lle state is DELAY (NS message sentNS message sent)
+	 *
+	 * Every other case involves lle modification, so we handle
+	 * them separately.
+	 */
+	if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE &&
+	    ln->ln_state != ND6_LLINFO_DELAY)) {
+		/* Fall back to slow processing path */
+		if (ln != NULL)
+			LLE_RUNLOCK(ln);
+		return (nd6_output_lle(ifp, origifp, m, dst));
+	}
+
+sendpkt:
+	if (ln != NULL)
+		LLE_RUNLOCK(ln);
+
+	return (nd6_output_ifp(ifp, origifp, m, dst));
 }
 
 
 /*
- * Note that I'm not enforcing any global serialization
- * lle state or asked changes here as the logic is too
- * complicated to avoid having to always acquire an exclusive
- * lock
- * KMM
+ * Output IPv6 packet - heavy version.
+ * Function assume that either
+ * 1) destination LLE does not exist, is invalid or stale, so
+ *   ND6_EXCLUSIVE lock needs to be acquired
+ * 2) destination lle is provided (with ND6_EXCLUSIVE lock),
+ *   in that case packets are queued in &chain.
  *
  */
-#define senderr(e) { error = (e); goto bad;}
-
-int
-nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
-    struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
-	struct mbuf **chain)
+static int
+nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+    struct sockaddr_in6 *dst)
 {
-	struct mbuf *m = m0;
-	struct m_tag *mtag;
-	struct llentry *ln = lle;
-	struct ip6_hdr *ip6;
-	int error = 0;
+	struct llentry *lle = NULL;
 	int flags = 0;
-	int ip6len;
 
-#ifdef INVARIANTS
-	if (lle != NULL) {
-		
-		LLE_WLOCK_ASSERT(lle);
+	KASSERT(m != NULL, ("NULL mbuf, nothing to send"));
+	/* discard the packet if IPv6 operation is disabled on the interface */
+	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+		m_freem(m);
+		return (ENETDOWN); /* better error? */
+	}
 
-		KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
-	}
-#endif
 	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 		goto sendpkt;
 
@@ -1880,35 +1960,28 @@
 		goto sendpkt;
 
 	/*
-	 * next hop determination.  This routine is derived from ether_output.
-	 */
-
-	/*
 	 * Address resolution or Neighbor Unreachability Detection
 	 * for the next hop.
 	 * At this point, the destination of the packet must be a unicast
 	 * or an anycast address(i.e. not a multicast).
 	 */
-
-	flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
-	if (ln == NULL) {
-	retry:
-		IF_AFDATA_LOCK(ifp);
-		ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
-		IF_AFDATA_UNLOCK(ifp);
-		if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
+	if (lle == NULL) {
+		IF_AFDATA_RLOCK(ifp);
+		lle = nd6_lookup(&dst->sin6_addr, ND6_EXCLUSIVE, ifp);
+		IF_AFDATA_RUNLOCK(ifp);
+		if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
 			/*
 			 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 			 * the condition below is not very efficient.  But we believe
 			 * it is tolerable, because this should be a rare case.
 			 */
-			flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
+			flags = ND6_CREATE | ND6_EXCLUSIVE;
 			IF_AFDATA_LOCK(ifp);
-			ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
+			lle = nd6_lookup(&dst->sin6_addr, flags, ifp);
 			IF_AFDATA_UNLOCK(ifp);
 		}
 	} 
-	if (ln == NULL) {
+	if (lle == NULL) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
 		    !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
 			char ip6buf[INET6_ADDRSTRLEN];
@@ -1915,21 +1988,20 @@
 			log(LOG_DEBUG,
 			    "nd6_output: can't allocate llinfo for %s "
 			    "(ln=%p)\n",
-			    ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
-			senderr(EIO);	/* XXX: good error? */
+			    ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
+			m_freem(m);
+			return (ENOBUFS);
 		}
 		goto sendpkt;	/* send anyway */
 	}
 
+	LLE_WLOCK_ASSERT(lle);
+
 	/* We don't have to do link-layer address resolution on a p2p link. */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
-	    ln->ln_state < ND6_LLINFO_REACHABLE) {
-		if ((flags & LLE_EXCLUSIVE) == 0) {
-			flags |= LLE_EXCLUSIVE;
-			goto retry;
-		}
-		ln->ln_state = ND6_LLINFO_STALE;
-		nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+	    lle->ln_state < ND6_LLINFO_REACHABLE) {
+		lle->ln_state = ND6_LLINFO_STALE;
+		nd6_llinfo_settimer_locked(lle, (long)V_nd6_gctimer * hz);
 	}
 
 	/*
@@ -1939,15 +2011,10 @@
 	 * neighbor unreachability detection on expiration.
 	 * (RFC 2461 7.3.3)
 	 */
-	if (ln->ln_state == ND6_LLINFO_STALE) {
-		if ((flags & LLE_EXCLUSIVE) == 0) {
-			flags |= LLE_EXCLUSIVE;
-			LLE_RUNLOCK(ln);
-			goto retry;
-		}
-		ln->la_asked = 0;
-		ln->ln_state = ND6_LLINFO_DELAY;
-		nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
+	if (lle->ln_state == ND6_LLINFO_STALE) {
+		lle->la_asked = 0;
+		lle->ln_state = ND6_LLINFO_DELAY;
+		nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz);
 	}
 
 	/*
@@ -1955,7 +2022,7 @@
 	 * (i.e. its link-layer address is already resolved), just
 	 * send the packet.
 	 */
-	if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
+	if (lle->ln_state > ND6_LLINFO_INCOMPLETE)
 		goto sendpkt;
 
 	/*
@@ -1965,23 +2032,15 @@
 	 * does not exceed nd6_maxqueuelen.  When it exceeds nd6_maxqueuelen,
 	 * the oldest packet in the queue will be removed.
 	 */
-	if (ln->ln_state == ND6_LLINFO_NOSTATE)
-		ln->ln_state = ND6_LLINFO_INCOMPLETE;
+	if (lle->ln_state == ND6_LLINFO_NOSTATE)
+		lle->ln_state = ND6_LLINFO_INCOMPLETE;
 
-	if ((flags & LLE_EXCLUSIVE) == 0) {
-		flags |= LLE_EXCLUSIVE;
-		LLE_RUNLOCK(ln);
-		goto retry;
-	}
-
-	LLE_WLOCK_ASSERT(ln);
-
-	if (ln->la_hold) {
+	if (lle->la_hold != NULL) {
 		struct mbuf *m_hold;
 		int i;
 		
 		i = 0;
-		for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
+		for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
 			i++;
 			if (m_hold->m_nextpkt == NULL) {
 				m_hold->m_nextpkt = m;
@@ -1989,13 +2048,13 @@
 			}
 		}
 		while (i >= V_nd6_maxqueuelen) {
-			m_hold = ln->la_hold;
-			ln->la_hold = ln->la_hold->m_nextpkt;
+			m_hold = lle->la_hold;
+			lle->la_hold = lle->la_hold->m_nextpkt;
 			m_freem(m_hold);
 			i--;
 		}
 	} else {
-		ln->la_hold = m;
+		lle->la_hold = m;
 	}
 
 	/*
@@ -2002,121 +2061,31 @@
 	 * If there has been no NS for the neighbor after entering the
 	 * INCOMPLETE state, send the first solicitation.
 	 */
-	if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
-		ln->la_asked++;
+	if (!ND6_LLINFO_PERMANENT(lle) && lle->la_asked == 0) {
+		lle->la_asked++;
 		
-		nd6_llinfo_settimer_locked(ln,
+		nd6_llinfo_settimer_locked(lle,
 		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
-		LLE_WUNLOCK(ln);
-		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
-		if (lle != NULL && ln == lle)
-			LLE_WLOCK(lle);
-
-	} else if (lle == NULL || ln != lle) {
-		/*
-		 * We did the lookup (no lle arg) so we
-		 * need to do the unlock here.
-		 */
-		LLE_WUNLOCK(ln);
+		LLE_WUNLOCK(lle);
+		nd6_ns_output(ifp, NULL, &dst->sin6_addr, lle, NULL);
+	} else {
+		/* We did the lookup so we need to do the unlock here. */
+		LLE_WUNLOCK(lle);
 	}
 
 	return (0);
 
   sendpkt:
-	/* discard the packet if IPv6 operation is disabled on the interface */
-	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
-		error = ENETDOWN; /* better error? */
-		goto bad;
-	}
-	/*
-	 * ln is valid and the caller did not pass in 
-	 * an llentry
-	 */
-	if ((ln != NULL) && (lle == NULL)) {
-		if (flags & LLE_EXCLUSIVE)
-			LLE_WUNLOCK(ln);
-		else
-			LLE_RUNLOCK(ln);
-	}
+	if (lle != NULL)
+		LLE_WUNLOCK(lle);
 
-#ifdef MAC
-	mac_netinet6_nd6_send(ifp, m);
-#endif
-
-	/*
-	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
-	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
-	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
-	 * to be diverted to user space.  When re-injected into the kernel,
-	 * send_output() will directly dispatch them to the outgoing interface.
-	 */
-	if (send_sendso_input_hook != NULL) {
-		mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
-		if (mtag != NULL) {
-			ip6 = mtod(m, struct ip6_hdr *);
-			ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
-			/* Use the SEND socket */
-			error = send_sendso_input_hook(m, ifp, SND_OUT,
-			    ip6len);
-			/* -1 == no app on SEND socket */
-			if (error == 0 || error != -1)
-			    return (error);
-		}
-	}
-
-	/*
-	 * We were passed in a pointer to an lle with the lock held 
-	 * this means that we can't call if_output as we will
-	 * recurse on the lle lock - so what we do is we create
-	 * a list of mbufs to send and transmit them in the caller
-	 * after the lock is dropped
-	 */
-	if (lle != NULL) {
-		if (*chain == NULL)
-			*chain = m;
-		else {
-			struct mbuf *mb;
-
-			/*
-			 * append mbuf to end of deferred chain
-			 */
-			mb = *chain;
-			while (mb->m_nextpkt != NULL)
-				mb = mb->m_nextpkt;
-			mb->m_nextpkt = m;
-		}
-		return (error);
-	}
-	/* Reset layer specific mbuf flags to avoid confusing lower layers. */
-	m->m_flags &= ~(M_PROTOFLAGS);  
-	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
-		return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
-		    NULL));
-	}
-	error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
-	return (error);
-
-  bad:
-	/*
-	 * ln is valid and the caller did not pass in 
-	 * an llentry
-	 */
-	if ((ln != NULL) && (lle == NULL)) {
-		if (flags & LLE_EXCLUSIVE)
-			LLE_WUNLOCK(ln);
-		else
-			LLE_RUNLOCK(ln);
-	}
-	if (m)
-		m_freem(m);
-	return (error);
+	return (nd6_output_ifp(ifp, origifp, m, dst));
 }
-#undef senderr
 
 
 int
-nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
-    struct sockaddr_in6 *dst, struct route *ro)
+nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
+    struct sockaddr_in6 *dst)
 {
 	struct mbuf *m, *m_head;
 	struct ifnet *outifp;
@@ -2131,7 +2100,7 @@
 	while (m_head) {
 		m = m_head;
 		m_head = m_head->m_nextpkt;
-		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);			       
+		error = nd6_output_ifp(ifp, origifp, m, dst);
 	}
 
 	/*
@@ -2165,13 +2134,7 @@
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
-#ifdef IFT_CARP
-	case IFT_CARP:
-#endif
 	case IFT_INFINIBAND:
-	case IFT_GIF:		/* XXX need more cases? */
-	case IFT_PPP:
-	case IFT_TUNNEL:
 	case IFT_BRIDGE:
 	case IFT_PROPVIRTUAL:
 		return (1);
@@ -2186,13 +2149,13 @@
  */
 int
 nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
-    struct sockaddr *dst, u_char *desten, struct llentry **lle)
+    const struct sockaddr *dst, u_char *desten, struct llentry **lle)
 {
 	struct llentry *ln;
 
 	*lle = NULL;
 	IF_AFDATA_UNLOCK_ASSERT(ifp);
-	if (m->m_flags & M_MCAST) {
+	if (m != NULL && m->m_flags & M_MCAST) {
 		int i;
 
 		switch (ifp->if_type) {
@@ -2261,7 +2224,6 @@
 	}
 
 	ln->la_hold = NULL;
-	return;
 }
 
 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
@@ -2275,6 +2237,8 @@
 	CTLFLAG_RD, nd6_sysctl_prlist, "");
 SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
 	CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
+SYSCTL_VNET_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
+	CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
 
 static int
 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
@@ -2283,30 +2247,33 @@
 	struct nd_defrouter *dr;
 	int error;
 
-	if (req->newptr)
+	if (req->newptr != NULL)
 		return (EPERM);
 
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
 	bzero(&d, sizeof(d));
 	d.rtaddr.sin6_family = AF_INET6;
 	d.rtaddr.sin6_len = sizeof(d.rtaddr);
 
-	/*
-	 * XXX locking
-	 */
+	ND6_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		d.rtaddr.sin6_addr = dr->rtaddr;
 		error = sa6_recoverscope(&d.rtaddr);
 		if (error != 0)
-			return (error);
-		d.flags = dr->flags;
+			break;
+		d.flags = dr->raflags;
 		d.rtlifetime = dr->rtlifetime;
-		d.expire = dr->expire;
+		d.expire = dr->expire + (time_second - time_uptime);
 		d.if_index = dr->ifp->if_index;
 		error = SYSCTL_OUT(req, &d, sizeof(d));
 		if (error != 0)
-			return (error);
+			break;
 	}
-	return (0);
+	ND6_RUNLOCK();
+	return (error);
 }
 
 static int
@@ -2352,7 +2319,8 @@
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
 				p.expire = pr->ndpr_lastupdate +
-				    pr->ndpr_vltime;
+				    pr->ndpr_vltime +
+				    (time_second - time_uptime);
 			else
 				p.expire = maxexpire;
 		}

Modified: trunk/sys/netinet6/nd6.h
===================================================================
--- trunk/sys/netinet6/nd6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $
- * $FreeBSD: stable/9/sys/netinet6/nd6.h 245555 2013-01-17 16:39:21Z ume $
+ * $FreeBSD: stable/10/sys/netinet6/nd6.h 303458 2016-07-28 20:08:01Z sbruno $
  */
 
 #ifndef _NETINET6_ND6_H_
@@ -80,7 +80,7 @@
 
 #define ND6_IFF_PERFORMNUD	0x1
 #define ND6_IFF_ACCEPT_RTADV	0x2
-#define ND6_IFF_PREFER_SOURCE	0x4 /* XXX: not related to ND. */
+#define ND6_IFF_PREFER_SOURCE	0x4 /* Not used in FreeBSD. */
 #define ND6_IFF_IFDISABLED	0x8 /* IPv6 operation is disabled due to
 				     * DAD failure.  (XXX: not ND-specific)
 				     */
@@ -88,6 +88,7 @@
 #define ND6_IFF_AUTO_LINKLOCAL	0x20
 #define	ND6_IFF_NO_RADR		0x40
 #define ND6_IFF_NO_PREFER_IFACE	0x80 /* XXX: not related to ND. */
+#define ND6_IFF_NO_DAD		0x100
 
 #define	ND6_CREATE		LLE_CREATE
 #define	ND6_EXCLUSIVE		LLE_EXCLUSIVE
@@ -235,14 +236,15 @@
 		((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
 
 TAILQ_HEAD(nd_drhead, nd_defrouter);
-struct	nd_defrouter {
+struct nd_defrouter {
 	TAILQ_ENTRY(nd_defrouter) dr_entry;
-	struct	in6_addr rtaddr;
-	u_char	flags;		/* flags on RA message */
+	struct in6_addr rtaddr;
+	u_char	raflags;	/* flags on RA message */
 	u_short	rtlifetime;
 	u_long	expire;
-	struct  ifnet *ifp;
+	struct ifnet *ifp;
 	int	installed;	/* is installed into kernel routing table */
+	u_int	refcnt;
 };
 
 struct nd_prefixctl {
@@ -342,6 +344,19 @@
 #define	V_nd6_debug			VNET(nd6_debug)
 #define	V_nd6_onlink_ns_rfc4861		VNET(nd6_onlink_ns_rfc4861)
 
+/* Lock for the prefix and default router lists. */
+VNET_DECLARE(struct rwlock, nd6_lock);
+#define	V_nd6_lock			VNET(nd6_lock)
+
+#define	ND6_RLOCK()			rw_rlock(&V_nd6_lock)
+#define	ND6_RUNLOCK()			rw_runlock(&V_nd6_lock)
+#define	ND6_WLOCK()			rw_wlock(&V_nd6_lock)
+#define	ND6_WUNLOCK()			rw_wunlock(&V_nd6_lock)
+#define	ND6_WLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_WLOCKED)
+#define	ND6_RLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_RLOCKED)
+#define	ND6_LOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_LOCKED)
+#define	ND6_UNLOCK_ASSERT()		rw_assert(&V_nd6_lock, RA_UNLOCKED)
+
 #define nd6log(x)	do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
 
 VNET_DECLARE(struct callout, nd6_timer_ch);
@@ -360,7 +375,7 @@
 #define	V_ip6_temp_regen_advance	VNET(ip6_temp_regen_advance)
 
 union nd_opts {
-	struct nd_opt_hdr *nd_opt_array[8];	/* max = target address list */
+	struct nd_opt_hdr *nd_opt_array[16];	/* max = ND_OPT_NONCE */
 	struct {
 		struct nd_opt_hdr *zero;
 		struct nd_opt_hdr *src_lladdr;
@@ -368,6 +383,16 @@
 		struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
 		struct nd_opt_rd_hdr *rh;
 		struct nd_opt_mtu *mtu;
+		struct nd_opt_hdr *__res6;
+		struct nd_opt_hdr *__res7;
+		struct nd_opt_hdr *__res8;
+		struct nd_opt_hdr *__res9;
+		struct nd_opt_hdr *__res10;
+		struct nd_opt_hdr *__res11;
+		struct nd_opt_hdr *__res12;
+		struct nd_opt_hdr *__res13;
+		struct nd_opt_nonce *nonce;
+		struct nd_opt_hdr *__res15;
 		struct nd_opt_hdr *search;	/* multiple opts */
 		struct nd_opt_hdr *last;	/* multiple opts */
 		int done;
@@ -380,6 +405,7 @@
 #define nd_opts_pi_end		nd_opt_each.pi_end
 #define nd_opts_rh		nd_opt_each.rh
 #define nd_opts_mtu		nd_opt_each.mtu
+#define nd_opts_nonce		nd_opt_each.nonce
 #define nd_opts_search		nd_opt_each.search
 #define nd_opts_last		nd_opt_each.last
 #define nd_opts_done		nd_opt_each.done
@@ -411,14 +437,13 @@
 	char *, int, int, int);
 int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
 	struct sockaddr_in6 *, struct rtentry *);
-int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
-	struct sockaddr_in6 *, struct rtentry *, struct llentry *,
-	struct mbuf **);
-int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *,
-	struct sockaddr_in6 *, struct route *);
+void nd6_grab_holdchain(struct llentry *, struct mbuf **,
+    struct sockaddr_in6 *);
+int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
+    struct sockaddr_in6 *);
 int nd6_need_cache(struct ifnet *);
 int nd6_storelladdr(struct ifnet *, struct mbuf *,
-	struct sockaddr *, u_char *, struct llentry **);
+	const struct sockaddr *, u_char *, struct llentry **);
 
 /* nd6_nbr.c */
 void nd6_na_input(struct mbuf *, int, int);
@@ -426,24 +451,28 @@
 	const struct in6_addr *, u_long, int, struct sockaddr *);
 void nd6_ns_input(struct mbuf *, int, int);
 void nd6_ns_output(struct ifnet *, const struct in6_addr *,
-	const struct in6_addr *, struct llentry *, int);
+	const struct in6_addr *, struct llentry *, uint8_t *);
 caddr_t nd6_ifptomac(struct ifnet *);
+void nd6_dad_init(void);
 void nd6_dad_start(struct ifaddr *, int);
 void nd6_dad_stop(struct ifaddr *);
-void nd6_dad_duplicated(struct ifaddr *);
 
 /* nd6_rtr.c */
 void nd6_rs_input(struct mbuf *, int, int);
 void nd6_ra_input(struct mbuf *, int, int);
-void prelist_del(struct nd_prefix *);
 void defrouter_reset(void);
 void defrouter_select(void);
-void defrtrlist_del(struct nd_defrouter *);
+void defrouter_ref(struct nd_defrouter *);
+void defrouter_rele(struct nd_defrouter *);
+bool defrouter_remove(struct in6_addr *, struct ifnet *);
+void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
+void defrouter_del(struct nd_defrouter *);
 void prelist_remove(struct nd_prefix *);
 int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
 	struct nd_prefix **);
 void pfxlist_onlink_check(void);
 struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
 struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
 void rt6_flush(struct in6_addr *, struct ifnet *);
 int nd6_setdefaultiface(int);

Modified: trunk/sys/netinet6/nd6_nbr.c
===================================================================
--- trunk/sys/netinet6/nd6_nbr.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6_nbr.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6_nbr.c 240305 2012-09-10 11:38:02Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6_nbr.c 303458 2016-07-28 20:08:01Z sbruno $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -41,6 +41,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
+#include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/mbuf.h>
@@ -49,9 +50,11 @@
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
+#include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/callout.h>
+#include <sys/refcount.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -61,6 +64,7 @@
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
+#include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -79,19 +83,32 @@
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 struct dadq;
-static struct dadq *nd6_dad_find(struct ifaddr *);
-static void nd6_dad_starttimer(struct dadq *, int);
+static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
+static void nd6_dad_add(struct dadq *dp);
+static void nd6_dad_del(struct dadq *dp);
+static void nd6_dad_rele(struct dadq *);
+static void nd6_dad_starttimer(struct dadq *, int, int);
 static void nd6_dad_stoptimer(struct dadq *);
 static void nd6_dad_timer(struct dadq *);
-static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
-static void nd6_dad_ns_input(struct ifaddr *);
+static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
+static void nd6_dad_ns_output(struct dadq *);
+static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
 static void nd6_dad_na_input(struct ifaddr *);
 static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
     const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
+static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
+    const struct in6_addr *, struct llentry *, uint8_t *, u_int);
 
-VNET_DEFINE(int, dad_ignore_ns) = 0;	/* ignore NS in DAD - specwise incorrect*/
-VNET_DEFINE(int, dad_maxtry) = 15;	/* max # of *tries* to transmit DAD packet */
-#define	V_dad_ignore_ns			VNET(dad_ignore_ns)
+static VNET_DEFINE(int, dad_enhanced) = 1;
+#define	V_dad_enhanced			VNET(dad_enhanced)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(dad_enhanced), 0,
+    "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
+
+static VNET_DEFINE(int, dad_maxtry) = 15;	/* max # of *tries* to
+						   transmit DAD packet */
 #define	V_dad_maxtry			VNET(dad_maxtry)
 
 /*
@@ -228,46 +245,33 @@
 	/* (1) and (3) check. */
 	if (ifp->if_carp)
 		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
-	if (ifa == NULL)
+	else
 		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
 
 	/* (2) check. */
 	if (ifa == NULL) {
-		struct rtentry *rt;
-		struct sockaddr_in6 tsin6;
+		struct route_in6 ro;
 		int need_proxy;
-#ifdef RADIX_MPATH
-		struct route_in6 ro;
-#endif
 
-		bzero(&tsin6, sizeof tsin6);
-		tsin6.sin6_len = sizeof(struct sockaddr_in6);
-		tsin6.sin6_family = AF_INET6;
-		tsin6.sin6_addr = taddr6;
+		bzero(&ro, sizeof(ro));
+		ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+		ro.ro_dst.sin6_family = AF_INET6;
+		ro.ro_dst.sin6_addr = taddr6;
 
 		/* Always use the default FIB. */
 #ifdef RADIX_MPATH
-		bzero(&ro, sizeof(ro));
-		ro.ro_dst = tsin6;
 		rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE,
 		    RT_DEFAULT_FIB);
-		rt = ro.ro_rt;
 #else
-		rt = in6_rtalloc1((struct sockaddr *)&tsin6, 0, 0,
-		    RT_DEFAULT_FIB);
+		in6_rtalloc(&ro, RT_DEFAULT_FIB);
 #endif
-		need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 &&
-		    rt->rt_gateway->sa_family == AF_LINK);
-		if (rt != NULL) {
-			/*
-			 * Make a copy while we can be sure that rt_gateway
-			 * is still stable before unlocking to avoid lock
-			 * order problems.  proxydl will only be used if
-			 * proxy will be set in the next block.
-			 */
+		need_proxy = (ro.ro_rt &&
+		    (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 &&
+		    ro.ro_rt->rt_gateway->sa_family == AF_LINK);
+		if (ro.ro_rt != NULL) {
 			if (need_proxy)
-				proxydl = *SDL(rt->rt_gateway);
-			RTFREE_LOCKED(rt);
+				proxydl = *SDL(ro.ro_rt->rt_gateway);
+			RTFREE(ro.ro_rt);
 		}
 		if (need_proxy) {
 			/*
@@ -328,7 +332,7 @@
 		 * silently ignore it.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
-			nd6_dad_ns_input(ifa);
+			nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
 
 		goto freeit;
 	}
@@ -389,12 +393,14 @@
  * Based on RFC 2461
  * Based on RFC 2462 (duplicate address detection)
  *
- *   ln - for source address determination
- *  dad - duplicate address detection
+ *    ln - for source address determination
+ * nonce - If non-NULL, NS is used for duplicate address detection and
+ *         the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
  */
-void
-nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, 
-    const struct in6_addr *taddr6, struct llentry *ln, int dad)
+static void
+nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6,
+    const struct in6_addr *taddr6, struct llentry *ln, uint8_t *nonce,
+    u_int fibnum)
 {
 	struct mbuf *m;
 	struct m_tag *mtag;
@@ -412,25 +418,18 @@
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
-	if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
-		printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
-		    "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
-		return;
-	}
+	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+	    __func__, max_linkhdr, maxlen, MCLBYTES));
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
-	if (m && max_linkhdr + maxlen >= MHLEN) {
-		MCLGET(m, M_DONTWAIT);
-		if ((m->m_flags & M_EXT) == 0) {
-			m_free(m);
-			m = NULL;
-		}
-	}
+
+	if (max_linkhdr + maxlen > MHLEN)
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	else
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
-	m->m_pkthdr.rcvif = NULL;
+	M_SETFIB(m, fibnum);
 
 	bzero(&ro, sizeof(ro));
 
@@ -465,7 +464,7 @@
 		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
 			goto bad;
 	}
-	if (!dad) {
+	if (nonce == NULL) {
 		struct ifaddr *ifa;
 
 		/*
@@ -524,9 +523,8 @@
 			    NULL, &ro, NULL, &oifp, &src_in);
 			if (error) {
 				char ip6buf[INET6_ADDRSTRLEN];
-				nd6log((LOG_DEBUG,
-				    "nd6_ns_output: source can't be "
-				    "determined: dst=%s, error=%d\n",
+				nd6log((LOG_DEBUG, "%s: source can't be "
+				    "determined: dst=%s, error=%d\n", __func__,
 				    ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
 				    error));
 				goto bad;
@@ -562,7 +560,7 @@
 	 *	Multicast NS		MUST add one	add the option
 	 *	Unicast NS		SHOULD add one	add the option
 	 */
-	if (!dad && (mac = nd6_ifptomac(ifp))) {
+	if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
 		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
 		/* 8 byte alignments... */
@@ -576,7 +574,26 @@
 		nd_opt->nd_opt_len = optlen >> 3;
 		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
 	}
+	/*
+	 * Add a Nonce option (RFC 3971) to detect looped back NS messages.
+	 * This behavior is documented as Enhanced Duplicate Address
+	 * Detection in RFC 7527.
+	 * net.inet6.ip6.dad_enhanced=0 disables this.
+	 */
+	if (V_dad_enhanced != 0 && nonce != NULL) {
+		int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
+		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+		/* 8-byte alignment is required. */
+		optlen = (optlen + 7) & ~7;
 
+		m->m_pkthdr.len += optlen;
+		m->m_len += optlen;
+		icmp6len += optlen;
+		bzero((caddr_t)nd_opt, optlen);
+		nd_opt->nd_opt_type = ND_OPT_NONCE;
+		nd_opt->nd_opt_len = optlen >> 3;
+		bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
+	}
 	ip6->ip6_plen = htons((u_short)icmp6len);
 	nd_ns->nd_ns_cksum = 0;
 	nd_ns->nd_ns_cksum =
@@ -591,7 +608,8 @@
 		m_tag_prepend(m, mtag);
 	}
 
-	ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
+	ip6_output(m, NULL, &ro, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
+	    &im6o, NULL, NULL);
 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
 	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
 	ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
@@ -606,9 +624,17 @@
 		RTFREE(ro.ro_rt);
 	}
 	m_freem(m);
-	return;
 }
 
+#ifndef BURN_BRIDGES
+void
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
+    const struct in6_addr *taddr6, struct llentry *ln, uint8_t *nonce)
+{
+
+	nd6_ns_output_fib(ifp, daddr6, taddr6, ln, nonce, RT_DEFAULT_FIB);
+}
+#endif
 /*
  * Neighbor advertisement input handling.
  *
@@ -638,7 +664,6 @@
 	struct llentry *ln = NULL;
 	union nd_opts ndopts;
 	struct mbuf *chain = NULL;
-	struct m_tag *mtag;
 	struct sockaddr_in6 sin6;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
@@ -665,6 +690,7 @@
 	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
 	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
 	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
+	memset(&sin6, 0, sizeof(sin6));
 
 	taddr6 = nd_na->nd_na_target;
 	if (in6_setscope(&taddr6, ifp, NULL))
@@ -697,7 +723,14 @@
 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
 	}
 
-	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+	/*
+	 * This effectively disables the DAD check on a non-master CARP
+	 * address.
+	 */
+	if (ifp->if_carp)
+		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
+	else
+		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
 
 	/*
 	 * Target address matches one of my interface address.
@@ -710,8 +743,8 @@
 	 */
 	if (ifa
 	 && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
+		nd6_dad_na_input(ifa);
 		ifa_free(ifa);
-		nd6_dad_na_input(ifa);
 		goto freeit;
 	}
 
@@ -735,9 +768,9 @@
 	 * If no neighbor cache entry is found, NA SHOULD silently be
 	 * discarded.
 	 */
-	IF_AFDATA_LOCK(ifp);
+	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
-	IF_AFDATA_UNLOCK(ifp);
+	IF_AFDATA_RUNLOCK(ifp);
 	if (ln == NULL) {
 		goto freeit;
 	}
@@ -756,6 +789,7 @@
 		 */
 		bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 		ln->la_flags |= LLE_VALID;
+		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 		if (is_solicited) {
 			ln->ln_state = ND6_LLINFO_REACHABLE;
 			ln->ln_byhint = 0;
@@ -831,6 +865,8 @@
 			if (lladdr != NULL) {
 				bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
 				ln->la_flags |= LLE_VALID;
+				EVENTHANDLER_INVOKE(lle_event, ln,
+				    LLENTRY_RESOLVED);
 			}
 
 			/*
@@ -860,31 +896,17 @@
 			 * Remove the sender from the Default Router List and
 			 * update the Destination Cache entries.
 			 */
-			struct nd_defrouter *dr;
-			struct in6_addr *in6;
-
-			in6 = &L3_ADDR_SIN6(ln)->sin6_addr;
-
-			/*
-			 * Lock to protect the default router list.
-			 * XXX: this might be unnecessary, since this function
-			 * is only called under the network software interrupt
-			 * context.  However, we keep it just for safety.
-			 */
-			dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
-			if (dr)
-				defrtrlist_del(dr);
-			else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
-			    ND6_IFF_ACCEPT_RTADV) {
+			if (!defrouter_remove(&L3_ADDR_SIN6(ln)->sin6_addr,
+			    ln->lle_tbl->llt_ifp) &&
+			    (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
+			     ND6_IFF_ACCEPT_RTADV) != 0)
 				/*
 				 * Even if the neighbor is not in the default
-				 * router list, the neighbor may be used
-				 * as a next hop for some destinations
-				 * (e.g. redirect case). So we must
-				 * call rt6_flush explicitly.
+				 * router list, the neighbor may be used as a
+				 * next hop for some destinations (e.g. redirect
+				 * case). So we must call rt6_flush explicitly.
 				 */
 				rt6_flush(&ip6->ip6_src, ifp);
-			}
 		}
 		ln->ln_router = is_router;
 	}
@@ -893,43 +915,15 @@
 	 *  rt->rt_flags &= ~RTF_REJECT;
 	 */
 	ln->la_asked = 0;
-	if (ln->la_hold) {
-		struct mbuf *m_hold, *m_hold_next;
-
-		/*
-		 * reset the la_hold in advance, to explicitly
-		 * prevent a la_hold lookup in nd6_output()
-		 * (wouldn't happen, though...)
-		 */
-		for (m_hold = ln->la_hold, ln->la_hold = NULL;
-		    m_hold; m_hold = m_hold_next) {
-			m_hold_next = m_hold->m_nextpkt;
-			m_hold->m_nextpkt = NULL;
-			/*
-			 * we assume ifp is not a loopback here, so just set
-			 * the 2nd argument as the 1st one.
-			 */
-
-			if (send_sendso_input_hook != NULL) {
-				mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
-				    sizeof(unsigned short), M_NOWAIT);
-				if (mtag == NULL)
-					goto bad;
-				m_tag_prepend(m, mtag);
-			}
-
-			nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
-		}
-	}
+	if (ln->la_hold != NULL)
+		nd6_grab_holdchain(ln, &chain, &sin6);
  freeit:
-	if (ln != NULL) {
-		if (chain)
-			memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
+	if (ln != NULL)
 		LLE_WUNLOCK(ln);
 
-		if (chain)
-			nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
-	}
+	if (chain != NULL)
+		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
+
 	if (checklink)
 		pfxlist_onlink_check();
 
@@ -980,25 +974,16 @@
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_na);
 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
-	if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
-		printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
-		    "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
-		return;
-	}
+	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+	    __func__, max_linkhdr, maxlen, MCLBYTES));
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
-	if (m && max_linkhdr + maxlen >= MHLEN) {
-		MCLGET(m, M_DONTWAIT);
-		if ((m->m_flags & M_EXT) == 0) {
-			m_free(m);
-			m = NULL;
-		}
-	}
+	if (max_linkhdr + maxlen > MHLEN)
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+	else
+		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
-	m->m_pkthdr.rcvif = NULL;
 	M_SETFIB(m, fibnum);
 
 	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
@@ -1128,7 +1113,6 @@
 		RTFREE(ro.ro_rt);
 	}
 	m_freem(m);
-	return;
 }
 
 #ifndef BURN_BRIDGES
@@ -1157,9 +1141,6 @@
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
-#ifdef IFT_CARP
-	case IFT_CARP:
-#endif
 	case IFT_INFINIBAND:
 	case IFT_BRIDGE:
 	case IFT_ISO88025:
@@ -1177,31 +1158,80 @@
 	int dad_ns_ocount;	/* NS sent so far */
 	int dad_ns_icount;
 	int dad_na_icount;
+	int dad_ns_lcount;	/* looped back NS */
+	int dad_loopbackprobe;	/* probing state for loopback detection */
 	struct callout dad_timer_ch;
 	struct vnet *dad_vnet;
+	u_int dad_refcnt;
+#define	ND_OPT_NONCE_LEN32 \
+		((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
+	uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
 };
 
 static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-VNET_DEFINE(int, dad_init) = 0;
-#define	V_dadq				VNET(dadq)
-#define	V_dad_init			VNET(dad_init)
+static VNET_DEFINE(struct rwlock, dad_rwlock);
+#define	V_dadq			VNET(dadq)
+#define	V_dad_rwlock		VNET(dad_rwlock)
 
+#define	DADQ_RLOCK()		rw_rlock(&V_dad_rwlock)	
+#define	DADQ_RUNLOCK()		rw_runlock(&V_dad_rwlock)	
+#define	DADQ_WLOCK()		rw_wlock(&V_dad_rwlock)	
+#define	DADQ_WUNLOCK()		rw_wunlock(&V_dad_rwlock)	
+
+static void
+nd6_dad_add(struct dadq *dp)
+{
+
+	DADQ_WLOCK();
+	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
+	DADQ_WUNLOCK();
+}
+
+static void
+nd6_dad_del(struct dadq *dp)
+{
+
+	DADQ_WLOCK();
+	TAILQ_REMOVE(&V_dadq, dp, dad_list);
+	DADQ_WUNLOCK();
+	nd6_dad_rele(dp);
+}
+
 static struct dadq *
-nd6_dad_find(struct ifaddr *ifa)
+nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
 {
 	struct dadq *dp;
 
-	TAILQ_FOREACH(dp, &V_dadq, dad_list)
-		if (dp->dad_ifa == ifa)
-			return (dp);
+	DADQ_RLOCK();
+	TAILQ_FOREACH(dp, &V_dadq, dad_list) {
+		if (dp->dad_ifa != ifa)
+			continue;
+		/*
+		 * Skip if the nonce matches the received one.
+		 * +2 in the length is required because of type and
+		 * length fields are included in a header.
+		 */
+		if (n != NULL &&
+		    n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
+		    memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
+		        ND_OPT_NONCE_LEN) == 0) {
+			dp->dad_ns_lcount++;
+			continue;
+		}
+		refcount_acquire(&dp->dad_refcnt);
+		break;
+	}
+	DADQ_RUNLOCK();
 
-	return (NULL);
+	return (dp);
 }
 
 static void
-nd6_dad_starttimer(struct dadq *dp, int ticks)
+nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
 {
 
+	if (send_ns != 0)
+		nd6_dad_ns_output(dp);
 	callout_reset(&dp->dad_timer_ch, ticks,
 	    (void (*)(void *))nd6_dad_timer, (void *)dp);
 }
@@ -1210,9 +1240,27 @@
 nd6_dad_stoptimer(struct dadq *dp)
 {
 
-	callout_stop(&dp->dad_timer_ch);
+	callout_drain(&dp->dad_timer_ch);
 }
 
+static void
+nd6_dad_rele(struct dadq *dp)
+{
+
+	if (refcount_release(&dp->dad_refcnt)) {
+		ifa_free(dp->dad_ifa);
+		free(dp, M_IP6NDP);
+	}
+}
+
+void
+nd6_dad_init(void)
+{
+
+	rw_init(&V_dad_rwlock, "nd6 DAD queue");
+	TAILQ_INIT(&V_dadq);
+}
+
 /*
  * Start Duplicate Address Detection (DAD) for specified interface address.
  */
@@ -1222,12 +1270,8 @@
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
 	char ip6buf[INET6_ADDRSTRLEN];
+	int send_ns;
 
-	if (!V_dad_init) {
-		TAILQ_INIT(&V_dadq);
-		V_dad_init++;
-	}
-
 	/*
 	 * If we don't need DAD, don't do it.
 	 * There are several cases:
@@ -1252,17 +1296,26 @@
 	}
 	if (ifa->ifa_ifp == NULL)
 		panic("nd6_dad_start: ifa->ifa_ifp == NULL");
-	if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
+	if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
+		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 		return;
 	}
-	if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
+	if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
+	    !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+	    (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
+		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 		return;
-	if (nd6_dad_find(ifa) != NULL) {
-		/* DAD already in progress */
+	}
+	if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
+		/*
+		 * DAD is already in progress.  Let the existing entry
+		 * finish it.
+		 */
+		nd6_dad_rele(dp);
 		return;
 	}
 
-	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT);
+	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (dp == NULL) {
 		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
 			"%s(%s)\n",
@@ -1270,13 +1323,10 @@
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		return;
 	}
-	bzero(dp, sizeof(*dp));
 	callout_init(&dp->dad_timer_ch, 0);
 #ifdef VIMAGE
 	dp->dad_vnet = curvnet;
 #endif
-	TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);
-
 	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
 	    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 
@@ -1287,17 +1337,19 @@
 	 * (re)initialization.
 	 */
 	dp->dad_ifa = ifa;
-	ifa_ref(ifa);	/* just for safety */
+	ifa_ref(dp->dad_ifa);
 	dp->dad_count = V_ip6_dad_count;
 	dp->dad_ns_icount = dp->dad_na_icount = 0;
 	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
+	dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
+	refcount_init(&dp->dad_refcnt, 1);
+	nd6_dad_add(dp);
+	send_ns = 0;
 	if (delay == 0) {
-		nd6_dad_ns_output(dp, ifa);
-		nd6_dad_starttimer(dp,
-		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
-	} else {
-		nd6_dad_starttimer(dp, delay);
+		send_ns = 1;
+		delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000;
 	}
+	nd6_dad_starttimer(dp, delay, send_ns);
 }
 
 /*
@@ -1308,9 +1360,7 @@
 {
 	struct dadq *dp;
 
-	if (!V_dad_init)
-		return;
-	dp = nd6_dad_find(ifa);
+	dp = nd6_dad_find(ifa, NULL);
 	if (!dp) {
 		/* DAD wasn't started yet */
 		return;
@@ -1318,10 +1368,16 @@
 
 	nd6_dad_stoptimer(dp);
 
-	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
-	free(dp, M_IP6NDP);
-	dp = NULL;
-	ifa_free(ifa);
+	/*
+	 * The DAD queue entry may have been removed by nd6_dad_timer() while
+	 * we were waiting for it to stop, so re-do the lookup.
+	 */
+	nd6_dad_rele(dp);
+	if (nd6_dad_find(ifa, NULL) == NULL)
+		return;
+
+	nd6_dad_del(dp);
+	nd6_dad_rele(dp);
 }
 
 static void
@@ -1328,24 +1384,28 @@
 nd6_dad_timer(struct dadq *dp)
 {
 	CURVNET_SET(dp->dad_vnet);
-	int s;
 	struct ifaddr *ifa = dp->dad_ifa;
+	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	s = splnet();		/* XXX */
-
 	/* Sanity check */
 	if (ia == NULL) {
 		log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
-		goto done;
+		goto err;
 	}
+	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+		/* Do not need DAD for ifdisabled interface. */
+		log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
+		    "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
+		goto err;
+	}
 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
 		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
-		goto done;
+		goto err;
 	}
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
 		log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
@@ -1352,19 +1412,17 @@
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
-		goto done;
+		goto err;
 	}
 
-	/* timeouted with IFF_{RUNNING,UP} check */
-	if (dp->dad_ns_tcount > V_dad_maxtry) {
-		nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
+	/* Stop DAD if the interface is down even after dad_maxtry attempts. */
+	if ((dp->dad_ns_tcount > V_dad_maxtry) &&
+	    (((ifp->if_flags & IFF_UP) == 0) ||
+	     ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
+		nd6log((LOG_INFO, "%s: could not run DAD "
+		    "because the interface was down or not running.\n",
 		    if_name(ifa->ifa_ifp)));
-
-		TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
-		free(dp, M_IP6NDP);
-		dp = NULL;
-		ifa_free(ifa);
-		goto done;
+		goto err;
 	}
 
 	/* Need more checks? */
@@ -1372,84 +1430,85 @@
 		/*
 		 * We have more NS to go.  Send NS packet for DAD.
 		 */
-		nd6_dad_ns_output(dp, ifa);
 		nd6_dad_starttimer(dp,
-		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
+		goto done;
 	} else {
 		/*
 		 * We have transmitted sufficient number of DAD packets.
 		 * See what we've got.
 		 */
-		int duplicate;
-
-		duplicate = 0;
-
-		if (dp->dad_na_icount) {
+		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+			/* We've seen NS or NA, means DAD has failed. */
+			nd6_dad_duplicated(ifa, dp);
+		else if (V_dad_enhanced != 0 &&
+		    dp->dad_ns_lcount > 0 &&
+		    dp->dad_ns_lcount > dp->dad_loopbackprobe) {
 			/*
-			 * the check is in nd6_dad_na_input(),
-			 * but just in case
+			 * Sec. 4.1 in RFC 7527 requires transmission of
+			 * additional probes until the loopback condition
+			 * becomes clear when a looped back probe is detected.
 			 */
-			duplicate++;
-		}
-
-		if (dp->dad_ns_icount) {
-			/* We've seen NS, means DAD has failed. */
-			duplicate++;
-		}
-
-		if (duplicate) {
-			/* (*dp) will be freed in nd6_dad_duplicated() */
-			dp = NULL;
-			nd6_dad_duplicated(ifa);
+			log(LOG_ERR, "%s: a looped back NS message is "
+			    "detected during DAD for %s.  "
+			    "Another DAD probes are being sent.\n",
+			    if_name(ifa->ifa_ifp),
+			    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
+			dp->dad_loopbackprobe = dp->dad_ns_lcount;
+			/*
+			 * Send an NS immediately and increase dad_count by
+			 * V_nd6_mmaxtries - 1.
+			 */
+			dp->dad_count =
+			    dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
+			nd6_dad_starttimer(dp,
+			    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
+			    1);
+			goto done;
 		} else {
 			/*
 			 * We are done with DAD.  No NA came, no NS came.
-			 * No duplicate address found.
+			 * No duplicate address found.  Check IFDISABLED flag
+			 * again in case that it is changed between the
+			 * beginning of this function and here.
 			 */
-			ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+			if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
+				ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 
 			nd6log((LOG_DEBUG,
 			    "%s: DAD complete for %s - no duplicates found\n",
 			    if_name(ifa->ifa_ifp),
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
-
-			TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
-			free(dp, M_IP6NDP);
-			dp = NULL;
-			ifa_free(ifa);
+			if (dp->dad_ns_lcount > 0)
+				log(LOG_ERR, "%s: DAD completed while "
+				    "a looped back NS message is detected "
+				    "during DAD for %s.\n",
+				    if_name(ifa->ifa_ifp),
+				    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
 		}
 	}
-
+err:
+	nd6_dad_del(dp);
 done:
-	splx(s);
 	CURVNET_RESTORE();
 }
 
-void
-nd6_dad_duplicated(struct ifaddr *ifa)
+static void
+nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct ifnet *ifp;
-	struct dadq *dp;
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	dp = nd6_dad_find(ifa);
-	if (dp == NULL) {
-		log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
-		return;
-	}
-
 	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
-	    "NS in/out=%d/%d, NA in=%d\n",
+	    "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
 	    if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
-	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
+	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
+	    dp->dad_na_icount);
 
 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
 
-	/* We are done with DAD, with duplicate address found. (failure) */
-	nd6_dad_stoptimer(dp);
-
 	ifp = ifa->ifa_ifp;
 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
 	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
@@ -1490,18 +1549,14 @@
 			break;
 		}
 	}
-
-	TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
-	free(dp, M_IP6NDP);
-	dp = NULL;
-	ifa_free(ifa);
 }
 
 static void
-nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
+nd6_dad_ns_output(struct dadq *dp)
 {
-	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
-	struct ifnet *ifp = ifa->ifa_ifp;
+	struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
+	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
+	int i;
 
 	dp->dad_ns_tcount++;
 	if ((ifp->if_flags & IFF_UP) == 0) {
@@ -1512,17 +1567,29 @@
 	}
 
 	dp->dad_ns_ocount++;
-	nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1);
+	if (V_dad_enhanced != 0) {
+		for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
+			dp->dad_nonce[i] = arc4random();
+		/*
+		 * XXXHRS: Note that in the case that
+		 * DupAddrDetectTransmits > 1, multiple NS messages with
+		 * different nonces can be looped back in an unexpected
+		 * order.  The current implementation recognizes only
+		 * the latest nonce on the sender side.  Practically it
+		 * should work well in almost all cases.
+		 */
+	}
+	nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL,
+	    (uint8_t *)&dp->dad_nonce[0]);
 }
 
 static void
-nd6_dad_ns_input(struct ifaddr *ifa)
+nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
 {
 	struct in6_ifaddr *ia;
 	struct ifnet *ifp;
 	const struct in6_addr *taddr6;
 	struct dadq *dp;
-	int duplicate;
 
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_ns_input");
@@ -1530,39 +1597,15 @@
 	ia = (struct in6_ifaddr *)ifa;
 	ifp = ifa->ifa_ifp;
 	taddr6 = &ia->ia_addr.sin6_addr;
-	duplicate = 0;
-	dp = nd6_dad_find(ifa);
-
-	/* Quickhack - completely ignore DAD NS packets */
-	if (V_dad_ignore_ns) {
-		char ip6buf[INET6_ADDRSTRLEN];
-		nd6log((LOG_INFO,
-		    "nd6_dad_ns_input: ignoring DAD NS packet for "
-		    "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
-		    if_name(ifa->ifa_ifp)));
+	/* Ignore Nonce option when Enhanced DAD is disabled. */
+	if (V_dad_enhanced == 0)
+		ndopt_nonce = NULL;
+	dp = nd6_dad_find(ifa, ndopt_nonce);
+	if (dp == NULL)
 		return;
-	}
 
-	/*
-	 * if I'm yet to start DAD, someone else started using this address
-	 * first.  I have a duplicate and you win.
-	 */
-	if (dp == NULL || dp->dad_ns_ocount == 0)
-		duplicate++;
-
-	/* XXX more checks for loopback situation - see nd6_dad_timer too */
-
-	if (duplicate) {
-		dp = NULL;	/* will be freed in nd6_dad_duplicated() */
-		nd6_dad_duplicated(ifa);
-	} else {
-		/*
-		 * not sure if I got a duplicate.
-		 * increment ns count and see what happens.
-		 */
-		if (dp)
-			dp->dad_ns_icount++;
-	}
+	dp->dad_ns_icount++;
+	nd6_dad_rele(dp);
 }
 
 static void
@@ -1573,10 +1616,9 @@
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_na_input");
 
-	dp = nd6_dad_find(ifa);
-	if (dp)
+	dp = nd6_dad_find(ifa, NULL);
+	if (dp != NULL) {
 		dp->dad_na_icount++;
-
-	/* remove the address. */
-	nd6_dad_duplicated(ifa);
+		nd6_dad_rele(dp);
+	}
 }

Modified: trunk/sys/netinet6/nd6_rtr.c
===================================================================
--- trunk/sys/netinet6/nd6_rtr.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6_rtr.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6_rtr.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6_rtr.c 303458 2016-07-28 20:08:01Z sbruno $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -40,6 +40,7 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
@@ -219,6 +220,8 @@
 	struct nd_defrouter *dr;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
+	dr = NULL;
+
 	/*
 	 * We only accept RAs only when the per-interface flag
 	 * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
@@ -271,7 +274,7 @@
 
 	bzero(&dr0, sizeof(dr0));
 	dr0.rtaddr = saddr6;
-	dr0.flags  = nd_ra->nd_ra_flags_reserved;
+	dr0.raflags = nd_ra->nd_ra_flags_reserved;
 	/*
 	 * Effectively-disable routes from RA messages when
 	 * ND6_IFF_NO_RADR enabled on the receiving interface or
@@ -283,7 +286,7 @@
 		dr0.rtlifetime = 0;
 	else
 		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
-	dr0.expire = time_second + dr0.rtlifetime;
+	dr0.expire = time_uptime + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
@@ -297,8 +300,16 @@
 	}
 	if (nd_ra->nd_ra_retransmit)
 		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
-	if (nd_ra->nd_ra_curhoplimit)
-		ndi->chlim = nd_ra->nd_ra_curhoplimit;
+	if (nd_ra->nd_ra_curhoplimit) {
+		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
+			ndi->chlim = nd_ra->nd_ra_curhoplimit;
+		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
+			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
+			    "%s on %s (current = %d, received = %d). "
+			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
+			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
+		}
+	}
 	dr = defrtrlist_update(&dr0);
     }
 
@@ -360,6 +371,10 @@
 			(void)prelist_update(&pr, dr, m, mcast);
 		}
 	}
+	if (dr != NULL) {
+		defrouter_rele(dr);
+		dr = NULL;
+	}
 
 	/*
 	 * MTU
@@ -437,10 +452,6 @@
 	m_freem(m);
 }
 
-/*
- * default router list proccessing sub routines
- */
-
 /* tell the change to user processes watching the routing socket. */
 static void
 nd6_rtmsg(int cmd, struct rtentry *rt)
@@ -469,12 +480,15 @@
 		ifa_free(ifa);
 }
 
+/*
+ * default router list proccessing sub routines
+ */
+
 static void
 defrouter_addreq(struct nd_defrouter *new)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
-	int s;
 	int error;
 
 	bzero(&def, sizeof(def));
@@ -486,7 +500,6 @@
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
-	s = splnet();
 	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
 	    RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
@@ -496,23 +509,48 @@
 	}
 	if (error == 0)
 		new->installed = 1;
-	splx(s);
-	return;
 }
 
 struct nd_defrouter *
-defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
 {
 	struct nd_defrouter *dr;
 
-	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
-		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
+	ND6_LOCK_ASSERT();
+	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
+		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+			defrouter_ref(dr);
 			return (dr);
-	}
+		}
+	return (NULL);
+}
 
-	return (NULL);		/* search failed */
+struct nd_defrouter *
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+{
+	struct nd_defrouter *dr;
+
+	ND6_RLOCK();
+	dr = defrouter_lookup_locked(addr, ifp);
+	ND6_RUNLOCK();
+	return (dr);
 }
 
+void
+defrouter_ref(struct nd_defrouter *dr)
+{
+
+	refcount_acquire(&dr->refcnt);
+}
+
+void
+defrouter_rele(struct nd_defrouter *dr)
+{
+
+	if (refcount_release(&dr->refcnt))
+		free(dr, M_IP6NDP);
+}
+
 /*
  * Remove the default route for a given router.
  * This is just a subroutine function for defrouter_select(), and should
@@ -545,16 +583,42 @@
 }
 
 /*
- * remove all default routes from default router list
+ * Remove all default routes from default router list.
  */
 void
 defrouter_reset(void)
 {
-	struct nd_defrouter *dr;
+	struct nd_defrouter *dr, **dra;
+	int count, i;
 
+	count = i = 0;
+
+	/*
+	 * We can't delete routes with the ND lock held, so make a copy of the
+	 * current default router list and use that when deleting routes.
+	 */
+	ND6_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
-		defrouter_delreq(dr);
+		count++;
+	ND6_RUNLOCK();
 
+	dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
+
+	ND6_RLOCK();
+	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+		if (i == count)
+			break;
+		defrouter_ref(dr);
+		dra[i++] = dr;
+	}
+	ND6_RUNLOCK();
+
+	for (i = 0; i < count && dra[i] != NULL; i++) {
+		defrouter_delreq(dra[i]);
+		defrouter_rele(dra[i]);
+	}
+	free(dra, M_TEMP);
+
 	/*
 	 * XXX should we also nuke any default routers in the kernel, by
 	 * going through them by rtalloc1()?
@@ -561,12 +625,53 @@
 	 */
 }
 
+/*
+ * Look up a matching default router list entry and remove it. Returns true if a
+ * matching entry was found, false otherwise.
+ */
+bool
+defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
+{
+	struct nd_defrouter *dr;
+
+	ND6_WLOCK();
+	dr = defrouter_lookup_locked(addr, ifp);
+	if (dr == NULL) {
+		ND6_WUNLOCK();
+		return (false);
+	}
+
+	defrouter_unlink(dr, NULL);
+	ND6_WUNLOCK();
+	defrouter_del(dr);
+	defrouter_rele(dr);
+	return (true);
+}
+
+/*
+ * Remove a router from the global list and optionally stash it in a
+ * caller-supplied queue.
+ *
+ * The ND lock must be held.
+ */
 void
-defrtrlist_del(struct nd_defrouter *dr)
+defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
 {
+
+	ND6_WLOCK_ASSERT();
+	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+	if (drq != NULL)
+		TAILQ_INSERT_TAIL(drq, dr, dr_entry);
+}
+
+void
+defrouter_del(struct nd_defrouter *dr)
+{
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
 
+	ND6_UNLOCK_ASSERT();
+
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
@@ -578,7 +683,6 @@
 		deldr = dr;
 		defrouter_delreq(dr);
 	}
-	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
@@ -598,7 +702,10 @@
 	if (deldr)
 		defrouter_select();
 
-	free(dr, M_IP6NDP);
+	/*
+	 * Release the list reference.
+	 */
+	defrouter_rele(dr);
 }
 
 /*
@@ -625,16 +732,16 @@
 void
 defrouter_select(void)
 {
-	int s = splnet();
-	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
+	struct nd_defrouter *dr, *selected_dr, *installed_dr;
 	struct llentry *ln = NULL;
 
+	ND6_RLOCK();
 	/*
 	 * Let's handle easy case (3) first:
 	 * If default router list is empty, there's nothing to be done.
 	 */
 	if (TAILQ_EMPTY(&V_nd_defrouter)) {
-		splx(s);
+		ND6_RUNLOCK();
 		return;
 	}
 
@@ -643,6 +750,7 @@
 	 * We just pick up the first reachable one (if any), assuming that
 	 * the ordering rule of the list described in defrtrlist_update().
 	 */
+	selected_dr = installed_dr = NULL;
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		IF_AFDATA_RLOCK(dr->ifp);
 		if (selected_dr == NULL &&
@@ -649,6 +757,7 @@
 		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln)) {
 			selected_dr = dr;
+			defrouter_ref(selected_dr);
 		}
 		IF_AFDATA_RUNLOCK(dr->ifp);
 		if (ln != NULL) {
@@ -656,12 +765,15 @@
 			ln = NULL;
 		}
 
-		if (dr->installed && installed_dr == NULL)
-			installed_dr = dr;
-		else if (dr->installed && installed_dr) {
-			/* this should not happen.  warn for diagnosis. */
-			log(LOG_ERR, "defrouter_select: more than one router"
-			    " is installed\n");
+		if (dr->installed) {
+			if (installed_dr == NULL) {
+				installed_dr = dr;
+				defrouter_ref(installed_dr);
+			} else {
+				/* this should not happen.  warn for diagnosis. */
+				log(LOG_ERR,
+		    "defrouter_select: more than one router is installed\n");
+			}
 		}
 	}
 	/*
@@ -673,15 +785,18 @@
 	 * or when the new one has a really higher preference value.
 	 */
 	if (selected_dr == NULL) {
-		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
+		if (installed_dr == NULL ||
+		    TAILQ_NEXT(installed_dr, dr_entry) == NULL)
 			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
 		else
 			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
-	} else if (installed_dr) {
+		defrouter_ref(selected_dr);
+	} else if (installed_dr != NULL) {
 		IF_AFDATA_RLOCK(installed_dr->ifp);
 		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
 		    ND6_IS_LLINFO_PROBREACH(ln) &&
 		    rtpref(selected_dr) <= rtpref(installed_dr)) {
+			defrouter_rele(selected_dr);
 			selected_dr = installed_dr;
 		}
 		IF_AFDATA_RUNLOCK(installed_dr->ifp);
@@ -688,6 +803,7 @@
 		if (ln != NULL)
 			LLE_RUNLOCK(ln);
 	}
+	ND6_RUNLOCK();
 
 	/*
 	 * If the selected router is different than the installed one,
@@ -695,13 +811,13 @@
 	 * Note that the selected router is never NULL here.
 	 */
 	if (installed_dr != selected_dr) {
-		if (installed_dr)
+		if (installed_dr != NULL) {
 			defrouter_delreq(installed_dr);
+			defrouter_rele(installed_dr);
+		}
 		defrouter_addreq(selected_dr);
 	}
-
-	splx(s);
-	return;
+	defrouter_rele(selected_dr);
 }
 
 /*
@@ -711,7 +827,7 @@
 static int
 rtpref(struct nd_defrouter *dr)
 {
-	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
+	switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
 	case ND_RA_FLAG_RTPREF_HIGH:
 		return (RTPREF_HIGH);
 	case ND_RA_FLAG_RTPREF_MEDIUM:
@@ -725,7 +841,7 @@
 		 * serious bug of kernel internal.  We thus always bark here.
 		 * Or, can we even panic?
 		 */
-		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
+		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
 		return (RTPREF_INVALID);
 	}
 	/* NOTREACHED */
@@ -735,63 +851,50 @@
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
-	int s = splnet();
+	int oldpref;
 
-	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
-		/* entry exists */
-		if (new->rtlifetime == 0) {
-			defrtrlist_del(dr);
-			dr = NULL;
-		} else {
-			int oldpref = rtpref(dr);
+	if (new->rtlifetime == 0) {
+		defrouter_remove(&new->rtaddr, new->ifp);
+		return (NULL);
+	}
 
-			/* override */
-			dr->flags = new->flags; /* xxx flag check */
-			dr->rtlifetime = new->rtlifetime;
-			dr->expire = new->expire;
+	ND6_WLOCK();
+	dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
+	if (dr != NULL) {
+		oldpref = rtpref(dr);
 
-			/*
-			 * If the preference does not change, there's no need
-			 * to sort the entries. Also make sure the selected
-			 * router is still installed in the kernel.
-			 */
-			if (dr->installed && rtpref(new) == oldpref) {
-				splx(s);
-				return (dr);
-			}
+		/* override */
+		dr->raflags = new->raflags; /* XXX flag check */
+		dr->rtlifetime = new->rtlifetime;
+		dr->expire = new->expire;
 
-			/*
-			 * preferred router may be changed, so relocate
-			 * this router.
-			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
-			 * However, since defrtrlist_del() has many side
-			 * effects, we intentionally do so here.
-			 * defrouter_select() below will handle routing
-			 * changes later.
-			 */
-			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
-			n = dr;
-			goto insert;
+		/*
+		 * If the preference does not change, there's no need
+		 * to sort the entries. Also make sure the selected
+		 * router is still installed in the kernel.
+		 */
+		if (dr->installed && rtpref(new) == oldpref) {
+			ND6_WUNLOCK();
+			return (dr);
 		}
-		splx(s);
-		return (dr);
-	}
 
-	/* entry does not exist */
-	if (new->rtlifetime == 0) {
-		splx(s);
-		return (NULL);
+		/*
+		 * The preferred router may have changed, so relocate this
+		 * router.
+		 */
+		TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+		n = dr;
+	} else {
+		n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
+		if (n == NULL) {
+			ND6_WUNLOCK();
+			return (NULL);
+		}
+		memcpy(n, new, sizeof(*n));
+		/* Initialize with an extra reference for the caller. */
+		refcount_init(&n->refcnt, 2);
 	}
 
-	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
-	if (n == NULL) {
-		splx(s);
-		return (NULL);
-	}
-	bzero(n, sizeof(*n));
-	*n = *new;
-
-insert:
 	/*
 	 * Insert the new router in the Default Router List;
 	 * The Default Router List should be in the descending order
@@ -804,15 +907,14 @@
 		if (rtpref(n) > rtpref(dr))
 			break;
 	}
-	if (dr)
+	if (dr != NULL)
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+	ND6_WUNLOCK();
 
 	defrouter_select();
 
-	splx(s);
-
 	return (n);
 }
 
@@ -834,11 +936,11 @@
 {
 	struct nd_pfxrouter *new;
 
-	new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
 		return;
-	bzero(new, sizeof(*new));
 	new->router = dr;
+	defrouter_ref(dr);
 
 	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
 
@@ -848,7 +950,9 @@
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
+
 	LIST_REMOVE(pfr, pfr_entry);
+	defrouter_rele(pfr->router);
 	free(pfr, M_IP6NDP);
 }
 
@@ -875,13 +979,12 @@
 {
 	struct nd_prefix *new = NULL;
 	int error = 0;
-	int i, s;
+	int i;
 	char ip6buf[INET6_ADDRSTRLEN];
 
-	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (new == NULL)
-		return(ENOMEM);
-	bzero(new, sizeof(*new));
+		return (ENOMEM);
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
@@ -890,11 +993,9 @@
 	new->ndpr_flags = pr->ndpr_flags;
 	if ((error = in6_init_prefix_ltimes(new)) != 0) {
 		free(new, M_IP6NDP);
-		return(error);
+		return (error);
 	}
-	new->ndpr_lastupdate = time_second;
-	if (newp != NULL)
-		*newp = new;
+	new->ndpr_lastupdate = time_uptime;
 
 	/* initialization */
 	LIST_INIT(&new->ndpr_advrtrs);
@@ -904,10 +1005,8 @@
 		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
 		    new->ndpr_mask.s6_addr32[i];
 
-	s = splnet();
 	/* link ndpr_entry to nd_prefix list */
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
-	splx(s);
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
@@ -922,10 +1021,11 @@
 		}
 	}
 
-	if (dr)
+	if (dr != NULL)
 		pfxrtr_add(new, dr);
-
-	return 0;
+	if (newp != NULL)
+		*newp = new;
+	return (0);
 }
 
 void
@@ -932,7 +1032,7 @@
 prelist_remove(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
-	int e, s;
+	int e;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* make sure to invalidate the prefix until it is really freed. */
@@ -957,17 +1057,13 @@
 	if (pr->ndpr_refcnt > 0)
 		return;		/* notice here? */
 
-	s = splnet();
-
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
 
-	/* free list of routers that adversed the prefix */
+	/* free list of routers that advertised the prefix */
 	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
-		free(pfr, M_IP6NDP);
+		pfxrtr_del(pfr);
 	}
-	splx(s);
-
 	free(pr, M_IP6NDP);
 
 	pfxlist_onlink_check();
@@ -985,9 +1081,7 @@
 	struct ifaddr *ifa;
 	struct ifnet *ifp = new->ndpr_ifp;
 	struct nd_prefix *pr;
-	int s = splnet();
 	int error = 0;
-	int newprefix = 0;
 	int auth;
 	struct in6_addrlifetime lt6_tmp;
 	char ip6buf[INET6_ADDRSTRLEN];
@@ -1023,7 +1117,7 @@
 			pr->ndpr_vltime = new->ndpr_vltime;
 			pr->ndpr_pltime = new->ndpr_pltime;
 			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
-			pr->ndpr_lastupdate = time_second;
+			pr->ndpr_lastupdate = time_uptime;
 		}
 
 		if (new->ndpr_raf_onlink &&
@@ -1045,23 +1139,17 @@
 		if (dr && pfxrtr_lookup(pr, dr) == NULL)
 			pfxrtr_add(pr, dr);
 	} else {
-		struct nd_prefix *newpr = NULL;
-
-		newprefix = 1;
-
 		if (new->ndpr_vltime == 0)
 			goto end;
 		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
 			goto end;
 
-		error = nd6_prelist_add(new, dr, &newpr);
-		if (error != 0 || newpr == NULL) {
+		error = nd6_prelist_add(new, dr, &pr);
+		if (error != 0) {
 			nd6log((LOG_NOTICE, "prelist_update: "
-			    "nd6_prelist_add failed for %s/%d on %s "
-			    "errno=%d, returnpr=%p\n",
+			    "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
 			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
-			    new->ndpr_plen, if_name(new->ndpr_ifp),
-			    error, newpr));
+			    new->ndpr_plen, if_name(new->ndpr_ifp), error));
 			goto end; /* we should just give up in this case. */
 		}
 
@@ -1072,13 +1160,11 @@
 		 * addresses.  Thus, we explicitly make sure that the prefix
 		 * itself expires now.
 		 */
-		if (newpr->ndpr_raf_onlink == 0) {
-			newpr->ndpr_vltime = 0;
-			newpr->ndpr_pltime = 0;
-			in6_init_prefix_ltimes(newpr);
+		if (pr->ndpr_raf_onlink == 0) {
+			pr->ndpr_vltime = 0;
+			pr->ndpr_pltime = 0;
+			in6_init_prefix_ltimes(pr);
 		}
-
-		pr = newpr;
 	}
 
 	/*
@@ -1161,7 +1247,7 @@
 
 		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
 			remaininglifetime = ND6_INFINITE_LIFETIME;
-		else if (time_second - ifa6->ia6_updatetime >
+		else if (time_uptime - ifa6->ia6_updatetime >
 			 lt6_tmp.ia6t_vltime) {
 			/*
 			 * The case of "invalid" address.  We should usually
@@ -1170,7 +1256,7 @@
 			remaininglifetime = 0;
 		} else
 			remaininglifetime = lt6_tmp.ia6t_vltime -
-			    (time_second - ifa6->ia6_updatetime);
+			    (time_uptime - ifa6->ia6_updatetime);
 
 		/* when not updating, keep the current stored lifetime. */
 		lt6_tmp.ia6t_vltime = remaininglifetime;
@@ -1206,18 +1292,18 @@
 			u_int32_t maxvltime, maxpltime;
 
 			if (V_ip6_temp_valid_lifetime >
-			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
+			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxvltime = V_ip6_temp_valid_lifetime -
-				    (time_second - ifa6->ia6_createtime) -
+				    (time_uptime - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxvltime = 0;
 			if (V_ip6_temp_preferred_lifetime >
-			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
+			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
 			    V_ip6_desync_factor)) {
 				maxpltime = V_ip6_temp_preferred_lifetime -
-				    (time_second - ifa6->ia6_createtime) -
+				    (time_uptime - ifa6->ia6_createtime) -
 				    V_ip6_desync_factor;
 			} else
 				maxpltime = 0;
@@ -1232,7 +1318,7 @@
 			}
 		}
 		ifa6->ia6_lifetime = lt6_tmp;
-		ifa6->ia6_updatetime = time_second;
+		ifa6->ia6_updatetime = time_uptime;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 	if (ia6_match == NULL && new->ndpr_vltime) {
@@ -1310,7 +1396,6 @@
 	}
 
  end:
-	splx(s);
 	return error;
 }
 
@@ -1375,6 +1460,7 @@
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
+		ND6_RLOCK();
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
@@ -1385,6 +1471,7 @@
 			if (pfxrtr != NULL)
 				break;
 		}
+		ND6_RUNLOCK();
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
@@ -1746,6 +1833,7 @@
 		}
 	}
 	error = a_failure;
+	a_failure = 1;
 	if (error == 0) {
 		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
 
@@ -1784,7 +1872,8 @@
 						&opr->ndpr_prefix.sin6_addr),
 					    opr->ndpr_plen, if_name(ifp),
 					    if_name(opr->ndpr_ifp), e));
-				}
+				} else
+					a_failure = 0;
 			}
 		}
 	} else {
@@ -1796,6 +1885,10 @@
 		    if_name(ifp), error));
 	}
 
+	if (a_failure)
+		lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
+		    (struct sockaddr *)&mask6, LLE_STATIC);
+
 	return (error);
 }
 
@@ -2008,7 +2101,7 @@
 	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_vltime -
-		    (time_second - ia0->ia6_updatetime));
+		    (time_uptime - ia0->ia6_updatetime));
 		if (vltime0 > V_ip6_temp_valid_lifetime)
 			vltime0 = V_ip6_temp_valid_lifetime;
 	} else
@@ -2016,7 +2109,7 @@
 	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_pltime -
-		    (time_second - ia0->ia6_updatetime));
+		    (time_uptime - ia0->ia6_updatetime));
 		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
 			pltime0 = V_ip6_temp_preferred_lifetime -
 			    V_ip6_desync_factor;
@@ -2074,11 +2167,11 @@
 	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
 		ndpr->ndpr_preferred = 0;
 	else
-		ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
+		ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
 	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 		ndpr->ndpr_expire = 0;
 	else
-		ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
+		ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
 
 	return 0;
 }
@@ -2090,7 +2183,7 @@
 	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = 0;
 	else {
-		lt6->ia6t_expire = time_second;
+		lt6->ia6t_expire = time_uptime;
 		lt6->ia6t_expire += lt6->ia6t_vltime;
 	}
 
@@ -2098,7 +2191,7 @@
 	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = 0;
 	else {
-		lt6->ia6t_preferred = time_second;
+		lt6->ia6t_preferred = time_uptime;
 		lt6->ia6t_preferred += lt6->ia6t_pltime;
 	}
 }
@@ -2113,13 +2206,10 @@
 {
 	struct radix_node_head *rnh;
 	u_int fibnum;
-	int s = splnet();
 
 	/* We'll care only link-local addresses */
-	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
-		splx(s);
+	if (!IN6_IS_ADDR_LINKLOCAL(gateway))
 		return;
-	}
 
 	/* XXX Do we really need to walk any but the default FIB? */
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
@@ -2131,7 +2221,6 @@
 		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
-	splx(s);
 }
 
 static int

Modified: trunk/sys/netinet6/pim6.h
===================================================================
--- trunk/sys/netinet6/pim6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/pim6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: pim6.h,v 1.3 2000/03/25 07:23:58 sumikawa Exp $
- * $FreeBSD: stable/9/sys/netinet6/pim6.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/pim6.h 174510 2007-12-10 16:03:40Z obrien $
  */
 /*
  * Protocol Independent Multicast (PIM) definitions

Modified: trunk/sys/netinet6/pim6_var.h
===================================================================
--- trunk/sys/netinet6/pim6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/pim6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/pim6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/pim6_var.h 254925 2013-08-26 18:16:05Z jhb $
  */
 
 /*
@@ -43,13 +43,13 @@
 #define _NETINET6_PIM6_VAR_H_
 
 struct pim6stat {
-	u_quad_t pim6s_rcv_total;	/* total PIM messages received	*/
-	u_quad_t pim6s_rcv_tooshort;	/* received with too few bytes	*/
-	u_quad_t pim6s_rcv_badsum;	/* received with bad checksum	*/
-	u_quad_t pim6s_rcv_badversion;	/* received bad PIM version	*/
-	u_quad_t pim6s_rcv_registers;	/* received registers		*/
-	u_quad_t pim6s_rcv_badregisters; /* received invalid registers	*/
-	u_quad_t pim6s_snd_registers;	/* sent registers		*/
+	uint64_t pim6s_rcv_total;	/* total PIM messages received	*/
+	uint64_t pim6s_rcv_tooshort;	/* received with too few bytes	*/
+	uint64_t pim6s_rcv_badsum;	/* received with bad checksum	*/
+	uint64_t pim6s_rcv_badversion;	/* received bad PIM version	*/
+	uint64_t pim6s_rcv_registers;	/* received registers		*/
+	uint64_t pim6s_rcv_badregisters; /* received invalid registers	*/
+	uint64_t pim6s_snd_registers;	/* sent registers		*/
 };
 
 #if (defined(KERNEL)) || (defined(_KERNEL))
@@ -62,8 +62,4 @@
 #define PIM6CTL_STATS		1	/* statistics (read-only) */
 #define PIM6CTL_MAXID		2
 
-#define PIM6CTL_NAMES { \
-	{ 0, 0 }, \
-	{ 0, 0 }, \
-}
 #endif /* _NETINET6_PIM6_VAR_H_ */

Modified: trunk/sys/netinet6/raw_ip6.c
===================================================================
--- trunk/sys/netinet6/raw_ip6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/raw_ip6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/raw_ip6.c 248549 2013-03-20 09:12:28Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/raw_ip6.c 329158 2018-02-12 13:52:58Z ae $");
 
 #include "opt_ipsec.h"
 #include "opt_inet6.h"
@@ -69,6 +69,7 @@
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -125,8 +126,13 @@
 extern u_long	rip_sendspace;
 extern u_long	rip_recvspace;
 
-VNET_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_SYSINIT(rip6stat);
 
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(rip6stat);
+#endif /* VIMAGE */
+
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
@@ -161,7 +167,7 @@
 	struct mbuf *opts = NULL;
 	struct sockaddr_in6 fromsa;
 
-	V_rip6stat.rip6s_ipackets++;
+	RIP6STAT_INC(rip6s_ipackets);
 
 	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
 		/* XXX Send icmp6 host/port unreach? */
@@ -200,11 +206,11 @@
 		}
 		INP_RLOCK(in6p);
 		if (in6p->in6p_cksum != -1) {
-			V_rip6stat.rip6s_isum++;
+			RIP6STAT_INC(rip6s_isum);
 			if (in6_cksum(m, proto, *offp,
 			    m->m_pkthdr.len - *offp)) {
 				INP_RUNLOCK(in6p);
-				V_rip6stat.rip6s_badsum++;
+				RIP6STAT_INC(rip6s_badsum);
 				continue;
 			}
 		}
@@ -264,7 +270,7 @@
 			 */
 			if (n && ipsec6_in_reject(n, last)) {
 				m_freem(n);
-				V_ipsec6stat.in_polvio++;
+				IPSEC6STAT_INC(ips_in_polvio);
 				/* Do not inject data into pcb. */
 			} else
 #endif /* IPSEC */
@@ -280,7 +286,7 @@
 					m_freem(n);
 					if (opts)
 						m_freem(opts);
-					V_rip6stat.rip6s_fullsock++;
+					RIP6STAT_INC(rip6s_fullsock);
 				} else
 					sorwakeup(last->inp_socket);
 				opts = NULL;
@@ -296,7 +302,7 @@
 	 */
 	if ((last != NULL) && ipsec6_in_reject(m, last)) {
 		m_freem(m);
-		V_ipsec6stat.in_polvio++;
+		IPSEC6STAT_INC(ips_in_polvio);
 		IP6STAT_DEC(ip6s_delivered);
 		/* Do not inject data into pcb. */
 		INP_RUNLOCK(last);
@@ -313,22 +319,20 @@
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
-			V_rip6stat.rip6s_fullsock++;
+			RIP6STAT_INC(rip6s_fullsock);
 		} else
 			sorwakeup(last->inp_socket);
 		INP_RUNLOCK(last);
 	} else {
-		V_rip6stat.rip6s_nosock++;
+		RIP6STAT_INC(rip6s_nosock);
 		if (m->m_flags & M_MCAST)
-			V_rip6stat.rip6s_nosockmcast++;
+			RIP6STAT_INC(rip6s_nosockmcast);
 		if (proto == IPPROTO_NONE)
 			m_freem(m);
-		else {
-			char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */
+		else
 			icmp6_error(m, ICMP6_PARAM_PROB,
 			    ICMP6_PARAMPROB_NEXTHEADER,
-			    prvnxtp - mtod(m, char *));
-		}
+			    ip6_get_prevhdr(m, *offp));
 		IP6STAT_DEC(ip6s_delivered);
 	}
 	return (IPPROTO_DONE);
@@ -460,7 +464,7 @@
 		code = icmp6->icmp6_code;
 	}
 
-	M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
+	M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
@@ -560,7 +564,7 @@
 			icmp6_ifoutstat_inc(oifp, type, code);
 		ICMP6STAT_INC(icp6s_outhist[type]);
 	} else
-		V_rip6stat.rip6s_opackets++;
+		RIP6STAT_INC(rip6s_opackets);
 
 	goto freectl;
 

Modified: trunk/sys/netinet6/raw_ip6.h
===================================================================
--- trunk/sys/netinet6/raw_ip6.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/raw_ip6.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: raw_ip6.h,v 1.2 2001/05/27 13:28:35 itojun Exp $
- * $FreeBSD: stable/9/sys/netinet6/raw_ip6.h 195727 2009-07-16 21:13:04Z rwatson $
+ * $FreeBSD: stable/10/sys/netinet6/raw_ip6.h 253085 2013-07-09 09:54:54Z ae $
  */
 
 #ifndef _NETINET6_RAW_IP6_H_
@@ -38,19 +38,23 @@
  * ICMPv6 stat is counted separately.  see netinet/icmp6.h
  */
 struct rip6stat {
-	u_quad_t rip6s_ipackets;	/* total input packets */
-	u_quad_t rip6s_isum;		/* input checksum computations */
-	u_quad_t rip6s_badsum;		/* of above, checksum error */
-	u_quad_t rip6s_nosock;		/* no matching socket */
-	u_quad_t rip6s_nosockmcast;	/* of above, arrived as multicast */
-	u_quad_t rip6s_fullsock;	/* not delivered, input socket full */
+	uint64_t rip6s_ipackets;	/* total input packets */
+	uint64_t rip6s_isum;		/* input checksum computations */
+	uint64_t rip6s_badsum;		/* of above, checksum error */
+	uint64_t rip6s_nosock;		/* no matching socket */
+	uint64_t rip6s_nosockmcast;	/* of above, arrived as multicast */
+	uint64_t rip6s_fullsock;	/* not delivered, input socket full */
 
-	u_quad_t rip6s_opackets;	/* total output packets */
+	uint64_t rip6s_opackets;	/* total output packets */
 };
 
 #ifdef _KERNEL
-VNET_DECLARE(struct rip6stat, rip6stat);
-#define	V_rip6stat			VNET(rip6stat)
-#endif
+#include <sys/counter.h>
 
+VNET_PCPUSTAT_DECLARE(struct rip6stat, rip6stat);
+#define	RIP6STAT_ADD(name, val)	\
+    VNET_PCPUSTAT_ADD(struct rip6stat, rip6stat, name, (val))
+#define	RIP6STAT_INC(name)	RIP6STAT_ADD(name, 1)
+#endif /* _KERNEL */
+
 #endif

Modified: trunk/sys/netinet6/route6.c
===================================================================
--- trunk/sys/netinet6/route6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/route6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/route6.c 238231 2012-07-08 11:28:33Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/route6.c 249294 2013-04-09 07:11:22Z ae $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"

Modified: trunk/sys/netinet6/scope6.c
===================================================================
--- trunk/sys/netinet6/scope6.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/scope6.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,14 +31,16 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/scope6.c 243382 2012-11-22 00:22:54Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/scope6.c 299145 2016-05-05 23:06:39Z markj $");
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/queue.h>
+#include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
@@ -56,6 +58,11 @@
 #else
 VNET_DEFINE(int, ip6_use_defzone) = 0;
 #endif
+VNET_DEFINE(int, deembed_scopeid) = 1;
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, deembed_scopeid, CTLFLAG_RW,
+    &VNET_NAME(deembed_scopeid), 0,
+    "Extract embedded zone ID and set it to sin6_scope_id in sockaddr_in6.");
 
 /*
  * The scope6_lock protects the global sid default stored in
@@ -73,6 +80,9 @@
 #define SID(ifp) \
 	(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id)
 
+static int	scope6_get(struct ifnet *, struct scope6_id *);
+static int	scope6_set(struct ifnet *, struct scope6_id *);
+
 void
 scope6_init(void)
 {
@@ -116,6 +126,30 @@
 }
 
 int
+scope6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
+{
+	struct in6_ifreq *ifr;
+
+	if (ifp->if_afdata[AF_INET6] == NULL)
+		return (EPFNOSUPPORT);
+
+	ifr = (struct in6_ifreq *)data;
+	switch (cmd) {
+	case SIOCSSCOPE6:
+		return (scope6_set(ifp,
+		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+	case SIOCGSCOPE6:
+		return (scope6_get(ifp,
+		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+	case SIOCGSCOPE6DEF:
+		return (scope6_get_default(
+		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+static int
 scope6_set(struct ifnet *ifp, struct scope6_id *idlist)
 {
 	int i;
@@ -178,7 +212,7 @@
 	return (error);
 }
 
-int
+static int
 scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
 {
 	struct scope6_id *sid;
@@ -197,7 +231,6 @@
 	return (0);
 }
 
-
 /*
  * Get a scope of the address. Node-local, link-local, site-local or global.
  */
@@ -331,7 +364,6 @@
 int
 sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
 {
-	struct ifnet *ifp;
 	u_int32_t zoneid;
 
 	if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
@@ -346,15 +378,11 @@
 		 * zone IDs assuming a one-to-one mapping between interfaces
 		 * and links.
 		 */
-		if (V_if_index < zoneid)
+		if (V_if_index < zoneid || ifnet_byindex(zoneid) == NULL)
 			return (ENXIO);
-		ifp = ifnet_byindex(zoneid);
-		if (ifp == NULL) /* XXX: this can happen for some OS */
-			return (ENXIO);
 
 		/* XXX assignment to 16bit from 32bit variable */
 		sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);
-
 		sin6->sin6_scope_id = 0;
 	}
 
@@ -370,12 +398,6 @@
 	char ip6buf[INET6_ADDRSTRLEN];
 	u_int32_t zoneid;
 
-	if (sin6->sin6_scope_id != 0) {
-		log(LOG_NOTICE,
-		    "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
-		    ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
-		/* XXX: proceed anyway... */
-	}
 	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
 	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
 		/*
@@ -386,8 +408,19 @@
 			/* sanity check */
 			if (V_if_index < zoneid)
 				return (ENXIO);
+#if 0
+			/* XXX: Disabled due to possible deadlock. */
 			if (!ifnet_byindex(zoneid))
 				return (ENXIO);
+#endif
+			if (sin6->sin6_scope_id != 0 &&
+			    zoneid != sin6->sin6_scope_id) {
+				log(LOG_NOTICE,
+				    "%s: embedded scope mismatch: %s%%%d. "
+				    "sin6_scope_id was overridden\n", __func__,
+				    ip6_sprintf(ip6buf, &sin6->sin6_addr),
+				    sin6->sin6_scope_id);
+			}
 			sin6->sin6_addr.s6_addr16[1] = 0;
 			sin6->sin6_scope_id = zoneid;
 		}
@@ -410,63 +443,35 @@
 	u_int32_t zoneid = 0;
 	struct scope6_id *sid;
 
-	IF_AFDATA_RLOCK(ifp);
-
-	sid = SID(ifp);
-
-#ifdef DIAGNOSTIC
-	if (sid == NULL) { /* should not happen */
-		panic("in6_setscope: scope array is NULL");
-		/* NOTREACHED */
-	}
-#endif
-
 	/*
 	 * special case: the loopback address can only belong to a loopback
 	 * interface.
 	 */
 	if (IN6_IS_ADDR_LOOPBACK(in6)) {
-		if (!(ifp->if_flags & IFF_LOOPBACK)) {
-			IF_AFDATA_RUNLOCK(ifp);
+		if (!(ifp->if_flags & IFF_LOOPBACK))
 			return (EINVAL);
-		} else {
-			if (ret_id != NULL)
-				*ret_id = 0; /* there's no ambiguity */
+	} else {
+		scope = in6_addrscope(in6);
+		if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+		    scope == IPV6_ADDR_SCOPE_LINKLOCAL) {
+			/*
+			 * Currently we use interface indeces as the
+			 * zone IDs for interface-local and link-local
+			 * scopes.
+			 */
+			zoneid = ifp->if_index;
+			in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
+		} else if (scope != IPV6_ADDR_SCOPE_GLOBAL) {
+			IF_AFDATA_RLOCK(ifp);
+			sid = SID(ifp);
+			zoneid = sid->s6id_list[scope];
 			IF_AFDATA_RUNLOCK(ifp);
-			return (0);
 		}
 	}
 
-	scope = in6_addrscope(in6);
-	switch (scope) {
-	case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
-		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
-		break;
-
-	case IPV6_ADDR_SCOPE_LINKLOCAL:
-		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
-		break;
-
-	case IPV6_ADDR_SCOPE_SITELOCAL:
-		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
-		break;
-
-	case IPV6_ADDR_SCOPE_ORGLOCAL:
-		zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
-		break;
-
-	default:
-		zoneid = 0;	/* XXX: treat as global. */
-		break;
-	}
-	IF_AFDATA_RUNLOCK(ifp);
-
 	if (ret_id != NULL)
 		*ret_id = zoneid;
 
-	if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
-		in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
-
 	return (0);
 }
 

Modified: trunk/sys/netinet6/scope6_var.h
===================================================================
--- trunk/sys/netinet6/scope6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/scope6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
  * SUCH DAMAGE.
  *
  *	$KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/scope6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/scope6_var.h 271185 2014-09-06 04:39:26Z markj $
  */
 
 #ifndef _NETINET6_SCOPE6_VAR_H_
@@ -35,6 +35,8 @@
 #define _NETINET6_SCOPE6_VAR_H_
 
 #ifdef _KERNEL
+#include <net/vnet.h>
+
 struct scope6_id {
 	/*
 	 * 16 is correspondent to 4bit multicast scope field.
@@ -43,11 +45,13 @@
 	u_int32_t s6id_list[16];
 };
 
+VNET_DECLARE(int, deembed_scopeid);
+#define V_deembed_scopeid       VNET(deembed_scopeid)
+
 void	scope6_init(void);
 struct scope6_id *scope6_ifattach(struct ifnet *);
 void	scope6_ifdetach(struct scope6_id *);
-int	scope6_set(struct ifnet *, struct scope6_id *);
-int	scope6_get(struct ifnet *, struct scope6_id *);
+int	scope6_ioctl(u_long cmd, caddr_t data, struct ifnet *);
 void	scope6_setdefault(struct ifnet *);
 int	scope6_get_default(struct scope6_id *);
 u_int32_t scope6_addr2default(struct in6_addr *);

Modified: trunk/sys/netinet6/sctp6_usrreq.c
===================================================================
--- trunk/sys/netinet6/sctp6_usrreq.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/sctp6_usrreq.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/sctp6_usrreq.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/sctp6_usrreq.c 296052 2016-02-25 18:46:06Z tuexen $");
 
 #include <netinet/sctp_os.h>
 #ifdef INET6
@@ -40,9 +40,7 @@
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_var.h>
-#ifdef INET6
 #include <netinet6/sctp6_var.h>
-#endif
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_uio.h>
@@ -55,261 +53,142 @@
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_crc32.h>
+#include <netinet/icmp6.h>
 #include <netinet/udp.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
-#ifdef INET6
 #include <netipsec/ipsec6.h>
-#endif				/* INET6 */
 #endif				/* IPSEC */
 
 extern struct protosw inetsw[];
 
 int
-sctp6_input(struct mbuf **i_pak, int *offp, int proto)
+sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
 {
 	struct mbuf *m;
+	int iphlen;
+	uint32_t vrf_id;
+	uint8_t ecn_bits;
+	struct sockaddr_in6 src, dst;
 	struct ip6_hdr *ip6;
 	struct sctphdr *sh;
-	struct sctp_inpcb *in6p = NULL;
-	struct sctp_nets *net;
-	int refcount_up = 0;
-	uint32_t vrf_id = 0;
-
-#ifdef IPSEC
-	struct inpcb *in6p_ip;
-
-#endif
 	struct sctp_chunkhdr *ch;
-	int length, offset, iphlen;
-	uint8_t ecn_bits;
-	struct sctp_tcb *stcb = NULL;
-	int pkt_len = 0;
-	uint32_t mflowid;
-	uint8_t use_mflowid;
+	int length, offset;
 
 #if !defined(SCTP_WITH_NO_CSUM)
-	uint32_t check, calc_check;
+	uint8_t compute_crc;
 
 #endif
-	int off = *offp;
-	uint16_t port = 0;
+	uint32_t mflowid;
+	uint8_t mflowtype;
+	uint16_t fibnum;
 
-	/* get the VRF and table id's */
+	iphlen = *offp;
 	if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
 		SCTP_RELEASE_PKT(*i_pak);
-		return (-1);
+		return (IPPROTO_DONE);
 	}
 	m = SCTP_HEADER_TO_CHAIN(*i_pak);
-	pkt_len = SCTP_HEADER_LEN((*i_pak));
-
-#ifdef  SCTP_PACKET_LOGGING
-	sctp_packet_log(m, pkt_len);
+#ifdef SCTP_MBUF_LOGGING
+	/* Log in any input mbufs */
+	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+		sctp_log_mbc(m, SCTP_MBUF_INPUT);
+	}
 #endif
-	if (m->m_flags & M_FLOWID) {
-		mflowid = m->m_pkthdr.flowid;
-		use_mflowid = 1;
-	} else {
-		mflowid = 0;
-		use_mflowid = 0;
+#ifdef SCTP_PACKET_LOGGING
+	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+		sctp_packet_log(m);
 	}
+#endif
+	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+	    "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+	    m->m_pkthdr.len,
+	    if_name(m->m_pkthdr.rcvif),
+	    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+	mflowid = m->m_pkthdr.flowid;
+	mflowtype = M_HASHTYPE_GET(m);
+	fibnum = M_GETFIB(m);
+	SCTP_STAT_INCR(sctps_recvpackets);
+	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+	/* Get IP, SCTP, and first chunk header together in the first mbuf. */
+	offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	ip6 = mtod(m, struct ip6_hdr *);
-	/* Ensure that (sctphdr + sctp_chunkhdr) in a row. */
-	IP6_EXTHDR_GET(sh, struct sctphdr *, m, off,
-	    (int)(sizeof(*sh) + sizeof(*ch)));
+	IP6_EXTHDR_GET(sh, struct sctphdr *, m, iphlen,
+	    (int)(sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)));
 	if (sh == NULL) {
 		SCTP_STAT_INCR(sctps_hdrops);
 		return (IPPROTO_DONE);
 	}
 	ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
-	iphlen = off;
-	offset = iphlen + sizeof(*sh) + sizeof(*ch);
-	SCTPDBG(SCTP_DEBUG_INPUT1,
-	    "sctp6_input() length:%d iphlen:%d\n", pkt_len, iphlen);
-
-
-#if defined(NFAITH) && NFAITH > 0
-
-	if (faithprefix_p != NULL && (*faithprefix_p) (&ip6->ip6_dst)) {
+	offset -= sizeof(struct sctp_chunkhdr);
+	memset(&src, 0, sizeof(struct sockaddr_in6));
+	src.sin6_family = AF_INET6;
+	src.sin6_len = sizeof(struct sockaddr_in6);
+	src.sin6_port = sh->src_port;
+	src.sin6_addr = ip6->ip6_src;
+	if (in6_setscope(&src.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
+		goto out;
+	}
+	memset(&dst, 0, sizeof(struct sockaddr_in6));
+	dst.sin6_family = AF_INET6;
+	dst.sin6_len = sizeof(struct sockaddr_in6);
+	dst.sin6_port = sh->dest_port;
+	dst.sin6_addr = ip6->ip6_dst;
+	if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
+		goto out;
+	}
+	if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) {
 		/* XXX send icmp6 host/port unreach? */
-		goto bad;
+		goto out;
 	}
-#endif				/* NFAITH defined and > 0 */
-	SCTP_STAT_INCR(sctps_recvpackets);
-	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
-	SCTPDBG(SCTP_DEBUG_INPUT1, "V6 input gets a packet iphlen:%d pktlen:%d\n",
-	    iphlen, pkt_len);
+	length = ntohs(ip6->ip6_plen) + iphlen;
+	/* Validate mbuf chain length with IP payload length. */
+	if (SCTP_HEADER_LEN(m) != length) {
+		SCTPDBG(SCTP_DEBUG_INPUT1,
+		    "sctp6_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m));
+		SCTP_STAT_INCR(sctps_hdrops);
+		goto out;
+	}
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-		/* No multi-cast support in SCTP */
-		goto bad;
+		goto out;
 	}
-	/* destination port of 0 is illegal, based on RFC2960. */
-	if (sh->dest_port == 0)
-		goto bad;
-
-	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
-	    "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
-	    m->m_pkthdr.len,
-	    if_name(m->m_pkthdr.rcvif),
-	    m->m_pkthdr.csum_flags);
+	ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);
 #if defined(SCTP_WITH_NO_CSUM)
 	SCTP_STAT_INCR(sctps_recvnocrc);
 #else
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
 		SCTP_STAT_INCR(sctps_recvhwcrc);
-		goto sctp_skip_csum;
+		compute_crc = 0;
+	} else {
+		SCTP_STAT_INCR(sctps_recvswcrc);
+		compute_crc = 1;
 	}
-	check = sh->checksum;	/* save incoming checksum */
-	sh->checksum = 0;	/* prepare for calc */
-	calc_check = sctp_calculate_cksum(m, iphlen);
-	SCTP_STAT_INCR(sctps_recvswcrc);
-	if (calc_check != check) {
-		SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x  m:%p phlen:%d\n",
-		    calc_check, check, m, iphlen);
-		stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
-		    sh, ch, &in6p, &net, vrf_id);
-		if ((net) && (port)) {
-			if (net->port == 0) {
-				sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
-			}
-			net->port = port;
-		}
-		if ((net != NULL) && (use_mflowid != 0)) {
-			net->flowid = mflowid;
-#ifdef INVARIANTS
-			net->flowidset = 1;
 #endif
-		}
-		/* in6p's ref-count increased && stcb locked */
-		if ((in6p) && (stcb)) {
-			sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
-			sctp_chunk_output((struct sctp_inpcb *)in6p, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
-		} else if ((in6p != NULL) && (stcb == NULL)) {
-			refcount_up = 1;
-		}
-		SCTP_STAT_INCR(sctps_badsum);
-		SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
-		goto bad;
-	}
-	sh->checksum = calc_check;
-
-sctp_skip_csum:
+	sctp_common_input_processing(&m, iphlen, offset, length,
+	    (struct sockaddr *)&src,
+	    (struct sockaddr *)&dst,
+	    sh, ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+	    compute_crc,
 #endif
-	net = NULL;
-	/*
-	 * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
-	 * IP/SCTP/first chunk header...
-	 */
-	stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
-	    sh, ch, &in6p, &net, vrf_id);
-	if ((net) && (port)) {
-		if (net->port == 0) {
-			sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
-		}
-		net->port = port;
-	}
-	if ((net != NULL) && (use_mflowid != 0)) {
-		net->flowid = mflowid;
-#ifdef INVARIANTS
-		net->flowidset = 1;
-#endif
-	}
-	/* in6p's ref-count increased */
-	if (in6p == NULL) {
-		struct sctp_init_chunk *init_chk, chunk_buf;
-
-		SCTP_STAT_INCR(sctps_noport);
-		if (ch->chunk_type == SCTP_INITIATION) {
-			/*
-			 * we do a trick here to get the INIT tag, dig in
-			 * and get the tag from the INIT and put it in the
-			 * common header.
-			 */
-			init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
-			    iphlen + sizeof(*sh), sizeof(*init_chk),
-			    (uint8_t *) & chunk_buf);
-			if (init_chk)
-				sh->v_tag = init_chk->init.initiate_tag;
-			else
-				sh->v_tag = 0;
-		}
-		if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
-			sctp_send_shutdown_complete2(m, sh,
-			    use_mflowid, mflowid,
-			    vrf_id, port);
-			goto bad;
-		}
-		if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
-			goto bad;
-		}
-		if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
-			if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
-			    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
-			    (ch->chunk_type != SCTP_INIT))) {
-				sctp_send_abort(m, iphlen, sh, 0, NULL,
-				    use_mflowid, mflowid,
-				    vrf_id, port);
-			}
-		}
-		goto bad;
-	} else if (stcb == NULL) {
-		refcount_up = 1;
-	}
-#ifdef IPSEC
-	/*
-	 * Check AH/ESP integrity.
-	 */
-	in6p_ip = (struct inpcb *)in6p;
-	if (in6p_ip && (ipsec6_in_reject(m, in6p_ip))) {
-/* XXX */
-		MODULE_GLOBAL(ipsec6stat).in_polvio++;
-		goto bad;
-	}
-#endif				/* IPSEC */
-
-	/*
-	 * CONTROL chunk processing
-	 */
-	offset -= sizeof(*ch);
-	ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);
-
-	/* Length now holds the total packet length payload + iphlen */
-	length = ntohs(ip6->ip6_plen) + iphlen;
-
-	/* sa_ignore NO_NULL_CHK */
-	sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
-	    in6p, stcb, net, ecn_bits,
-	    use_mflowid, mflowid,
+	    ecn_bits,
+	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
-	/* inp's ref-count reduced && stcb unlocked */
-	/* XXX this stuff below gets moved to appropriate parts later... */
-	if (m)
+out:
+	if (m) {
 		sctp_m_freem(m);
-	if ((in6p) && refcount_up) {
-		/* reduce ref-count */
-		SCTP_INP_WLOCK(in6p);
-		SCTP_INP_DECR_REF(in6p);
-		SCTP_INP_WUNLOCK(in6p);
 	}
 	return (IPPROTO_DONE);
+}
 
-bad:
-	if (stcb) {
-		SCTP_TCB_UNLOCK(stcb);
-	}
-	if ((in6p) && refcount_up) {
-		/* reduce ref-count */
-		SCTP_INP_WLOCK(in6p);
-		SCTP_INP_DECR_REF(in6p);
-		SCTP_INP_WUNLOCK(in6p);
-	}
-	if (m)
-		sctp_m_freem(m);
-	return (IPPROTO_DONE);
+
+int
+sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED)
+{
+	return (sctp6_input_with_port(i_pak, offp, 0));
 }
 
-
 static void
 sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6,
     struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net)
@@ -336,7 +215,8 @@
 	 */
 	nxtsz = ntohl(icmp6->icmp6_mtu);
 	/* Stop any PMTU timer */
-	sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
+	sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL,
+	    SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
 
 	/* Adjust destination size limit */
 	if (net->mtu > nxtsz) {
@@ -395,7 +275,7 @@
     struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 #endif
@@ -450,7 +330,7 @@
 		 * with no TCB
 		 */
 		sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(inp);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
@@ -458,8 +338,9 @@
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
-		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+		    SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 		/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
 #endif
@@ -503,7 +384,6 @@
 		 * XXX: We assume that when IPV6 is non NULL, M and OFF are
 		 * valid.
 		 */
-		/* check if we can safely examine src and dst ports */
 		struct sctp_inpcb *inp = NULL;
 		struct sctp_tcb *stcb = NULL;
 		struct sctp_nets *net = NULL;
@@ -512,6 +392,10 @@
 		if (ip6cp->ip6c_m == NULL)
 			return;
 
+		/* Check if we can safely examine the SCTP header. */
+		if (ip6cp->ip6c_m->m_pkthdr.len < ip6cp->ip6c_off + sizeof(sh))
+			return;
+
 		bzero(&sh, sizeof(sh));
 		bzero(&final, sizeof(final));
 		inp = NULL;
@@ -523,8 +407,8 @@
 		final.sin6_family = AF_INET6;
 		final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
 		final.sin6_port = sh.dest_port;
-		stcb = sctp_findassociation_addr_sa((struct sockaddr *)ip6cp->ip6c_src,
-		    (struct sockaddr *)&final,
+		stcb = sctp_findassociation_addr_sa((struct sockaddr *)&final,
+		    (struct sockaddr *)ip6cp->ip6c_src,
 		    &inp, &net, 1, vrf_id);
 		/* inp's ref-count increased && stcb locked */
 		if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
@@ -591,8 +475,8 @@
 	if (error)
 		return (error);
 
-	stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[0]),
-	    sin6tosa(&addrs[1]),
+	stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[1]),
+	    sin6tosa(&addrs[0]),
 	    &inp, &net, 1, vrf_id);
 	if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
 		if ((inp != NULL) && (stcb == NULL)) {
@@ -897,18 +781,11 @@
 		}
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
-		if (!MODULE_GLOBAL(ip6_v6only)) {
-			struct sockaddr_in sin;
+		struct sockaddr_in sin;
 
-			/* convert v4-mapped into v4 addr and send */
-			in6_sin6_2_sin(&sin, sin6);
-			return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin,
-			    control, p));
-		} else {
-			/* mapped addresses aren't enabled */
-			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
-			return (EINVAL);
-		}
+		/* convert v4-mapped into v4 addr and send */
+		in6_sin6_2_sin(&sin, sin6);
+		return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin, control, p));
 	}
 #endif				/* INET */
 connected_type:
@@ -956,16 +833,18 @@
 	uint32_t vrf_id;
 	int error = 0;
 	struct sctp_inpcb *inp;
-	struct in6pcb *inp6;
 	struct sctp_tcb *stcb;
 
 #ifdef INET
+	struct in6pcb *inp6;
 	struct sockaddr_in6 *sin6;
-	struct sockaddr_storage ss;
+	union sctp_sockstore store;
 
 #endif
 
+#ifdef INET
 	inp6 = (struct in6pcb *)so->so_pcb;
+#endif
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET);
@@ -1042,17 +921,9 @@
 		}
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
-		if (!MODULE_GLOBAL(ip6_v6only)) {
-			/* convert v4-mapped into v4 addr */
-			in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
-			addr = (struct sockaddr *)&ss;
-		} else {
-			/* mapped addresses aren't enabled */
-			SCTP_INP_RUNLOCK(inp);
-			SCTP_ASOC_CREATE_UNLOCK(inp);
-			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
-			return (EINVAL);
-		}
+		/* convert v4-mapped into v4 addr */
+		in6_sin6_2_sin(&store.sin, sin6);
+		addr = &store.sa;
 	}
 #endif				/* INET */
 	/* Now do we connect? */
@@ -1083,7 +954,7 @@
 		return (EALREADY);
 	}
 	/* We are GOOD to go */
-	stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+	stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, p);
 	SCTP_ASOC_CREATE_UNLOCK(inp);
 	if (stcb == NULL) {
 		/* Gak! no memory */
@@ -1142,7 +1013,10 @@
 
 			stcb = LIST_FIRST(&inp->sctp_asoc_list);
 			if (stcb == NULL) {
-				goto notConn6;
+				SCTP_INP_RUNLOCK(inp);
+				SCTP_FREE_SONAME(sin6);
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+				return (ENOENT);
 			}
 			fnd = 0;
 			sin_a6 = NULL;
@@ -1159,7 +1033,10 @@
 			}
 			if ((!fnd) || (sin_a6 == NULL)) {
 				/* punt */
-				goto notConn6;
+				SCTP_INP_RUNLOCK(inp);
+				SCTP_FREE_SONAME(sin6);
+				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+				return (ENOENT);
 			}
 			vrf_id = inp->def_vrf_id;
 			sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id);
@@ -1168,7 +1045,6 @@
 			}
 		} else {
 			/* For the bound all case you get back 0 */
-	notConn6:
 			memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr));
 		}
 	} else {
@@ -1180,7 +1056,7 @@
 			if (laddr->ifa->address.sa.sa_family == AF_INET6) {
 				struct sockaddr_in6 *sin_a;
 
-				sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+				sin_a = &laddr->ifa->address.sin6;
 				sin6->sin6_addr = sin_a->sin6_addr;
 				fnd = 1;
 				break;
@@ -1257,8 +1133,11 @@
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
 		return (ENOENT);
 	}
-	if ((error = sa6_recoverscope(sin6)) != 0)
+	if ((error = sa6_recoverscope(sin6)) != 0) {
+		SCTP_FREE_SONAME(sin6);
+		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error);
 		return (error);
+	}
 	*addr = (struct sockaddr *)sin6;
 	return (0);
 }
@@ -1266,10 +1145,6 @@
 static int
 sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
 {
-#ifdef INET
-	struct sockaddr *addr;
-
-#endif
 	struct in6pcb *inp6 = sotoin6pcb(so);
 	int error;
 
@@ -1281,19 +1156,21 @@
 	error = sctp6_getaddr(so, nam);
 #ifdef INET
 	if (error) {
+		struct sockaddr_in6 *sin6;
+
 		/* try v4 next if v6 failed */
 		error = sctp_ingetaddr(so, nam);
 		if (error) {
 			return (error);
 		}
-		addr = *nam;
-		/* if I'm V6ONLY, convert it to v4-mapped */
-		if (SCTP_IPV6_V6ONLY(inp6)) {
-			struct sockaddr_in6 sin6;
-
-			in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
-			memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+		SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+		if (sin6 == NULL) {
+			SCTP_FREE_SONAME(*nam);
+			return (ENOMEM);
 		}
+		in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+		SCTP_FREE_SONAME(*nam);
+		*nam = (struct sockaddr *)sin6;
 	}
 #endif
 	return (error);
@@ -1303,10 +1180,6 @@
 static int
 sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
-#ifdef INET
-	struct sockaddr *addr;
-
-#endif
 	struct in6pcb *inp6 = sotoin6pcb(so);
 	int error;
 
@@ -1318,19 +1191,21 @@
 	error = sctp6_peeraddr(so, nam);
 #ifdef INET
 	if (error) {
+		struct sockaddr_in6 *sin6;
+
 		/* try v4 next if v6 failed */
 		error = sctp_peeraddr(so, nam);
 		if (error) {
 			return (error);
 		}
-		addr = *nam;
-		/* if I'm V6ONLY, convert it to v4-mapped */
-		if (SCTP_IPV6_V6ONLY(inp6)) {
-			struct sockaddr_in6 sin6;
-
-			in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
-			memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+		SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+		if (sin6 == NULL) {
+			SCTP_FREE_SONAME(*nam);
+			return (ENOMEM);
 		}
+		in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+		SCTP_FREE_SONAME(*nam);
+		*nam = (struct sockaddr *)sin6;
 	}
 #endif
 	return (error);

Modified: trunk/sys/netinet6/sctp6_var.h
===================================================================
--- trunk/sys/netinet6/sctp6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/sctp6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/sctp6_var.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/sctp6_var.h 243186 2012-11-17 20:04:04Z tuexen $");
 
 #ifndef _NETINET6_SCTP6_VAR_H_
 #define _NETINET6_SCTP6_VAR_H_
@@ -42,21 +42,16 @@
 SYSCTL_DECL(_net_inet6_sctp6);
 extern struct pr_usrreqs sctp6_usrreqs;
 
-
 int sctp6_input(struct mbuf **, int *, int);
-int sctp6_output
-(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+int sctp6_input_with_port(struct mbuf **, int *, uint16_t);
+int 
+sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
     struct mbuf *, struct proc *);
-	void sctp6_ctlinput(int, struct sockaddr *, void *);
+void sctp6_ctlinput(int, struct sockaddr *, void *);
+extern void 
+sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *,
+    struct sctphdr *, struct sockaddr *,
+    struct sctp_tcb *, struct sctp_nets *);
 
-
-	extern void sctp6_notify(struct sctp_inpcb *inp,
-         struct icmp6_hdr *icmph,
-         struct sctphdr *sh,
-         struct sockaddr *to,
-         struct sctp_tcb *stcb,
-         struct sctp_nets *net);
-
-
-#endif				/* _KERNEL */
 #endif
+#endif

Modified: trunk/sys/netinet6/send.c
===================================================================
--- trunk/sys/netinet6/send.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/send.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,8 +26,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/send.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/send.c 254889 2013-08-25 21:54:41Z markj $");
 
+#include "opt_kdtrace.h"
+
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
@@ -34,6 +36,7 @@
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockstate.h>
@@ -47,6 +50,7 @@
 #include <net/vnet.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
@@ -181,6 +185,10 @@
 		dst.sin6_len = sizeof(dst);
 		dst.sin6_addr = ip6->ip6_dst;
 
+		m_clrprotoflags(m);	/* Avoid confusing lower layers. */
+
+		IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
+
 		/*
 		 * Output the packet as nd6.c:nd6_output_lle() would do.
 		 * The mbuf is always consumed, so we do not have to care

Modified: trunk/sys/netinet6/send.h
===================================================================
--- trunk/sys/netinet6/send.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/send.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -24,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: stable/9/sys/netinet6/send.h 222712 2011-06-05 11:40:30Z hrs $
+ * $FreeBSD: stable/10/sys/netinet6/send.h 222712 2011-06-05 11:40:30Z hrs $
  */
 
 #ifndef _NETINET6_SEND_H_

Modified: trunk/sys/netinet6/tcp6_var.h
===================================================================
--- trunk/sys/netinet6/tcp6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/tcp6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -57,7 +57,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet6/tcp6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/tcp6_var.h 241916 2012-10-22 21:49:56Z delphij $
  */
 
 #ifndef _NETINET_TCP6_VAR_H_

Modified: trunk/sys/netinet6/udp6_usrreq.c
===================================================================
--- trunk/sys/netinet6/udp6_usrreq.c	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/udp6_usrreq.c	2018-05-25 13:10:11 UTC (rev 9927)
@@ -2,6 +2,7 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
@@ -68,12 +69,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/udp6_usrreq.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/udp6_usrreq.c 277789 2015-01-27 06:19:30Z bryanv $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
+#include "opt_kdtrace.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
@@ -83,6 +85,7 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
+#include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
@@ -96,6 +99,7 @@
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
@@ -107,6 +111,7 @@
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
+#include <netinet/udplite.h>
 
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
@@ -135,14 +140,24 @@
 {
 	struct socket *so;
 	struct mbuf *opts;
+	struct udpcb *up;
 
 	INP_LOCK_ASSERT(inp);
 
+	/*
+	 * Engage the tunneling protocol.
+	 */
+	up = intoudpcb(inp);
+	if (up->u_tun_func != NULL) {
+		(*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
+		    up->u_tun_ctx);
+		return;
+	}
 #ifdef IPSEC
 	/* Check AH/ESP integrity. */
 	if (ipsec6_in_reject(n, inp)) {
 		m_freem(n);
-		V_ipsec6stat.in_polvio++;
+		IPSEC6STAT_INC(ips_in_polvio);
 		return;
 	}
 #endif /* IPSEC */
@@ -179,12 +194,15 @@
 	struct ip6_hdr *ip6;
 	struct udphdr *uh;
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 	int off = *offp;
+	int cscov_partial;
 	int plen, ulen;
 	struct sockaddr_in6 fromsa;
 	struct m_tag *fwd_tag;
 	uint16_t uh_sum;
+	uint8_t nxt;
 
 	ifp = m->m_pkthdr.rcvif;
 	ip6 = mtod(m, struct ip6_hdr *);
@@ -216,28 +234,43 @@
 	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
 	ulen = ntohs((u_short)uh->uh_ulen);
 
-	if (plen != ulen) {
-		UDPSTAT_INC(udps_badlen);
-		goto badunlocked;
+	nxt = ip6->ip6_nxt;
+	cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
+	if (nxt == IPPROTO_UDPLITE) {
+		/* Zero means checksum over the complete packet. */
+		if (ulen == 0)
+			ulen = plen;
+		if (ulen == plen)
+			cscov_partial = 0;
+		if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
+			/* XXX: What is the right UDPLite MIB counter? */
+			goto badunlocked;
+		}
+		if (uh->uh_sum == 0) {
+			/* XXX: What is the right UDPLite MIB counter? */
+			goto badunlocked;
+		}
+	} else {
+		if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
+			UDPSTAT_INC(udps_badlen);
+			goto badunlocked;
+		}
+		if (uh->uh_sum == 0) {
+			UDPSTAT_INC(udps_nosum);
+			goto badunlocked;
+		}
 	}
 
-	/*
-	 * Checksum extended UDP header and data.
-	 */
-	if (uh->uh_sum == 0) {
-		UDPSTAT_INC(udps_nosum);
-		goto badunlocked;
-	}
-
-	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+	if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
+	    !cscov_partial) {
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			uh_sum = m->m_pkthdr.csum_data;
 		else
-			uh_sum = in6_cksum_pseudo(ip6, ulen,
-			    IPPROTO_UDP, m->m_pkthdr.csum_data);
+			uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
+			    m->m_pkthdr.csum_data);
 		uh_sum ^= 0xffff;
 	} else
-		uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen);
+		uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
 
 	if (uh_sum != 0) {
 		UDPSTAT_INC(udps_badsum);
@@ -250,11 +283,13 @@
 	init_sin6(&fromsa, m);
 	fromsa.sin6_port = uh->uh_sport;
 
+	pcbinfo = get_inpcbinfo(nxt);
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		struct inpcb *last;
+		struct inpcbhead *pcblist;
 		struct ip6_moptions *imo;
 
-		INP_INFO_RLOCK(&V_udbinfo);
+		INP_INFO_RLOCK(pcbinfo);
 		/*
 		 * In the event that laddr should be set to the link-local
 		 * address (this happens in RIPng), the multicast address
@@ -270,8 +305,9 @@
 		 * here.  We need udphdr for IPsec processing so we do that
 		 * later.
 		 */
+		pcblist = get_pcblist(nxt);
 		last = NULL;
-		LIST_FOREACH(inp, &V_udb, inp_list) {
+		LIST_FOREACH(inp, pcblist, inp_list) {
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (inp->inp_lport != uh->uh_dport)
@@ -334,20 +370,9 @@
 
 				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
 					INP_RLOCK(last);
-					up = intoudpcb(last);
-					if (up->u_tun_func == NULL) {
-						udp6_append(last, n, off, &fromsa);
-					} else {
-						/*
-						 * Engage the tunneling
-						 * protocol we will have to
-						 * leave the info_lock up,
-						 * since we are hunting
-						 * through multiple UDP's.
-						 * 
-						 */
-						(*up->u_tun_func)(n, off, last);
-					}
+					UDP_PROBE(receive, NULL, last, ip6,
+					    last, uh);
+					udp6_append(last, n, off, &fromsa);
 					INP_RUNLOCK(last);
 				}
 			}
@@ -376,16 +401,9 @@
 			goto badheadlocked;
 		}
 		INP_RLOCK(last);
-		INP_INFO_RUNLOCK(&V_udbinfo);
-		up = intoudpcb(last);
-		if (up->u_tun_func == NULL) {
-			udp6_append(last, m, off, &fromsa);
-		} else {
-			/*
-			 * Engage the tunneling protocol.
-			 */
-			(*up->u_tun_func)(m, off, last);
-		}
+		INP_INFO_RUNLOCK(pcbinfo);
+		UDP_PROBE(receive, NULL, last, ip6, last, uh);
+		udp6_append(last, m, off, &fromsa);
 		INP_RUNLOCK(last);
 		return (IPPROTO_DONE);
 	}
@@ -406,8 +424,8 @@
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
-		inp = in6_pcblookup_mbuf(&V_udbinfo,
-		    &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
+		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
@@ -415,7 +433,7 @@
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
-			inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
+			inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
 			    uh->uh_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -425,7 +443,7 @@
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP6_NEXTHOP;
 	} else
-		inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
+		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
@@ -456,20 +474,20 @@
 	}
 	INP_RLOCK_ASSERT(inp);
 	up = intoudpcb(inp);
-	if (up->u_tun_func == NULL) {
-		udp6_append(inp, m, off, &fromsa);
-	} else {
-		/*
-		 * Engage the tunneling protocol.
-		 */
-
-		(*up->u_tun_func)(m, off, inp);
+	if (cscov_partial) {
+		if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
+			INP_RUNLOCK(inp);
+			m_freem(m);
+			return (IPPROTO_DONE);
+		}
 	}
+	UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+	udp6_append(inp, m, off, &fromsa);
 	INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);
 
 badheadlocked:
-	INP_INFO_RUNLOCK(&V_udbinfo);
+	INP_INFO_RUNLOCK(pcbinfo);
 badunlocked:
 	if (m)
 		m_freem(m);
@@ -476,8 +494,9 @@
 	return (IPPROTO_DONE);
 }
 
-void
-udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+static void
+udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
+    struct inpcbinfo *pcbinfo)
 {
 	struct udphdr uh;
 	struct ip6_hdr *ip6;
@@ -533,14 +552,28 @@
 		bzero(&uh, sizeof(uh));
 		m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
 
-		(void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
+		(void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
 		    cmdarg, notify);
 	} else
-		(void) in6_pcbnotify(&V_udbinfo, sa, 0,
+		(void)in6_pcbnotify(pcbinfo, sa, 0,
 		    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
 }
 
+void
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+	return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo));
+}
+
+void
+udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+	return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo));
+}
+
 static int
 udp6_getcred(SYSCTL_HANDLER_ARGS)
 {
@@ -598,9 +631,12 @@
 	struct in6_addr *laddr, *faddr, in6a;
 	struct sockaddr_in6 *sin6 = NULL;
 	struct ifnet *oifp = NULL;
+	int cscov_partial = 0;
 	int scope_ambiguous = 0;
 	u_short fport;
 	int error = 0;
+	uint8_t nxt;
+	uint16_t cscov = 0;
 	struct ip6_pktopts *optp, opt;
 	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
 	int flags;
@@ -643,8 +679,6 @@
 		faddr = &sin6->sin6_addr;
 
 		/*
-		 * IPv4 version of udp_output calls in_pcbconnect in this case,
-		 * which needs splnet and affects performance.
 		 * Since we saw no essential reason for calling in_pcbconnect,
 		 * we get rid of such kind of logic, and call in6_selectsrc
 		 * and in6_pcbsetport in order to fill in the local address
@@ -750,7 +784,7 @@
 	 * Calculate data length and get a mbuf
 	 * for UDP and IP6 headers.
 	 */
-	M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
+	M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
 	if (m == 0) {
 		error = ENOBUFS;
 		goto release;
@@ -759,10 +793,25 @@
 	/*
 	 * Stuff checksum and output datagram.
 	 */
+	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+	    IPPROTO_UDP : IPPROTO_UDPLITE;
 	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
 	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
 	udp6->uh_dport = fport;
-	if (plen <= 0xffff)
+	if (nxt == IPPROTO_UDPLITE) {
+		struct udpcb *up;
+
+		up = intoudpcb(inp);
+		cscov = up->u_txcslen;
+		if (cscov >= plen)
+			cscov = 0;
+		udp6->uh_ulen = htons(cscov);
+		/*
+		 * For UDP-Lite, checksum coverage length of zero means
+		 * the entire UDPLite packet is covered by the checksum.
+		 */
+		cscov_partial = (cscov == 0) ? 0 : 1;
+	} else if (plen <= 0xffff)
 		udp6->uh_ulen = htons((u_short)plen);
 	else
 		udp6->uh_ulen = 0;
@@ -774,20 +823,25 @@
 		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
 		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
 		ip6->ip6_vfc	|= IPV6_VERSION;
-#if 0				/* ip6_plen will be filled in ip6_output. */
 		ip6->ip6_plen	= htons((u_short)plen);
-#endif
-		ip6->ip6_nxt	= IPPROTO_UDP;
+		ip6->ip6_nxt	= nxt;
 		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
-		udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
-		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
-		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+		if (cscov_partial) {
+			if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+			    sizeof(struct ip6_hdr), plen, cscov)) == 0)
+				udp6->uh_sum = 0xffff;
+		} else {
+			udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+		}
 
 		flags = 0;
 
+		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 		UDPSTAT_INC(udps_opackets);
 		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
 		    NULL, inp);
@@ -813,7 +867,9 @@
 udp6_abort(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
 
@@ -829,10 +885,10 @@
 
 	INP_WLOCK(inp);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		inp->in6p_laddr = in6addr_any;
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
@@ -842,8 +898,10 @@
 udp6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
 
@@ -852,10 +910,10 @@
 		if (error)
 			return (error);
 	}
-	INP_INFO_WLOCK(&V_udbinfo);
-	error = in_pcballoc(so, &V_udbinfo);
+	INP_INFO_WLOCK(pcbinfo);
+	error = in_pcballoc(so, pcbinfo);
 	if (error) {
-		INP_INFO_WUNLOCK(&V_udbinfo);
+		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 	inp = (struct inpcb *)so->so_pcb;
@@ -876,11 +934,11 @@
 	if (error) {
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
-		INP_INFO_WUNLOCK(&V_udbinfo);
+		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_udbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 	return (0);
 }
 
@@ -888,13 +946,15 @@
 udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
 
 	INP_WLOCK(inp);
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -922,7 +982,7 @@
 #ifdef INET
 out:
 #endif
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
@@ -931,7 +991,9 @@
 udp6_close(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
 
@@ -946,10 +1008,10 @@
 #endif
 	INP_WLOCK(inp);
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		inp->in6p_laddr = in6addr_any;
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
@@ -959,9 +1021,11 @@
 udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in6 *sin6;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	sin6 = (struct sockaddr_in6 *)nam;
 	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
@@ -988,10 +1052,10 @@
 		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
 		if (error != 0)
 			goto out;
-		INP_HASH_WLOCK(&V_udbinfo);
+		INP_HASH_WLOCK(pcbinfo);
 		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
 		    td->td_ucred);
-		INP_HASH_WUNLOCK(&V_udbinfo);
+		INP_HASH_WUNLOCK(pcbinfo);
 		if (error == 0)
 			soisconnected(so);
 		goto out;
@@ -1006,9 +1070,9 @@
 	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
 	if (error != 0)
 		goto out;
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	error = in6_pcbconnect(inp, nam, td->td_ucred);
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	if (error == 0)
 		soisconnected(so);
 out:
@@ -1020,18 +1084,20 @@
 udp6_detach(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
 
-	INP_INFO_WLOCK(&V_udbinfo);
+	INP_INFO_WLOCK(pcbinfo);
 	INP_WLOCK(inp);
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
-	INP_INFO_WUNLOCK(&V_udbinfo);
+	INP_INFO_WUNLOCK(pcbinfo);
 	udp_discardcb(up);
 }
 
@@ -1039,8 +1105,10 @@
 udp6_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
 
@@ -1061,10 +1129,10 @@
 		goto out;
 	}
 
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
@@ -1078,8 +1146,10 @@
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
+	struct inpcbinfo *pcbinfo;
 	int error = 0;
 
+	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
 
@@ -1131,9 +1201,9 @@
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
-	INP_HASH_WLOCK(&V_udbinfo);
+	INP_HASH_WLOCK(pcbinfo);
 	error = udp6_output(inp, m, addr, control, td);
-	INP_HASH_WUNLOCK(&V_udbinfo);
+	INP_HASH_WUNLOCK(pcbinfo);
 #ifdef INET
 #endif	
 	INP_WUNLOCK(inp);

Modified: trunk/sys/netinet6/udp6_var.h
===================================================================
--- trunk/sys/netinet6/udp6_var.h	2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/udp6_var.h	2018-05-25 13:10:11 UTC (rev 9927)
@@ -58,7 +58,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)udp_var.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/udp6_var.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/udp6_var.h 265946 2014-05-13 06:05:53Z kevlo $
  */
 
 #ifndef _NETINET6_UDP6_VAR_H_
@@ -70,6 +70,7 @@
 extern struct pr_usrreqs	udp6_usrreqs;
 
 void	udp6_ctlinput(int, struct sockaddr *, void *);
+void	udplite6_ctlinput(int, struct sockaddr *, void *);
 int	udp6_input(struct mbuf **, int *, int);
 #endif
 



More information about the Midnightbsd-cvs mailing list