[Midnightbsd-cvs] src [9927] trunk/sys: sync with freebsd
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Fri May 25 09:10:12 EDT 2018
Revision: 9927
http://svnweb.midnightbsd.org/src/?rev=9927
Author: laffer1
Date: 2018-05-25 09:10:11 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/netinet/accf_data.c
trunk/sys/netinet/accf_dns.c
trunk/sys/netinet/accf_http.c
trunk/sys/netinet/cc/cc.c
trunk/sys/netinet/cc/cc_chd.c
trunk/sys/netinet/cc/cc_cubic.c
trunk/sys/netinet/cc/cc_cubic.h
trunk/sys/netinet/cc/cc_hd.c
trunk/sys/netinet/cc/cc_htcp.c
trunk/sys/netinet/cc/cc_module.h
trunk/sys/netinet/cc/cc_newreno.c
trunk/sys/netinet/cc/cc_vegas.c
trunk/sys/netinet/cc.h
trunk/sys/netinet/icmp6.h
trunk/sys/netinet/icmp_var.h
trunk/sys/netinet/if_atm.c
trunk/sys/netinet/if_atm.h
trunk/sys/netinet/if_ether.c
trunk/sys/netinet/if_ether.h
trunk/sys/netinet/igmp.c
trunk/sys/netinet/igmp.h
trunk/sys/netinet/igmp_var.h
trunk/sys/netinet/in.c
trunk/sys/netinet/in.h
trunk/sys/netinet/in_cksum.c
trunk/sys/netinet/in_debug.c
trunk/sys/netinet/in_gif.c
trunk/sys/netinet/in_mcast.c
trunk/sys/netinet/in_pcb.c
trunk/sys/netinet/in_pcb.h
trunk/sys/netinet/in_pcbgroup.c
trunk/sys/netinet/in_proto.c
trunk/sys/netinet/in_rmx.c
trunk/sys/netinet/in_systm.h
trunk/sys/netinet/in_var.h
trunk/sys/netinet/ip.h
trunk/sys/netinet/ip6.h
trunk/sys/netinet/ip_carp.c
trunk/sys/netinet/ip_carp.h
trunk/sys/netinet/ip_divert.c
trunk/sys/netinet/ip_divert.h
trunk/sys/netinet/ip_dummynet.h
trunk/sys/netinet/ip_ecn.c
trunk/sys/netinet/ip_ecn.h
trunk/sys/netinet/ip_encap.c
trunk/sys/netinet/ip_encap.h
trunk/sys/netinet/ip_fastfwd.c
trunk/sys/netinet/ip_fw.h
trunk/sys/netinet/ip_gre.c
trunk/sys/netinet/ip_icmp.c
trunk/sys/netinet/ip_icmp.h
trunk/sys/netinet/ip_id.c
trunk/sys/netinet/ip_input.c
trunk/sys/netinet/ip_ipsec.c
trunk/sys/netinet/ip_ipsec.h
trunk/sys/netinet/ip_mroute.c
trunk/sys/netinet/ip_mroute.h
trunk/sys/netinet/ip_options.c
trunk/sys/netinet/ip_options.h
trunk/sys/netinet/ip_output.c
trunk/sys/netinet/ip_var.h
trunk/sys/netinet/khelp/h_ertt.c
trunk/sys/netinet/khelp/h_ertt.h
trunk/sys/netinet/libalias/HISTORY
trunk/sys/netinet/libalias/alias.c
trunk/sys/netinet/libalias/alias.h
trunk/sys/netinet/libalias/alias_cuseeme.c
trunk/sys/netinet/libalias/alias_db.c
trunk/sys/netinet/libalias/alias_dummy.c
trunk/sys/netinet/libalias/alias_ftp.c
trunk/sys/netinet/libalias/alias_irc.c
trunk/sys/netinet/libalias/alias_local.h
trunk/sys/netinet/libalias/alias_mod.c
trunk/sys/netinet/libalias/alias_mod.h
trunk/sys/netinet/libalias/alias_nbt.c
trunk/sys/netinet/libalias/alias_old.c
trunk/sys/netinet/libalias/alias_pptp.c
trunk/sys/netinet/libalias/alias_proxy.c
trunk/sys/netinet/libalias/alias_sctp.c
trunk/sys/netinet/libalias/alias_sctp.h
trunk/sys/netinet/libalias/alias_skinny.c
trunk/sys/netinet/libalias/alias_smedia.c
trunk/sys/netinet/libalias/alias_util.c
trunk/sys/netinet/libalias/libalias.3
trunk/sys/netinet/pim.h
trunk/sys/netinet/pim_var.h
trunk/sys/netinet/raw_ip.c
trunk/sys/netinet/sctp.h
trunk/sys/netinet/sctp_asconf.c
trunk/sys/netinet/sctp_asconf.h
trunk/sys/netinet/sctp_auth.c
trunk/sys/netinet/sctp_auth.h
trunk/sys/netinet/sctp_bsd_addr.c
trunk/sys/netinet/sctp_bsd_addr.h
trunk/sys/netinet/sctp_cc_functions.c
trunk/sys/netinet/sctp_constants.h
trunk/sys/netinet/sctp_crc32.c
trunk/sys/netinet/sctp_crc32.h
trunk/sys/netinet/sctp_dtrace_declare.h
trunk/sys/netinet/sctp_dtrace_define.h
trunk/sys/netinet/sctp_header.h
trunk/sys/netinet/sctp_indata.c
trunk/sys/netinet/sctp_indata.h
trunk/sys/netinet/sctp_input.c
trunk/sys/netinet/sctp_input.h
trunk/sys/netinet/sctp_lock_bsd.h
trunk/sys/netinet/sctp_os.h
trunk/sys/netinet/sctp_os_bsd.h
trunk/sys/netinet/sctp_output.c
trunk/sys/netinet/sctp_output.h
trunk/sys/netinet/sctp_pcb.c
trunk/sys/netinet/sctp_pcb.h
trunk/sys/netinet/sctp_peeloff.c
trunk/sys/netinet/sctp_peeloff.h
trunk/sys/netinet/sctp_ss_functions.c
trunk/sys/netinet/sctp_structs.h
trunk/sys/netinet/sctp_sysctl.c
trunk/sys/netinet/sctp_sysctl.h
trunk/sys/netinet/sctp_timer.c
trunk/sys/netinet/sctp_timer.h
trunk/sys/netinet/sctp_uio.h
trunk/sys/netinet/sctp_usrreq.c
trunk/sys/netinet/sctp_var.h
trunk/sys/netinet/sctputil.c
trunk/sys/netinet/sctputil.h
trunk/sys/netinet/siftr.c
trunk/sys/netinet/tcp.h
trunk/sys/netinet/tcp_debug.c
trunk/sys/netinet/tcp_debug.h
trunk/sys/netinet/tcp_fsm.h
trunk/sys/netinet/tcp_hostcache.c
trunk/sys/netinet/tcp_hostcache.h
trunk/sys/netinet/tcp_input.c
trunk/sys/netinet/tcp_lro.c
trunk/sys/netinet/tcp_lro.h
trunk/sys/netinet/tcp_offload.c
trunk/sys/netinet/tcp_offload.h
trunk/sys/netinet/tcp_output.c
trunk/sys/netinet/tcp_reass.c
trunk/sys/netinet/tcp_sack.c
trunk/sys/netinet/tcp_seq.h
trunk/sys/netinet/tcp_subr.c
trunk/sys/netinet/tcp_syncache.c
trunk/sys/netinet/tcp_syncache.h
trunk/sys/netinet/tcp_timer.c
trunk/sys/netinet/tcp_timer.h
trunk/sys/netinet/tcp_timewait.c
trunk/sys/netinet/tcp_usrreq.c
trunk/sys/netinet/tcp_var.h
trunk/sys/netinet/tcpip.h
trunk/sys/netinet/udp.h
trunk/sys/netinet/udp_usrreq.c
trunk/sys/netinet/udp_var.h
trunk/sys/netinet6/dest6.c
trunk/sys/netinet6/frag6.c
trunk/sys/netinet6/icmp6.c
trunk/sys/netinet6/icmp6.h
trunk/sys/netinet6/in6.c
trunk/sys/netinet6/in6.h
trunk/sys/netinet6/in6_cksum.c
trunk/sys/netinet6/in6_gif.c
trunk/sys/netinet6/in6_ifattach.c
trunk/sys/netinet6/in6_ifattach.h
trunk/sys/netinet6/in6_mcast.c
trunk/sys/netinet6/in6_pcb.c
trunk/sys/netinet6/in6_pcb.h
trunk/sys/netinet6/in6_pcbgroup.c
trunk/sys/netinet6/in6_proto.c
trunk/sys/netinet6/in6_rmx.c
trunk/sys/netinet6/in6_src.c
trunk/sys/netinet6/in6_var.h
trunk/sys/netinet6/ip6.h
trunk/sys/netinet6/ip6_ecn.h
trunk/sys/netinet6/ip6_forward.c
trunk/sys/netinet6/ip6_id.c
trunk/sys/netinet6/ip6_input.c
trunk/sys/netinet6/ip6_ipsec.c
trunk/sys/netinet6/ip6_ipsec.h
trunk/sys/netinet6/ip6_mroute.c
trunk/sys/netinet6/ip6_mroute.h
trunk/sys/netinet6/ip6_output.c
trunk/sys/netinet6/ip6_var.h
trunk/sys/netinet6/ip6protosw.h
trunk/sys/netinet6/mld6.c
trunk/sys/netinet6/mld6.h
trunk/sys/netinet6/mld6_var.h
trunk/sys/netinet6/nd6.c
trunk/sys/netinet6/nd6.h
trunk/sys/netinet6/nd6_nbr.c
trunk/sys/netinet6/nd6_rtr.c
trunk/sys/netinet6/pim6.h
trunk/sys/netinet6/pim6_var.h
trunk/sys/netinet6/raw_ip6.c
trunk/sys/netinet6/raw_ip6.h
trunk/sys/netinet6/route6.c
trunk/sys/netinet6/scope6.c
trunk/sys/netinet6/scope6_var.h
trunk/sys/netinet6/sctp6_usrreq.c
trunk/sys/netinet6/sctp6_var.h
trunk/sys/netinet6/send.c
trunk/sys/netinet6/send.h
trunk/sys/netinet6/tcp6_var.h
trunk/sys/netinet6/udp6_usrreq.c
trunk/sys/netinet6/udp6_var.h
Added Paths:
-----------
trunk/sys/netinet/cc/cc_cdg.c
trunk/sys/netinet/in_kdtrace.c
trunk/sys/netinet/in_kdtrace.h
trunk/sys/netinet/sctp_syscalls.c
trunk/sys/netinet/tcp_fastopen.c
trunk/sys/netinet/tcp_fastopen.h
trunk/sys/netinet/toecore.c
trunk/sys/netinet/toecore.h
trunk/sys/netinet/udplite.h
trunk/sys/netinet6/ip6_gre.c
Property Changed:
----------------
trunk/sys/netinet/libalias/HISTORY
trunk/sys/netinet/libalias/libalias.3
Modified: trunk/sys/netinet/accf_data.c
===================================================================
--- trunk/sys/netinet/accf_data.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_data.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/accf_data.c 193272 2009-06-01 21:17:03Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/accf_data.c 193272 2009-06-01 21:17:03Z jhb $");
#define ACCEPT_FILTER_MOD
Modified: trunk/sys/netinet/accf_dns.c
===================================================================
--- trunk/sys/netinet/accf_dns.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_dns.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/accf_dns.c 193272 2009-06-01 21:17:03Z jhb $
+ * $FreeBSD: stable/10/sys/netinet/accf_dns.c 193272 2009-06-01 21:17:03Z jhb $
*/
#define ACCEPT_FILTER_MOD
Modified: trunk/sys/netinet/accf_http.c
===================================================================
--- trunk/sys/netinet/accf_http.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/accf_http.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/accf_http.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/accf_http.c 227309 2011-11-07 15:43:11Z ed $");
#define ACCEPT_FILTER_MOD
Modified: trunk/sys/netinet/cc/cc.c
===================================================================
--- trunk/sys/netinet/cc/cc.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007-2008
* Swinburne University of Technology, Melbourne, Australia.
@@ -47,7 +48,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc.c 273847 2014-10-30 08:04:48Z hselasky $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -92,33 +93,33 @@
{
char default_cc[TCP_CA_NAME_MAX];
struct cc_algo *funcs;
- int err, found;
+ int error;
- err = found = 0;
+ /* Get the current default: */
+ CC_LIST_RLOCK();
+ strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
+ CC_LIST_RUNLOCK();
- if (req->newptr == NULL) {
- /* Just print the current default. */
- CC_LIST_RLOCK();
- strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
- CC_LIST_RUNLOCK();
- err = sysctl_handle_string(oidp, default_cc, 1, req);
- } else {
- /* Find algo with specified name and set it to default. */
- CC_LIST_RLOCK();
- STAILQ_FOREACH(funcs, &cc_list, entries) {
- if (strncmp((char *)req->newptr, funcs->name,
- TCP_CA_NAME_MAX) == 0) {
- found = 1;
- V_default_cc_ptr = funcs;
- }
- }
- CC_LIST_RUNLOCK();
+ error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
- if (!found)
- err = ESRCH;
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ goto done;
+
+ error = ESRCH;
+
+ /* Find algo with specified name and set it to default. */
+ CC_LIST_RLOCK();
+ STAILQ_FOREACH(funcs, &cc_list, entries) {
+ if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
+ continue;
+ V_default_cc_ptr = funcs;
+ error = 0;
+ break;
}
-
- return (err);
+ CC_LIST_RUNLOCK();
+done:
+ return (error);
}
/*
@@ -166,7 +167,7 @@
if (!err) {
sbuf_finish(s);
- err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
+ err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
}
sbuf_delete(s);
Added: trunk/sys/netinet/cc/cc_cdg.c
===================================================================
--- trunk/sys/netinet/cc/cc_cdg.c (rev 0)
+++ trunk/sys/netinet/cc/cc_cdg.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,701 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2013
+ * Swinburne University of Technology, Melbourne, Australia
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by David Hayes, made
+ * possible in part by a gift from The Cisco University Research Program Fund,
+ * a corporate advised fund of Silicon Valley Community Foundation. Development
+ * and testing were further assisted by a grant from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * CAIA Delay-Gradient (CDG) congestion control algorithm
+ *
+ * An implementation of the delay-gradient congestion control algorithm
+ * proposed in the following paper:
+ *
+ * D. A. Hayes and G. Armitage, "Revisiting TCP Congestion Control using Delay
+ * Gradients", in IFIP Networking, Valencia, Spain, 9-13 May 2011.
+ *
+ * Developed as part of the NewTCP research project at Swinburne University of
+ * Technology's Centre for Advanced Internet Architectures, Melbourne,
+ * Australia. More details are available at:
+ * http://caia.swin.edu.au/urp/newtcp/
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_cdg.c 271690 2014-09-16 21:26:24Z lstewart $");
+
+#include <sys/param.h>
+#include <sys/hhook.h>
+#include <sys/kernel.h>
+#include <sys/khelp.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/cc.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+
+#include <netinet/cc/cc_module.h>
+
+#include <netinet/khelp/h_ertt.h>
+
+#include <vm/uma.h>
+
+#define CDG_VERSION "0.1"
+
+#define CAST_PTR_INT(X) (*((int*)(X)))
+
+/* Private delay-gradient induced congestion control signal. */
+#define CC_CDG_DELAY 0x01000000
+
+/* NewReno window deflation factor on loss (as a percentage). */
+#define RENO_BETA 50
+
+/* Queue states. */
+#define CDG_Q_EMPTY 1
+#define CDG_Q_RISING 2
+#define CDG_Q_FALLING 3
+#define CDG_Q_FULL 4
+#define CDG_Q_UNKNOWN 9999
+
+/* Number of bit shifts used in probexp lookup table. */
+#define EXP_PREC 15
+
+/* Largest gradient represented in probexp lookup table. */
+#define MAXGRAD 5
+
+/*
+ * Delay Precision Enhance - number of bit shifts used for qtrend related
+ * integer arithmetic precision.
+ */
+#define D_P_E 7
+
+/*
+ * One entry of a moving-average window: a single RTT-difference value that
+ * lives on either the qdiffmin_q or the qdiffmax_q list of struct cdg.
+ */
+struct qdiff_sample {
+ /* Sample value as passed to calc_moving_average(). */
+ long qdiff;
+ /* Linkage for the singly-linked tail queue the sample is stored on. */
+ STAILQ_ENTRY(qdiff_sample) qdiff_lnk;
+};
+
+/*
+ * Per-connection CDG state, allocated in cdg_cb_init() and released in
+ * cdg_cb_destroy().
+ */
+struct cdg {
+ /* moving average of the max-RTT differences (see calc_moving_average) */
+ long max_qtrend;
+ /* moving average of the min-RTT differences (see calc_moving_average) */
+ long min_qtrend;
+ /* samples currently contributing to min_qtrend, oldest first */
+ STAILQ_HEAD(minrtts_head, qdiff_sample) qdiffmin_q;
+ /* samples currently contributing to max_qtrend, oldest first */
+ STAILQ_HEAD(maxrtts_head, qdiff_sample) qdiffmax_q;
+ /* multiplier applied to t_maxseg when growing cwnd in cong. avoidance */
+ long window_incr;
+ /* rttcount for window increase when in congestion avoidance */
+ long rtt_count;
+ /* maximum measured rtt within an rtt period */
+ int maxrtt_in_rtt;
+ /* maximum measured rtt within prev rtt period */
+ int maxrtt_in_prevrtt;
+ /* minimum measured rtt within an rtt period */
+ int minrtt_in_rtt;
+ /* minimum measured rtt within prev rtt period */
+ int minrtt_in_prevrtt;
+ /* consecutive congestion episode counter */
+ uint32_t consec_cong_cnt;
+ /* when tracking a new reno type loss window */
+ uint32_t shadow_w;
+ /* maximum number of samples in the moving average queue */
+ int sample_q_size;
+ /* number of samples in the moving average queue */
+ int num_samples;
+ /* estimate of the queue state of the path */
+ int queue_state;
+};
+
+/*
+ * Lookup table for:
+ * (1 - exp(-x)) << EXP_PREC, where x = [0,MAXGRAD] in 2^-7 increments
+ *
+ * Note: probexp[0] is set to 10 (not 0) as a safety for very low increase
+ * gradients.
+ */
+static const int probexp[641] = {
+ 10,255,508,759,1008,1255,1501,1744,1985,2225,2463,2698,2932,3165,3395,3624,
+ 3850,4075,4299,4520,4740,4958,5175,5389,5602,5814,6024,6232,6438,6643,6846,
+ 7048,7248,7447,7644,7839,8033,8226,8417,8606,8794,8981,9166,9350,9532,9713,
+ 9892,10070,10247,10422,10596,10769,10940,11110,11278,11445,11611,11776,11939,
+ 12101,12262,12422,12580,12737,12893,13048,13201,13354,13505,13655,13803,13951,
+ 14097,14243,14387,14530,14672,14813,14952,15091,15229,15365,15500,15635,15768,
+ 15900,16032,16162,16291,16419,16547,16673,16798,16922,17046,17168,17289,17410,
+ 17529,17648,17766,17882,17998,18113,18227,18340,18453,18564,18675,18784,18893,
+ 19001,19108,19215,19320,19425,19529,19632,19734,19835,19936,20036,20135,20233,
+ 20331,20427,20523,20619,20713,20807,20900,20993,21084,21175,21265,21355,21444,
+ 21532,21619,21706,21792,21878,21962,22046,22130,22213,22295,22376,22457,22537,
+ 22617,22696,22774,22852,22929,23006,23082,23157,23232,23306,23380,23453,23525,
+ 23597,23669,23739,23810,23879,23949,24017,24085,24153,24220,24286,24352,24418,
+ 24483,24547,24611,24675,24738,24800,24862,24924,24985,25045,25106,25165,25224,
+ 25283,25341,25399,25456,25513,25570,25626,25681,25737,25791,25846,25899,25953,
+ 26006,26059,26111,26163,26214,26265,26316,26366,26416,26465,26514,26563,26611,
+ 26659,26707,26754,26801,26847,26893,26939,26984,27029,27074,27118,27162,27206,
+ 27249,27292,27335,27377,27419,27460,27502,27543,27583,27624,27664,27703,27743,
+ 27782,27821,27859,27897,27935,27973,28010,28047,28084,28121,28157,28193,28228,
+ 28263,28299,28333,28368,28402,28436,28470,28503,28536,28569,28602,28634,28667,
+ 28699,28730,28762,28793,28824,28854,28885,28915,28945,28975,29004,29034,29063,
+ 29092,29120,29149,29177,29205,29232,29260,29287,29314,29341,29368,29394,29421,
+ 29447,29472,29498,29524,29549,29574,29599,29623,29648,29672,29696,29720,29744,
+ 29767,29791,29814,29837,29860,29882,29905,29927,29949,29971,29993,30014,30036,
+ 30057,30078,30099,30120,30141,30161,30181,30201,30221,30241,30261,30280,30300,
+ 30319,30338,30357,30376,30394,30413,30431,30449,30467,30485,30503,30521,30538,
+ 30555,30573,30590,30607,30624,30640,30657,30673,30690,30706,30722,30738,30753,
+ 30769,30785,30800,30815,30831,30846,30861,30876,30890,30905,30919,30934,30948,
+ 30962,30976,30990,31004,31018,31031,31045,31058,31072,31085,31098,31111,31124,
+ 31137,31149,31162,31174,31187,31199,31211,31223,31235,31247,31259,31271,31283,
+ 31294,31306,31317,31328,31339,31351,31362,31373,31383,31394,31405,31416,31426,
+ 31436,31447,31457,31467,31477,31487,31497,31507,31517,31527,31537,31546,31556,
+ 31565,31574,31584,31593,31602,31611,31620,31629,31638,31647,31655,31664,31673,
+ 31681,31690,31698,31706,31715,31723,31731,31739,31747,31755,31763,31771,31778,
+ 31786,31794,31801,31809,31816,31824,31831,31838,31846,31853,31860,31867,31874,
+ 31881,31888,31895,31902,31908,31915,31922,31928,31935,31941,31948,31954,31960,
+ 31967,31973,31979,31985,31991,31997,32003,32009,32015,32021,32027,32033,32038,
+ 32044,32050,32055,32061,32066,32072,32077,32083,32088,32093,32098,32104,32109,
+ 32114,32119,32124,32129,32134,32139,32144,32149,32154,32158,32163,32168,32173,
+ 32177,32182,32186,32191,32195,32200,32204,32209,32213,32217,32222,32226,32230,
+ 32234,32238,32242,32247,32251,32255,32259,32263,32267,32270,32274,32278,32282,
+ 32286,32290,32293,32297,32301,32304,32308,32311,32315,32318,32322,32325,32329,
+ 32332,32336,32339,32342,32346,32349,32352,32356,32359,32362,32365,32368,32371,
+ 32374,32377,32381,32384,32387,32389,32392,32395,32398,32401,32404,32407,32410,
+ 32412,32415,32418,32421,32423,32426,32429,32431,32434,32437,32439,32442,32444,
+ 32447,32449,32452,32454,32457,32459,32461,32464,32466,32469,32471,32473,32476,
+ 32478,32480,32482,32485,32487,32489,32491,32493,32495,32497,32500,32502,32504,
+ 32506,32508,32510,32512,32514,32516,32518,32520,32522,32524,32526,32527,32529,
+ 32531,32533,32535,32537,32538,32540,32542,32544,32545,32547};
+
+static uma_zone_t qdiffsample_zone;
+
+static MALLOC_DEFINE(M_CDG, "cdg data",
+ "Per connection data required for the CDG congestion control algorithm");
+
+static int ertt_id;
+
+/*
+ * Per-vnet tunables. cdg_init_vnet() assigns their initial values and the
+ * sysctl declarations near the end of this file export them under the
+ * _net_inet_tcp_cc_cdg node. The V_* macros below access the instance that
+ * belongs to the current vnet.
+ */
+static VNET_DEFINE(uint32_t, cdg_alpha_inc);
+static VNET_DEFINE(uint32_t, cdg_beta_delay);
+static VNET_DEFINE(uint32_t, cdg_beta_loss);
+static VNET_DEFINE(uint32_t, cdg_smoothing_factor);
+static VNET_DEFINE(uint32_t, cdg_exp_backoff_scale);
+static VNET_DEFINE(uint32_t, cdg_consec_cong);
+static VNET_DEFINE(uint32_t, cdg_hold_backoff);
+#define V_cdg_alpha_inc VNET(cdg_alpha_inc)
+#define V_cdg_beta_delay VNET(cdg_beta_delay)
+#define V_cdg_beta_loss VNET(cdg_beta_loss)
+#define V_cdg_smoothing_factor VNET(cdg_smoothing_factor)
+#define V_cdg_exp_backoff_scale VNET(cdg_exp_backoff_scale)
+#define V_cdg_consec_cong VNET(cdg_consec_cong)
+#define V_cdg_hold_backoff VNET(cdg_hold_backoff)
+
+/* Function prototypes. */
+static int cdg_mod_init(void);
+static int cdg_mod_destroy(void);
+static void cdg_conn_init(struct cc_var *ccv);
+static int cdg_cb_init(struct cc_var *ccv);
+static void cdg_cb_destroy(struct cc_var *ccv);
+static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type);
+static void cdg_ack_received(struct cc_var *ccv, uint16_t ack_type);
+
+/*
+ * Congestion control algorithm descriptor registered by DECLARE_CC_MODULE().
+ * The post_recovery and after_idle hooks are not set here; cdg_mod_init()
+ * copies them from newreno_cc_algo.
+ */
+struct cc_algo cdg_cc_algo = {
+ .name = "cdg",
+ .mod_init = cdg_mod_init,
+ .ack_received = cdg_ack_received,
+ .cb_destroy = cdg_cb_destroy,
+ .cb_init = cdg_cb_init,
+ .conn_init = cdg_conn_init,
+ .cong_signal = cdg_cong_signal,
+ .mod_destroy = cdg_mod_destroy
+};
+
+/*
+ * Vnet created and being initialised: assign the default value of every CDG
+ * tunable for the current vnet. Also called from cdg_mod_init() for each vnet
+ * that already exists when the module is loaded. The argument is unused.
+ */
+static void
+cdg_init_vnet(const void *unused __unused)
+{
+
+ V_cdg_alpha_inc = 0;
+ V_cdg_beta_delay = 70;
+ V_cdg_beta_loss = 50;
+ V_cdg_smoothing_factor = 8;
+ V_cdg_exp_backoff_scale = 3;
+ V_cdg_consec_cong = 5;
+ V_cdg_hold_backoff = 5;
+}
+
+/*
+ * Module initialisation hook (cdg_cc_algo.mod_init).
+ *
+ * Looks up the id of the "ertt" khelp module, creates the UMA zone that backs
+ * struct qdiff_sample allocations, applies the default tunables to every
+ * existing vnet and borrows NewReno's post_recovery and after_idle handlers.
+ *
+ * Returns 0 on success, or EINVAL when no valid ertt id is available.
+ */
+static int
+cdg_mod_init(void)
+{
+ VNET_ITERATOR_DECL(v);
+
+ ertt_id = khelp_get_id("ertt");
+ if (ertt_id <= 0)
+ return (EINVAL);
+
+ qdiffsample_zone = uma_zcreate("cdg_qdiffsample",
+ sizeof(struct qdiff_sample), NULL, NULL, NULL, NULL, 0, 0);
+
+ /* Initialise the tunables in each vnet that already exists. */
+ VNET_LIST_RLOCK();
+ VNET_FOREACH(v) {
+ CURVNET_SET(v);
+ cdg_init_vnet(NULL);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK();
+
+ /* Hooks CDG does not implement itself are taken from NewReno. */
+ cdg_cc_algo.post_recovery = newreno_cc_algo.post_recovery;
+ cdg_cc_algo.after_idle = newreno_cc_algo.after_idle;
+
+ return (0);
+}
+
+/*
+ * Module teardown hook (cdg_cc_algo.mod_destroy): destroys the UMA zone
+ * created in cdg_mod_init(). Always returns 0.
+ */
+static int
+cdg_mod_destroy(void)
+{
+
+ uma_zdestroy(qdiffsample_zone);
+ return (0);
+}
+
+/*
+ * Allocate the per-connection CDG state, reset every field to its starting
+ * value and attach the result to ccv->cc_data. The allocation is made with
+ * M_NOWAIT.
+ *
+ * Returns 0 on success, or ENOMEM if the allocation fails.
+ */
+static int
+cdg_cb_init(struct cc_var *ccv)
+{
+	struct cdg *state;
+
+	state = malloc(sizeof(struct cdg), M_CDG, M_NOWAIT);
+	if (state == NULL)
+		return (ENOMEM);
+
+	/* Gradient tracking: no history yet and an unknown queue state. */
+	state->min_qtrend = 0;
+	state->max_qtrend = 0;
+	state->queue_state = CDG_Q_UNKNOWN;
+	state->num_samples = 0;
+	state->sample_q_size = V_cdg_smoothing_factor;
+	STAILQ_INIT(&state->qdiffmin_q);
+	STAILQ_INIT(&state->qdiffmax_q);
+
+	/* RTT extremes: the minimum starts at INT_MAX so any sample lowers it. */
+	state->minrtt_in_rtt = INT_MAX;
+	state->minrtt_in_prevrtt = 0;
+	state->maxrtt_in_rtt = 0;
+	state->maxrtt_in_prevrtt = 0;
+
+	/* Window growth and congestion bookkeeping. */
+	state->shadow_w = 0;
+	state->window_incr = 0;
+	state->rtt_count = 0;
+	state->consec_cong_cnt = 0;
+
+	ccv->cc_data = state;
+
+	return (0);
+}
+
+/*
+ * Connection initialisation hook (cdg_cc_algo.conn_init): seeds the shadow
+ * window from the connection's current congestion window.
+ */
+static void
+cdg_conn_init(struct cc_var *ccv)
+{
+ struct cdg *cdg_data = ccv->cc_data;
+
+ /*
+ * Initialise the shadow_cwnd in case we are competing with loss based
+ * flows from the start
+ */
+ cdg_data->shadow_w = CCV(ccv, snd_cwnd);
+}
+
+/*
+ * Release the per-connection CDG state: return every queued sample on both
+ * moving-average lists to the UMA zone, then free the state structure itself.
+ */
+static void
+cdg_cb_destroy(struct cc_var *ccv)
+{
+	struct cdg *state;
+	struct qdiff_sample *cur, *next;
+
+	state = ccv->cc_data;
+
+	/* The _SAFE variant fetches the successor before the entry is freed. */
+	STAILQ_FOREACH_SAFE(cur, &state->qdiffmin_q, qdiff_lnk, next)
+		uma_zfree(qdiffsample_zone, cur);
+
+	STAILQ_FOREACH_SAFE(cur, &state->qdiffmax_q, qdiff_lnk, next)
+		uma_zfree(qdiffsample_zone, cur);
+
+	free(state, M_CDG);
+}
+
+/*
+ * Sysctl handler for the beta_delay and beta_loss percentages.
+ *
+ * arg1 points at the uint32_t tunable being read or written. A new value is
+ * accepted only when it lies in the range 1..100.
+ *
+ * The value is first copied into a kernel-local variable by
+ * sysctl_handle_int() and validated there. req->newptr refers to the
+ * caller's buffer and must never be dereferenced directly, which the earlier
+ * CAST_PTR_INT(req->newptr) check did. Comparing as unsigned also rejects
+ * values >= 2^31, which the former signed comparison let through.
+ *
+ * Returns 0 on success, EINVAL for an out-of-range value, or the error
+ * reported by sysctl_handle_int().
+ */
+static int
+cdg_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+	uint32_t new_beta;
+	int error;
+
+	new_beta = *(uint32_t *)arg1;
+	error = sysctl_handle_int(oidp, &new_beta, 0, req);
+
+	/* Done on error, or when this was a read-only request. */
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (new_beta == 0 || new_beta > 100)
+		return (EINVAL);
+
+	/* Commit only after validation so a bad write leaves the old value. */
+	*(uint32_t *)arg1 = new_beta;
+
+	return (0);
+}
+
+/*
+ * Sysctl handler for exp_backoff_scale.
+ *
+ * arg1 points at the uint32_t tunable being read or written. A new value
+ * must be at least 1. Values above INT_MAX are rejected as well, matching
+ * the previous signed "< 1" comparison.
+ *
+ * The value is first copied into a kernel-local variable by
+ * sysctl_handle_int() and validated there. req->newptr refers to the
+ * caller's buffer and must never be dereferenced directly, which the earlier
+ * CAST_PTR_INT(req->newptr) check did.
+ *
+ * Returns 0 on success, EINVAL for an out-of-range value, or the error
+ * reported by sysctl_handle_int().
+ */
+static int
+cdg_exp_backoff_scale_handler(SYSCTL_HANDLER_ARGS)
+{
+	uint32_t new_scale;
+	int error;
+
+	new_scale = *(uint32_t *)arg1;
+	error = sysctl_handle_int(oidp, &new_scale, 0, req);
+
+	/* Done on error, or when this was a read-only request. */
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	if (new_scale < 1 || new_scale > INT_MAX)
+		return (EINVAL);
+
+	/* Commit only after validation so a bad write leaves the old value. */
+	*(uint32_t *)arg1 = new_scale;
+
+	return (0);
+}
+
+/*
+ * Compute a reduced window: beta percent of the smaller of the connection's
+ * snd_wnd and the supplied window owin.
+ */
+static inline unsigned long
+cdg_window_decrease(struct cc_var *ccv, unsigned long owin, unsigned int beta)
+{
+	unsigned long base_win;
+
+	base_win = ulmin(CCV(ccv, snd_wnd), owin);
+
+	return ((base_win * beta) / 100);
+}
+
+/*
+ * Window increase function
+ * This window increase function is independent of the initial window size
+ * to ensure small window flows are not discriminated against (i.e. fairness).
+ * It increases at 1pkt/rtt like Reno for alpha_inc rtts, and then 2pkts/rtt for
+ * the next alpha_inc rtts, etc.
+ */
+static void
+cdg_window_increase(struct cc_var *ccv, int new_measurement)
+{
+ struct cdg *cdg_data;
+ /* incr grows snd_cwnd; s_w_incr grows the NewReno-style shadow window. */
+ int incr, s_w_incr;
+
+ cdg_data = ccv->cc_data;
+ incr = s_w_incr = 0;
+
+ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
+ /* Slow start. */
+ incr = CCV(ccv, t_maxseg);
+ s_w_incr = incr;
+ cdg_data->window_incr = cdg_data->rtt_count = 0;
+ } else {
+ /* Congestion avoidance. */
+ /* Both windows only grow on ACKs that carry a new measurement. */
+ if (new_measurement) {
+ s_w_incr = CCV(ccv, t_maxseg);
+ if (V_cdg_alpha_inc == 0) {
+ incr = CCV(ccv, t_maxseg);
+ } else {
+ /*
+ * NOTE(review): window_incr is reset to 0 elsewhere,
+ * so incr stays 0 until rtt_count first reaches
+ * alpha_inc -- confirm this matches the intended
+ * "1pkt/rtt for alpha_inc rtts" behaviour.
+ */
+ if (++cdg_data->rtt_count >= V_cdg_alpha_inc) {
+ cdg_data->window_incr++;
+ cdg_data->rtt_count = 0;
+ }
+ incr = CCV(ccv, t_maxseg) *
+ cdg_data->window_incr;
+ }
+ }
+ }
+
+ /* A shadow window of 0 means it is not being tracked; leave it alone. */
+ if (cdg_data->shadow_w > 0)
+ cdg_data->shadow_w = ulmin(cdg_data->shadow_w + s_w_incr,
+ TCP_MAXWIN << CCV(ccv, snd_scale));
+
+ /* Never grow past the largest window the send scale can express. */
+ CCV(ccv, snd_cwnd) = ulmin(CCV(ccv, snd_cwnd) + incr,
+ TCP_MAXWIN << CCV(ccv, snd_scale));
+}
+
+/*
+ * Congestion signal hook (cdg_cc_algo.cong_signal).
+ *
+ * Handles the private delay-gradient signal CC_CDG_DELAY and duplicate-ACK
+ * loss (CC_NDUPACK) itself; every other signal type is forwarded to
+ * NewReno's cong_signal handler.
+ */
+static void
+cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type)
+{
+ struct cdg *cdg_data = ccv->cc_data;
+
+ switch(signal_type) {
+ case CC_CDG_DELAY:
+ /* Delay-based backoff: shrink ssthresh and cwnd by beta_delay. */
+ CCV(ccv, snd_ssthresh) = cdg_window_decrease(ccv,
+ CCV(ccv, snd_cwnd), V_cdg_beta_delay);
+ CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
+ CCV(ccv, snd_recover) = CCV(ccv, snd_max);
+ cdg_data->window_incr = cdg_data->rtt_count = 0;
+ ENTER_CONGRECOVERY(CCV(ccv, t_flags));
+ break;
+ case CC_NDUPACK:
+ /*
+ * If already responding to congestion OR we have guessed no
+ * queue in the path is full.
+ */
+ /*
+ * CDG_Q_UNKNOWN is numerically greater than CDG_Q_FULL, so an
+ * unknown queue state takes the else branch below.
+ */
+ if (IN_CONGRECOVERY(CCV(ccv, t_flags)) ||
+ cdg_data->queue_state < CDG_Q_FULL) {
+ /* Keep the window: ssthresh is set to the current cwnd. */
+ CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
+ CCV(ccv, snd_recover) = CCV(ccv, snd_max);
+ } else {
+ /*
+ * Loss is likely to be congestion related. We have
+ * inferred a queue full state, so have shadow window
+ * react to loss as NewReno would.
+ */
+ if (cdg_data->shadow_w > 0)
+ cdg_data->shadow_w = cdg_window_decrease(ccv,
+ cdg_data->shadow_w, RENO_BETA);
+
+ /* Use whichever is larger: shadow window or reduced cwnd. */
+ CCV(ccv, snd_ssthresh) = ulmax(cdg_data->shadow_w,
+ cdg_window_decrease(ccv, CCV(ccv, snd_cwnd),
+ V_cdg_beta_loss));
+
+ cdg_data->window_incr = cdg_data->rtt_count = 0;
+ }
+ ENTER_RECOVERY(CCV(ccv, t_flags));
+ break;
+ default:
+ newreno_cc_algo.cong_signal(ccv, signal_type);
+ break;
+ }
+}
+
+/*
+ * Using a negative exponential probabilistic backoff so that sources with
+ * varying RTTs which share the same link will, on average, have the same
+ * probability of backoff over time.
+ *
+ * Prob_backoff = 1 - exp(-qtrend / V_cdg_exp_backoff_scale), where
+ * V_cdg_exp_backoff_scale is the average qtrend for the exponential backoff.
+ */
+static inline int
+prob_backoff(long qtrend)
+{
+	int slot, threshold;
+
+	/* Gradients beyond the range of the lookup table always back off. */
+	if (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E))
+		return (1);
+
+	/* Scale the gradient (rounding to nearest) to get the table index. */
+	if (V_cdg_exp_backoff_scale > 1)
+		slot = (qtrend + V_cdg_exp_backoff_scale / 2) /
+		    V_cdg_exp_backoff_scale;
+	else
+		slot = qtrend;
+
+	/* Backoff probability proportional to rate of queue growth. */
+	threshold = (INT_MAX / (1 << EXP_PREC)) * probexp[slot];
+
+	return (random() < threshold);
+}
+
+/*
+ * Fold a new pair of RTT-difference samples into the smoothed gradients
+ * min_qtrend and max_qtrend, using a moving average over at most
+ * sample_q_size samples.
+ *
+ * - sample_q_size <= 0 (smoothing_factor 0, "no smoothing"): the trends are
+ *   set to the raw samples. Previously this case dereferenced the head of an
+ *   empty queue and divided by zero.
+ * - Window still filling: both samples are allocated up front and the pair
+ *   is only counted when both allocations succeed. Both queues therefore
+ *   always hold exactly num_samples entries. Previously a failed M_NOWAIT
+ *   allocation was still counted, so a later dequeue could hit an empty
+ *   queue. A sample pair that cannot be stored is dropped.
+ * - Window full: the oldest entry of each queue is recycled to hold the new
+ *   sample and the averages are adjusted by the difference.
+ */
+static inline void
+calc_moving_average(struct cdg *cdg_data, long qdiff_max, long qdiff_min)
+{
+	struct qdiff_sample *qds_max, *qds_min;
+
+	if (cdg_data->sample_q_size <= 0) {
+		/* Smoothing disabled: track the latest samples directly. */
+		cdg_data->min_qtrend = qdiff_min;
+		cdg_data->max_qtrend = qdiff_max;
+		return;
+	}
+
+	if (cdg_data->num_samples >= cdg_data->sample_q_size) {
+		qds_min = STAILQ_FIRST(&cdg_data->qdiffmin_q);
+		qds_max = STAILQ_FIRST(&cdg_data->qdiffmax_q);
+		/* Defensive: cannot happen while the invariant above holds. */
+		if (qds_min == NULL || qds_max == NULL)
+			return;
+
+		/* Minimum RTT. */
+		cdg_data->min_qtrend = cdg_data->min_qtrend +
+		    (qdiff_min - qds_min->qdiff) / cdg_data->sample_q_size;
+		STAILQ_REMOVE_HEAD(&cdg_data->qdiffmin_q, qdiff_lnk);
+		qds_min->qdiff = qdiff_min;
+		STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds_min, qdiff_lnk);
+
+		/* Maximum RTT. */
+		cdg_data->max_qtrend = cdg_data->max_qtrend +
+		    (qdiff_max - qds_max->qdiff) / cdg_data->sample_q_size;
+		STAILQ_REMOVE_HEAD(&cdg_data->qdiffmax_q, qdiff_lnk);
+		qds_max->qdiff = qdiff_max;
+		STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds_max, qdiff_lnk);
+		return;
+	}
+
+	/* Window still filling: grow both queues together or not at all. */
+	qds_min = uma_zalloc(qdiffsample_zone, M_NOWAIT);
+	qds_max = uma_zalloc(qdiffsample_zone, M_NOWAIT);
+	if (qds_min == NULL || qds_max == NULL) {
+		if (qds_min != NULL)
+			uma_zfree(qdiffsample_zone, qds_min);
+		if (qds_max != NULL)
+			uma_zfree(qdiffsample_zone, qds_max);
+		return;
+	}
+
+	++cdg_data->num_samples;
+
+	cdg_data->min_qtrend = cdg_data->min_qtrend +
+	    qdiff_min / cdg_data->sample_q_size;
+	qds_min->qdiff = qdiff_min;
+	STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds_min, qdiff_lnk);
+
+	cdg_data->max_qtrend = cdg_data->max_qtrend +
+	    qdiff_max / cdg_data->sample_q_size;
+	qds_max->qdiff = qdiff_max;
+	STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds_max, qdiff_lnk);
+}
+
+/*
+ * ACK processing hook (cdg_cc_algo.ack_received).
+ *
+ * Tracks the minimum and maximum RTT reported by the ertt helper. Whenever
+ * ertt flags a new measurement it updates the smoothed delay gradients,
+ * decides probabilistically whether to signal delay-based congestion and
+ * re-estimates the queue state. If congestion was inferred it may back off
+ * via cdg_cong_signal(); otherwise a CC_ACK grows the window.
+ */
+static void
+cdg_ack_received(struct cc_var *ccv, uint16_t ack_type)
+{
+ struct cdg *cdg_data;
+ struct ertt *e_t;
+ long qdiff_max, qdiff_min;
+ int congestion, new_measurement, slowstart;
+
+ cdg_data = ccv->cc_data;
+ e_t = (struct ertt *)khelp_get_osd(CCV(ccv, osd), ertt_id);
+ new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
+ congestion = 0;
+ /* Fold this ACK's RTT into the extremes for the current period. */
+ cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt);
+ cdg_data->minrtt_in_rtt = imin(e_t->rtt, cdg_data->minrtt_in_rtt);
+
+ if (new_measurement) {
+ slowstart = (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh));
+ /*
+ * Update smoothed gradient measurements. Since we are only
+ * using one measurement per RTT, use max or min rtt_in_rtt.
+ * This is also less noisy than a sample RTT measurement. Max
+ * RTT measurements can have trouble due to OS issues.
+ */
+ /* A zero maxrtt_in_prevrtt means there is no previous period. */
+ if (cdg_data->maxrtt_in_prevrtt) {
+ qdiff_max = ((long)(cdg_data->maxrtt_in_rtt -
+ cdg_data->maxrtt_in_prevrtt) << D_P_E );
+ qdiff_min = ((long)(cdg_data->minrtt_in_rtt -
+ cdg_data->minrtt_in_prevrtt) << D_P_E );
+
+ calc_moving_average(cdg_data, qdiff_max, qdiff_min);
+
+ /* Probabilistic backoff with respect to gradient. */
+ /* In slow start the raw differences are tried first. */
+ if (slowstart && qdiff_min > 0)
+ congestion = prob_backoff(qdiff_min);
+ else if (cdg_data->min_qtrend > 0)
+ congestion = prob_backoff(cdg_data->min_qtrend);
+ else if (slowstart && qdiff_max > 0)
+ congestion = prob_backoff(qdiff_max);
+ else if (cdg_data->max_qtrend > 0)
+ congestion = prob_backoff(cdg_data->max_qtrend);
+
+ /* Update estimate of queue state. */
+ /* If no case matches, the previous estimate is kept. */
+ if (cdg_data->min_qtrend > 0 &&
+ cdg_data->max_qtrend <= 0) {
+ cdg_data->queue_state = CDG_Q_FULL;
+ } else if (cdg_data->min_qtrend >= 0 &&
+ cdg_data->max_qtrend < 0) {
+ cdg_data->queue_state = CDG_Q_EMPTY;
+ cdg_data->shadow_w = 0;
+ } else if (cdg_data->min_qtrend > 0 &&
+ cdg_data->max_qtrend > 0) {
+ cdg_data->queue_state = CDG_Q_RISING;
+ } else if (cdg_data->min_qtrend < 0 &&
+ cdg_data->max_qtrend < 0) {
+ cdg_data->queue_state = CDG_Q_FALLING;
+ }
+
+ /* A falling trend ends the run of congestion episodes. */
+ if (cdg_data->min_qtrend < 0 ||
+ cdg_data->max_qtrend < 0)
+ cdg_data->consec_cong_cnt = 0;
+ }
+
+ /* Roll the period over and acknowledge the measurement. */
+ cdg_data->minrtt_in_prevrtt = cdg_data->minrtt_in_rtt;
+ cdg_data->minrtt_in_rtt = INT_MAX;
+ cdg_data->maxrtt_in_prevrtt = cdg_data->maxrtt_in_rtt;
+ cdg_data->maxrtt_in_rtt = 0;
+ e_t->flags &= ~ERTT_NEW_MEASUREMENT;
+ }
+
+ if (congestion) {
+ cdg_data->consec_cong_cnt++;
+ if (!IN_RECOVERY(CCV(ccv, t_flags))) {
+ if (cdg_data->consec_cong_cnt <= V_cdg_consec_cong)
+ cdg_cong_signal(ccv, CC_CDG_DELAY);
+ else
+ /*
+ * We have been backing off but the queue is not
+ * falling. Assume we are competing with
+ * loss-based flows and don't back off for the
+ * next V_cdg_hold_backoff RTT periods.
+ */
+ /* The if below is the entire body of this else. */
+ if (cdg_data->consec_cong_cnt >=
+ V_cdg_consec_cong + V_cdg_hold_backoff)
+ cdg_data->consec_cong_cnt = 0;
+
+ /* Won't see effect until 2nd RTT. */
+ cdg_data->maxrtt_in_prevrtt = 0;
+ /*
+ * Resync shadow window in case we are competing with a
+ * loss based flow
+ */
+ cdg_data->shadow_w = ulmax(CCV(ccv, snd_cwnd),
+ cdg_data->shadow_w);
+ }
+ } else if (ack_type == CC_ACK)
+ cdg_window_increase(ccv, new_measurement);
+}
+
+/* When a vnet is created and being initialised, init the per-stack CDG vars. */
+VNET_SYSINIT(cdg_init_vnet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+ cdg_init_vnet, NULL);
+
+SYSCTL_DECL(_net_inet_tcp_cc_cdg);
+SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cdg, CTLFLAG_RW, NULL,
+ "CAIA delay-gradient congestion control related settings");
+
+SYSCTL_STRING(_net_inet_tcp_cc_cdg, OID_AUTO, version,
+ CTLFLAG_RD, CDG_VERSION, sizeof(CDG_VERSION) - 1,
+ "Current algorithm/implementation version number");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, alpha_inc,
+ CTLFLAG_RW, &VNET_NAME(cdg_alpha_inc), 0,
+ "Increment the window increase factor alpha by 1 MSS segment every "
+ "alpha_inc RTTs during congestion avoidance mode.");
+
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_delay,
+ CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_delay), 70,
+ &cdg_beta_handler, "IU",
+ "Delay-based window decrease factor as a percentage "
+ "(on delay-based backoff, w = w * beta_delay / 100)");
+
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_loss,
+ CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_loss), 50,
+ &cdg_beta_handler, "IU",
+ "Loss-based window decrease factor as a percentage "
+ "(on loss-based backoff, w = w * beta_loss / 100)");
+
+/*
+ * NOTE(review): the arg value 2 passed below differs from the value 3 that
+ * cdg_init_vnet() assigns to cdg_exp_backoff_scale -- confirm which default
+ * is intended.
+ */
+SYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, exp_backoff_scale,
+ CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_exp_backoff_scale), 2,
+ &cdg_exp_backoff_scale_handler, "IU",
+ "Scaling parameter for the probabilistic exponential backoff");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, smoothing_factor,
+ CTLFLAG_RW, &VNET_NAME(cdg_smoothing_factor), 8,
+ "Number of samples used for moving average smoothing (0 = no smoothing)");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_consec_cong,
+ CTLFLAG_RW, &VNET_NAME(cdg_consec_cong), 5,
+ "Number of consecutive delay-gradient based congestion episodes which will "
+ "trigger loss based CC compatibility");
+
+SYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_hold_backoff,
+ CTLFLAG_RW, &VNET_NAME(cdg_hold_backoff), 5,
+ "Number of consecutive delay-gradient based congestion episodes to hold "
+ "the window backoff for loss based CC compatibility");
+
+DECLARE_CC_MODULE(cdg, &cdg_cc_algo);
+
+MODULE_DEPEND(cdg, ertt, 1, 1, 1);
Property changes on: trunk/sys/netinet/cc/cc_cdg.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/cc/cc_chd.c
===================================================================
--- trunk/sys/netinet/cc/cc_chd.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_chd.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010
* Swinburne University of Technology, Melbourne, Australia
@@ -51,7 +52,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_chd.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_chd.c 273736 2014-10-27 14:38:00Z hselasky $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -484,11 +485,11 @@
"IU", "Queueing congestion threshold in ticks");
SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min,
- CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
+ CTLFLAG_RW, &VNET_NAME(chd_qmin), 5,
"Minimum queueing delay threshold in ticks");
SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max,
- CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
+ CTLFLAG_RW, &VNET_NAME(chd_use_max), 1,
"Use the maximum RTT seen within the measurement period (RTT) "
"as the basic delay measurement for the algorithm.");
Modified: trunk/sys/netinet/cc/cc_cubic.c
===================================================================
--- trunk/sys/netinet/cc/cc_cubic.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_cubic.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008-2010 Lawrence Stewart <lstewart at freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
@@ -46,7 +47,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_cubic.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_cubic.c 293711 2016-01-11 23:37:31Z hiren $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -299,8 +300,10 @@
cubic_post_recovery(struct cc_var *ccv)
{
struct cubic *cubic_data;
+ int pipe;
cubic_data = ccv->cc_data;
+ pipe = 0;
/* Fast convergence heuristic. */
if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd)
@@ -315,11 +318,14 @@
*
* XXXLAS: Find a way to do this without needing curack
*/
- if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
- CCV(ccv, snd_max)))
- CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - ccv->curack +
- CCV(ccv, t_maxseg);
+ if (V_tcp_do_rfc6675_pipe)
+ pipe = tcp_compute_pipe(ccv->ccvc.tcp);
else
+ pipe = CCV(ccv, snd_max) - ccv->curack;
+
+ if (pipe < CCV(ccv, snd_ssthresh))
+ CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
+ else
/* Update cwnd based on beta and adjusted max_cwnd. */
CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA *
cubic_data->max_cwnd) >> CUBIC_SHIFT));
Modified: trunk/sys/netinet/cc/cc_cubic.h
===================================================================
--- trunk/sys/netinet/cc/cc_cubic.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_cubic.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008-2010 Lawrence Stewart <lstewart at freebsd.org>
* Copyright (c) 2010 The FreeBSD Foundation
@@ -33,7 +34,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/cc/cc_cubic.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc/cc_cubic.h 220560 2011-04-12 08:13:18Z lstewart $
*/
#ifndef _NETINET_CC_CUBIC_H_
Modified: trunk/sys/netinet/cc/cc_hd.c
===================================================================
--- trunk/sys/netinet/cc/cc_hd.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_hd.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010
* Swinburne University of Technology, Melbourne, Australia
@@ -52,7 +53,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_hd.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_hd.c 220560 2011-04-12 08:13:18Z lstewart $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/netinet/cc/cc_htcp.c
===================================================================
--- trunk/sys/netinet/cc/cc_htcp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_htcp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007-2008
* Swinburne University of Technology, Melbourne, Australia
@@ -48,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_htcp.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_htcp.c 220592 2011-04-13 11:28:46Z pluknet $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/netinet/cc/cc_module.h
===================================================================
--- trunk/sys/netinet/cc/cc_module.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_module.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010 Lawrence Stewart <lstewart at freebsd.org>
* All rights reserved.
@@ -28,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/cc/cc_module.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc/cc_module.h 220560 2011-04-12 08:13:18Z lstewart $
*/
/*
Modified: trunk/sys/netinet/cc/cc_newreno.c
===================================================================
--- trunk/sys/netinet/cc/cc_newreno.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_newreno.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
* The Regents of the University of California.
@@ -49,7 +50,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_newreno.c 220560 2011-04-12 08:13:18Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_newreno.c 293711 2016-01-11 23:37:31Z hiren $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -214,6 +215,9 @@
static void
newreno_post_recovery(struct cc_var *ccv)
{
+ int pipe;
+ pipe = 0;
+
if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
/*
* Fast recovery will conclude after returning from this
@@ -224,11 +228,14 @@
*
* XXXLAS: Find a way to do this without needing curack
*/
- if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
- CCV(ccv, snd_max)))
- CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) -
- ccv->curack + CCV(ccv, t_maxseg);
+ if (V_tcp_do_rfc6675_pipe)
+ pipe = tcp_compute_pipe(ccv->ccvc.tcp);
else
+ pipe = CCV(ccv, snd_max) - ccv->curack;
+
+ if (pipe < CCV(ccv, snd_ssthresh))
+ CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg);
+ else
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
}
}
Modified: trunk/sys/netinet/cc/cc_vegas.c
===================================================================
--- trunk/sys/netinet/cc/cc_vegas.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc/cc_vegas.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010
* Swinburne University of Technology, Melbourne, Australia
@@ -54,7 +55,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/cc/cc_vegas.c 220592 2011-04-13 11:28:46Z pluknet $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/cc/cc_vegas.c 220592 2011-04-13 11:28:46Z pluknet $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/netinet/cc.h
===================================================================
--- trunk/sys/netinet/cc.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/cc.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -36,7 +36,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/cc.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/cc.h 220560 2011-04-12 08:13:18Z lstewart $
*/
/*
Modified: trunk/sys/netinet/icmp6.h
===================================================================
--- trunk/sys/netinet/icmp6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/icmp6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/icmp6.h 235705 2012-05-20 23:33:10Z bz $ */
+/* $FreeBSD: stable/10/sys/netinet/icmp6.h 327716 2018-01-09 06:07:51Z cy $ */
/* $KAME: icmp6.h,v 1.46 2001/04/27 15:09:48 itojun Exp $ */
/*-
@@ -298,9 +298,11 @@
#define ND_OPT_PREFIX_INFORMATION 3
#define ND_OPT_REDIRECTED_HEADER 4
#define ND_OPT_MTU 5
+#define ND_OPT_NONCE 14 /* RFC 3971 */
#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */
#define ND_OPT_RDNSS 25 /* RFC 6106 */
#define ND_OPT_DNSSL 31 /* RFC 6106 */
+#define ND_OPT_MAX 31
struct nd_opt_prefix_info { /* prefix information */
u_int8_t nd_opt_pi_type;
@@ -331,6 +333,16 @@
u_int32_t nd_opt_mtu_mtu;
} __packed;
+#define ND_OPT_NONCE_LEN ((1 * 8) - 2)
+#if ((ND_OPT_NONCE_LEN + 2) % 8) != 0
+#error "(ND_OPT_NONCE_LEN + 2) must be a multiple of 8."
+#endif
+struct nd_opt_nonce { /* nonce option */
+ u_int8_t nd_opt_nonce_type;
+ u_int8_t nd_opt_nonce_len;
+ u_int8_t nd_opt_nonce[ND_OPT_NONCE_LEN];
+} __packed;
+
struct nd_opt_route_info { /* route info */
u_int8_t nd_opt_rti_type;
u_int8_t nd_opt_rti_len;
@@ -556,39 +568,39 @@
* of the internet control message protocol version 6.
*/
struct icmp6errstat {
- u_quad_t icp6errs_dst_unreach_noroute;
- u_quad_t icp6errs_dst_unreach_admin;
- u_quad_t icp6errs_dst_unreach_beyondscope;
- u_quad_t icp6errs_dst_unreach_addr;
- u_quad_t icp6errs_dst_unreach_noport;
- u_quad_t icp6errs_packet_too_big;
- u_quad_t icp6errs_time_exceed_transit;
- u_quad_t icp6errs_time_exceed_reassembly;
- u_quad_t icp6errs_paramprob_header;
- u_quad_t icp6errs_paramprob_nextheader;
- u_quad_t icp6errs_paramprob_option;
- u_quad_t icp6errs_redirect; /* we regard redirect as an error here */
- u_quad_t icp6errs_unknown;
+ uint64_t icp6errs_dst_unreach_noroute;
+ uint64_t icp6errs_dst_unreach_admin;
+ uint64_t icp6errs_dst_unreach_beyondscope;
+ uint64_t icp6errs_dst_unreach_addr;
+ uint64_t icp6errs_dst_unreach_noport;
+ uint64_t icp6errs_packet_too_big;
+ uint64_t icp6errs_time_exceed_transit;
+ uint64_t icp6errs_time_exceed_reassembly;
+ uint64_t icp6errs_paramprob_header;
+ uint64_t icp6errs_paramprob_nextheader;
+ uint64_t icp6errs_paramprob_option;
+ uint64_t icp6errs_redirect; /* we regard redirect as an error here */
+ uint64_t icp6errs_unknown;
};
struct icmp6stat {
/* statistics related to icmp6 packets generated */
- u_quad_t icp6s_error; /* # of calls to icmp6_error */
- u_quad_t icp6s_canterror; /* no error 'cuz old was icmp */
- u_quad_t icp6s_toofreq; /* no error 'cuz rate limitation */
- u_quad_t icp6s_outhist[256];
+ uint64_t icp6s_error; /* # of calls to icmp6_error */
+ uint64_t icp6s_canterror; /* no error 'cuz old was icmp */
+ uint64_t icp6s_toofreq; /* no error 'cuz rate limitation */
+ uint64_t icp6s_outhist[256];
/* statistics related to input message processed */
- u_quad_t icp6s_badcode; /* icmp6_code out of range */
- u_quad_t icp6s_tooshort; /* packet < sizeof(struct icmp6_hdr) */
- u_quad_t icp6s_checksum; /* bad checksum */
- u_quad_t icp6s_badlen; /* calculated bound mismatch */
+ uint64_t icp6s_badcode; /* icmp6_code out of range */
+ uint64_t icp6s_tooshort; /* packet < sizeof(struct icmp6_hdr) */
+ uint64_t icp6s_checksum; /* bad checksum */
+ uint64_t icp6s_badlen; /* calculated bound mismatch */
/*
* number of responses: this member is inherited from netinet code, but
* for netinet6 code, it is already available in icp6s_outhist[].
*/
- u_quad_t icp6s_reflect;
- u_quad_t icp6s_inhist[256];
- u_quad_t icp6s_nd_toomanyopt; /* too many ND options */
+ uint64_t icp6s_reflect;
+ uint64_t icp6s_inhist[256];
+ uint64_t icp6s_nd_toomanyopt; /* too many ND options */
struct icmp6errstat icp6s_outerrhist;
#define icp6s_odst_unreach_noroute \
icp6s_outerrhist.icp6errs_dst_unreach_noroute
@@ -608,21 +620,25 @@
#define icp6s_oparamprob_option icp6s_outerrhist.icp6errs_paramprob_option
#define icp6s_oredirect icp6s_outerrhist.icp6errs_redirect
#define icp6s_ounknown icp6s_outerrhist.icp6errs_unknown
- u_quad_t icp6s_pmtuchg; /* path MTU changes */
- u_quad_t icp6s_nd_badopt; /* bad ND options */
- u_quad_t icp6s_badns; /* bad neighbor solicitation */
- u_quad_t icp6s_badna; /* bad neighbor advertisement */
- u_quad_t icp6s_badrs; /* bad router advertisement */
- u_quad_t icp6s_badra; /* bad router advertisement */
- u_quad_t icp6s_badredirect; /* bad redirect message */
+ uint64_t icp6s_pmtuchg; /* path MTU changes */
+ uint64_t icp6s_nd_badopt; /* bad ND options */
+ uint64_t icp6s_badns; /* bad neighbor solicitation */
+ uint64_t icp6s_badna; /* bad neighbor advertisement */
+ uint64_t icp6s_badrs; /* bad router solicitation */
+ uint64_t icp6s_badra; /* bad router advertisement */
+ uint64_t icp6s_badredirect; /* bad redirect message */
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmp6stat, icmp6stat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ICMP6STAT_ADD(name, val) V_icmp6stat.name += (val)
+#define ICMP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct icmp6stat, icmp6stat, name, (val))
#define ICMP6STAT_INC(name) ICMP6STAT_ADD(name, 1)
/*
@@ -629,8 +645,8 @@
* Kernel module consumers must use this accessor macro.
*/
void kmod_icmp6stat_inc(int statnum);
-#define KMOD_ICMP6STAT_INC(name) \
- kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(u_quad_t))
+#define KMOD_ICMP6STAT_INC(name) \
+ kmod_icmp6stat_inc(offsetof(struct icmp6stat, name) / sizeof(uint64_t))
#endif
/*
@@ -660,7 +676,8 @@
#define ICMPV6CTL_MLD_SOMAXSRC 22
#define ICMPV6CTL_MLD_VERSION 23
#define ICMPV6CTL_ND6_MAXQLEN 24
-#define ICMPV6CTL_MAXID 25
+#define ICMPV6CTL_NODEINFO_OLDMCPREFIX 25
+#define ICMPV6CTL_MAXID 26
#define RTF_PROBEMTU RTF_PROTO1
@@ -688,7 +705,9 @@
#define icmp6_ifstat_inc(ifp, tag) \
do { \
if (ifp) \
- ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat->tag++; \
+ counter_u64_add(((struct in6_ifextra *) \
+ ((ifp)->if_afdata[AF_INET6]))->icmp6_ifstat[\
+ offsetof(struct icmp6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)
#define icmp6_ifoutstat_inc(ifp, type, code) \
Modified: trunk/sys/netinet/icmp_var.h
===================================================================
--- trunk/sys/netinet/icmp_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/icmp_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/icmp_var.h 237913 2012-07-01 09:00:29Z tuexen $
+ * $FreeBSD: stable/10/sys/netinet/icmp_var.h 254925 2013-08-26 18:16:05Z jhb $
*/
#ifndef _NETINET_ICMP_VAR_H_
@@ -59,11 +59,15 @@
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct icmpstat, icmpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define ICMPSTAT_ADD(name, val) V_icmpstat.name += (val)
+#define ICMPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct icmpstat, icmpstat, name, (val))
#define ICMPSTAT_INC(name) ICMPSTAT_ADD(name, 1)
/*
@@ -71,7 +75,7 @@
*/
void kmod_icmpstat_inc(int statnum);
#define KMOD_ICMPSTAT_INC(name) \
- kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(u_long))
+ kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(uint64_t))
#endif
/*
@@ -82,19 +86,9 @@
#define ICMPCTL_ICMPLIM 3
#define ICMPCTL_MAXID 4
-#define ICMPCTL_NAMES { \
- { 0, 0 }, \
- { "maskrepl", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "icmplim", CTLTYPE_INT }, \
-}
-
#ifdef _KERNEL
SYSCTL_DECL(_net_inet_icmp);
-VNET_DECLARE(struct icmpstat, icmpstat); /* icmp statistics. */
-#define V_icmpstat VNET(icmpstat)
-
extern int badport_bandlim(int);
#define BANDLIM_UNLIMITED -1
#define BANDLIM_ICMP_UNREACH 0
Modified: trunk/sys/netinet/if_atm.c
===================================================================
--- trunk/sys/netinet/if_atm.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_atm.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -33,7 +33,7 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/if_atm.c 216466 2010-12-15 22:58:45Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/if_atm.c 249925 2013-04-26 12:50:32Z glebius $");
/*
* IP <=> ATM address resolution.
@@ -318,7 +318,7 @@
* but this is enough for PVCs entered via the "route" command.
*/
int
-atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
+atmresolve(struct rtentry *rt, struct mbuf *m, const struct sockaddr *dst,
struct atm_pseudohdr *desten)
{
struct sockaddr_dl *sdl;
@@ -330,7 +330,8 @@
}
if (rt == NULL) {
- rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
+ /* link level on table 0 XXX MRT */
+ rt = RTALLOC1(__DECONST(struct sockaddr *, dst), 0);
if (rt == NULL)
goto bad; /* failed */
RT_REMREF(rt); /* don't keep LL references */
Modified: trunk/sys/netinet/if_atm.h
===================================================================
--- trunk/sys/netinet/if_atm.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_atm.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/if_atm.h 139823 2005-01-07 01:45:51Z imp $ */
+/* $FreeBSD: stable/10/sys/netinet/if_atm.h 249925 2013-04-26 12:50:32Z glebius $ */
/* $NetBSD: if_atm.h,v 1.2 1996/07/03 17:17:17 chuck Exp $ */
/*-
@@ -44,5 +44,5 @@
struct sockaddr;
void atm_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
-int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *,
+int atmresolve(struct rtentry *, struct mbuf *, const struct sockaddr *,
struct atm_pseudohdr *);
Modified: trunk/sys/netinet/if_ether.c
===================================================================
--- trunk/sys/netinet/if_ether.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_ether.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/if_ether.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/if_ether.c 309340 2016-11-30 22:20:23Z vangyzen $");
#include "opt_inet.h"
@@ -65,7 +65,7 @@
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
#include <netinet/if_ether.h>
-#if defined(INET)
+#ifdef INET
#include <netinet/ip_carp.h>
#endif
@@ -74,7 +74,7 @@
#include <security/mac/mac_framework.h>
-#define SIN(s) ((struct sockaddr_in *)s)
+#define SIN(s) ((const struct sockaddr_in *)(s))
#define SDL(s) ((struct sockaddr_dl *)s)
SYSCTL_DECL(_net_link_ether);
@@ -90,8 +90,13 @@
static VNET_DEFINE(int, arp_proxyall) = 0;
static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for
* 20 seconds */
-VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
+VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
+VNET_PCPUSTAT_SYSINIT(arpstat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(arpstat);
+#endif /* VIMAGE */
+
static VNET_DEFINE(int, arp_maxhold) = 1;
#define V_arpt_keep VNET(arpt_keep)
@@ -98,7 +103,6 @@
#define V_arpt_down VNET(arpt_down)
#define V_arp_maxtries VNET(arp_maxtries)
#define V_arp_proxyall VNET(arp_proxyall)
-#define V_arpstat VNET(arpstat)
#define V_arp_maxhold VNET(arp_maxhold)
SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
@@ -116,16 +120,35 @@
SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW,
&VNET_NAME(arpt_down), 0,
"Incomplete ARP entry lifetime in seconds");
-SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW,
- &VNET_NAME(arpstat), arpstat,
- "ARP statistics (struct arpstat, net/if_arp.h)");
+SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
+ arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
&VNET_NAME(arp_maxhold), 0,
"Number of packets to hold per ARP entry");
+/*
+ * Due to the exponential backoff algorithm used for the interval between GARP
+ * retransmissions, the maximum number of retransmissions is limited for
+ * sanity. This limit corresponds to a maximum interval between retransmissions
+ * of 2^16 seconds ~= 18 hours.
+ *
+ * Making this limit more dynamic is more complicated than worthwhile,
+ * especially since sending out GARPs spaced days apart would be of little
+ * use. A maximum dynamic limit would look something like:
+ *
+ * const int max = fls(INT_MAX / hz) - 1;
+ */
+#define MAX_GARP_RETRANSMITS 16
+static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
+static int garp_rexmit_count = 0; /* GARP retransmission setting. */
+
+SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
+ CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
+ &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
+ "Number of times to retransmit GARP packets;"
+ " 0 to disable, maximum of 16");
+
static void arp_init(void);
-void arprequest(struct ifnet *,
- struct in_addr *, struct in_addr *, u_char *);
static void arpintr(struct mbuf *);
static void arptimer(void *);
#ifdef INET
@@ -140,8 +163,6 @@
};
#ifdef AF_INET
-void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
/*
* called by in_ifscrub to remove entry from the table when
* the interface goes away
@@ -155,10 +176,10 @@
addr4.sin_len = sizeof(addr4);
addr4.sin_family = AF_INET;
addr4.sin_addr.s_addr = addr;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_WLOCK(ifp);
lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
(struct sockaddr *)&addr4);
- IF_AFDATA_UNLOCK(ifp);
+ IF_AFDATA_WUNLOCK(ifp);
}
#endif
@@ -172,13 +193,41 @@
struct ifnet *ifp;
if (lle->la_flags & LLE_STATIC) {
- LLE_WUNLOCK(lle);
return;
}
-
+ LLE_WLOCK(lle);
+ if (callout_pending(&lle->la_timer)) {
+ /*
+ * We are a bit odd here in the treatment of
+ * active/pending. If the pending bit is set, it got
+ * rescheduled before I ran. The active
+ * bit we ignore, since if it was stopped
+ * in ll_tablefree() and was currently running
+ * it would have returned 0 so the code would
+ * not have deleted it since the callout could
+ * not be stopped so we want to go through
+ * with the delete here now. If the callout
+ * was restarted, the pending bit will be back on and
+ * we just want to bail since the callout_reset would
+ * return 1 and our reference would have been removed
+ * by arpresolve() below.
+ */
+ LLE_WUNLOCK(lle);
+ return;
+ }
ifp = lle->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);
+ if ((lle->la_flags & LLE_DELETED) == 0) {
+ int evt;
+
+ if (lle->la_flags & LLE_VALID)
+ evt = LLENTRY_EXPIRED;
+ else
+ evt = LLENTRY_TIMEDOUT;
+ EVENTHANDLER_INVOKE(lle_event, lle, evt);
+ }
+
callout_stop(&lle->la_timer);
/* XXX: LOR avoidance. We still have ref on lle. */
@@ -210,15 +259,15 @@
* - arp header source ethernet address
*/
void
-arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
- u_char *enaddr)
+arprequest(struct ifnet *ifp, const struct in_addr *sip,
+ const struct in_addr *tip, u_char *enaddr)
{
struct mbuf *m;
struct arphdr *ah;
struct sockaddr sa;
+ u_char *carpaddr = NULL;
if (sip == NULL) {
- /* XXX don't believe this can happen (or explain why) */
/*
* The caller did not supply a source address, try to find
* a compatible one among those assigned to this interface.
@@ -225,22 +274,34 @@
*/
struct ifaddr *ifa;
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (!ifa->ifa_addr ||
- ifa->ifa_addr->sa_family != AF_INET)
+ if (ifa->ifa_addr->sa_family != AF_INET)
continue;
- sip = &SIN(ifa->ifa_addr)->sin_addr;
+
+ if (ifa->ifa_carp) {
+ if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
+ continue;
+ sip = &IA_SIN(ifa)->sin_addr;
+ } else {
+ carpaddr = NULL;
+ sip = &IA_SIN(ifa)->sin_addr;
+ }
+
if (0 == ((sip->s_addr ^ tip->s_addr) &
- SIN(ifa->ifa_netmask)->sin_addr.s_addr) )
+ IA_MASKSIN(ifa)->sin_addr.s_addr))
break; /* found it. */
}
+ IF_ADDR_RUNLOCK(ifp);
if (sip == NULL) {
printf("%s: cannot find matching address\n", __func__);
return;
}
}
+ if (enaddr == NULL)
+ enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
- if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+ if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
return;
m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
2*ifp->if_data.ifi_addrlen;
@@ -255,12 +316,13 @@
ah->ar_hln = ifp->if_addrlen; /* hardware address length */
ah->ar_pln = sizeof(struct in_addr); /* protocol address length */
ah->ar_op = htons(ARPOP_REQUEST);
- bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
- bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
- bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
+ bcopy(enaddr, ar_sha(ah), ah->ar_hln);
+ bcopy(sip, ar_spa(ah), ah->ar_pln);
+ bcopy(tip, ar_tpa(ah), ah->ar_pln);
sa.sa_family = AF_ARP;
sa.sa_len = 2;
m->m_flags |= M_BCAST;
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp->if_output)(ifp, m, &sa, NULL);
ARPSTAT_INC(txrequests);
}
@@ -281,7 +343,7 @@
*/
int
arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
- struct sockaddr *dst, u_char *desten, struct llentry **lle)
+ const struct sockaddr *dst, u_char *desten, struct llentry **lle)
{
struct llentry *la = 0;
u_int flags = 0;
@@ -317,8 +379,8 @@
if (la == NULL) {
if (flags & LLE_CREATE)
log(LOG_DEBUG,
- "arpresolve: can't allocate llinfo for %s\n",
- inet_ntoa(SIN(dst)->sin_addr));
+ "arpresolve: can't allocate llinfo for %s on %s\n",
+ inet_ntoa(SIN(dst)->sin_addr), ifp->if_xname);
m_freem(m);
return (EINVAL);
}
@@ -326,6 +388,7 @@
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
bcopy(&la->ll_addr, desten, ifp->if_addrlen);
+ renew = 0;
/*
* If entry has an expiry time and it is approaching,
* see if we need to send an ARP request within this
@@ -333,15 +396,21 @@
*/
if (!(la->la_flags & LLE_STATIC) &&
time_uptime + la->la_preempt > la->la_expire) {
- arprequest(ifp, NULL,
- &SIN(dst)->sin_addr, IF_LLADDR(ifp));
-
+ renew = 1;
la->la_preempt--;
}
*lle = la;
- error = 0;
- goto done;
+
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(la);
+ else
+ LLE_RUNLOCK(la);
+
+ if (renew == 1)
+ arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
+
+ return (0);
}
if (la->la_flags & LLE_STATIC) { /* should not happen! */
@@ -411,8 +480,7 @@
LLE_REMREF(la);
la->la_asked++;
LLE_WUNLOCK(la);
- arprequest(ifp, NULL, &SIN(dst)->sin_addr,
- IF_LLADDR(ifp));
+ arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
return (error);
}
done:
@@ -491,6 +559,9 @@
static int log_arp_movements = 1;
static int log_arp_permanent_modify = 1;
static int allow_multicast = 0;
+static struct timeval arp_lastlog;
+static int arp_curpps;
+static int arp_maxpps = 1;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
&log_arp_wrong_iface, 0,
@@ -503,7 +574,16 @@
"log arp replies from MACs different than the one in the permanent arp entry");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
&allow_multicast, 0, "accept multicast addresses");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
+ CTLFLAG_RW, &arp_maxpps, 0,
+ "Maximum number of remotely triggered ARP messages that can be "
+ "logged per second");
+#define ARP_LOG(pri, ...) do { \
+ if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \
+ log((pri), "arp: " __VA_ARGS__); \
+} while (0)
+
static void
in_arpinput(struct mbuf *m)
{
@@ -519,7 +599,7 @@
int op, flags;
int req_len;
int bridged = 0, is_bridge = 0;
- int carp_match = 0;
+ int carped;
struct sockaddr_in sin;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
@@ -532,7 +612,7 @@
req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
- log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n");
+ ARP_LOG(LOG_NOTICE, "runt packet -- m_pullup failed\n");
return;
}
@@ -542,13 +622,13 @@
* a protocol length not equal to an IPv4 address.
*/
if (ah->ar_pln != sizeof(struct in_addr)) {
- log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n",
+ ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
sizeof(struct in_addr));
goto drop;
}
if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
- log(LOG_NOTICE, "arp: %*D is multicast\n",
+ ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
goto drop;
}
@@ -564,28 +644,18 @@
* For a bridge, we want to check the address irrespective
* of the receive interface. (This will change slightly
* when we have clusters of interfaces).
- * If the interface does not match, but the recieving interface
- * is part of carp, we call carp_iamatch to see if this is a
- * request for the virtual host ip.
- * XXX: This is really ugly!
*/
IN_IFADDR_RLOCK();
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
ia->ia_ifp == ifp) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
+ (ia->ia_ifa.ifa_carp == NULL ||
+ (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
goto match;
}
- if (ifp->if_carp != NULL &&
- (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
- carp_match = 1;
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
- goto match;
- }
}
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
@@ -625,7 +695,9 @@
*/
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
- if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (ifa->ifa_addr->sa_family == AF_INET &&
+ (ifa->ifa_carp == NULL ||
+ (*carp_iamatch_p)(ifa, &enaddr))) {
ia = ifatoia(ifa);
ifa_ref(ifa);
IF_ADDR_RUNLOCK(ifp);
@@ -646,14 +718,14 @@
match:
if (!enaddr)
enaddr = (u_int8_t *)IF_LLADDR(ifp);
+ carped = (ia->ia_ifa.ifa_carp != NULL);
myaddr = ia->ia_addr.sin_addr;
ifa_free(&ia->ia_ifa);
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
goto drop; /* it's from me, ignore it. */
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
- log(LOG_NOTICE,
- "arp: link address is broadcast for IP address %s!\n",
- inet_ntoa(isaddr));
+ ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
+ "%s!\n", inet_ntoa(isaddr));
goto drop;
}
/*
@@ -662,9 +734,9 @@
* case we suppress the warning to avoid false positive complaints of
* potential misconfiguration.
*/
- if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
- log(LOG_ERR,
- "arp: %*D is using my IP address %s on %s!\n",
+ if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
+ myaddr.s_addr != 0) {
+ ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
inet_ntoa(isaddr), ifp->if_xname);
itaddr = myaddr;
@@ -685,9 +757,9 @@
IF_AFDATA_UNLOCK(ifp);
if (la != NULL) {
/* the following is not an error when doing bridging */
- if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
+ if (!bridged && la->lle_tbl->llt_ifp != ifp) {
if (log_arp_wrong_iface)
- log(LOG_WARNING, "arp: %s is on %s "
+ ARP_LOG(LOG_WARNING, "%s is on %s "
"but got reply from %*D on %s\n",
inet_ntoa(isaddr),
la->lle_tbl->llt_ifp->if_xname,
@@ -701,8 +773,8 @@
if (la->la_flags & LLE_STATIC) {
LLE_WUNLOCK(la);
if (log_arp_permanent_modify)
- log(LOG_ERR,
- "arp: %*D attempts to modify "
+ ARP_LOG(LOG_ERR,
+ "%*D attempts to modify "
"permanent entry for %s on %s\n",
ifp->if_addrlen,
(u_char *)ar_sha(ah), ":",
@@ -710,7 +782,7 @@
goto reply;
}
if (log_arp_movements) {
- log(LOG_INFO, "arp: %s moved from %*D "
+ ARP_LOG(LOG_INFO, "%s moved from %*D "
"to %*D on %s\n",
inet_ntoa(isaddr),
ifp->if_addrlen,
@@ -722,7 +794,7 @@
if (ifp->if_addrlen != ah->ar_hln) {
LLE_WUNLOCK(la);
- log(LOG_WARNING, "arp from %*D: addr len: new %d, "
+ ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
"i/f %d (ignored)\n", ifp->if_addrlen,
(u_char *) ar_sha(ah), ":", ah->ar_hln,
ifp->if_addrlen);
@@ -731,7 +803,7 @@
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(arp_update_event, la);
+ EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
if (!(la->la_flags & LLE_STATIC)) {
int canceled;
@@ -763,6 +835,8 @@
for (; m_hold != NULL; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
m_hold->m_nextpkt = NULL;
+ /* Avoid confusing lower layers. */
+ m_clrprotoflags(m_hold);
(*ifp->if_output)(ifp, m_hold, &sa, NULL);
}
} else
@@ -781,9 +855,9 @@
struct llentry *lle = NULL;
sin.sin_addr = itaddr;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_RLOCK(ifp);
lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
- IF_AFDATA_UNLOCK(ifp);
+ IF_AFDATA_RUNLOCK(ifp);
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
@@ -830,7 +904,7 @@
if (!rt)
goto drop;
if (rt->rt_ifp != ifp) {
- log(LOG_INFO, "arp_proxy: ignoring request"
+ ARP_LOG(LOG_INFO, "proxy: ignoring request"
" from %s via %s, expecting %s\n",
inet_ntoa(isaddr), ifp->if_xname,
rt->rt_ifp->if_xname);
@@ -867,6 +941,7 @@
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp->if_output)(ifp, m, &sa, NULL);
ARPSTAT_INC(txreplies);
return;
@@ -876,14 +951,124 @@
}
#endif
+/*
+ * Sysctl handler for garp_rexmit_count: like sysctl_handle_int(), but a newly
+ * written value is clamped to [0, MAX_GARP_RETRANSMITS] before being stored.
+ */
+static int
+sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
+{
+ int error; /* status from sysctl_handle_int(), returned as-is */
+ int rexmit_count = *(int *)arg1; /* local copy of the current setting */
+
+ error = sysctl_handle_int(oidp, &rexmit_count, 0, req); /* acts on the copy, not *arg1 */
+
+ /* Enforce limits on any new value that may have been set. */
+ if (!error && req->newptr) {
+ /* A new value was set; clamp it into range rather than reject it. */
+ if (rexmit_count < 0) {
+ rexmit_count = 0;
+ } else if (rexmit_count > MAX_GARP_RETRANSMITS) {
+ rexmit_count = MAX_GARP_RETRANSMITS;
+ }
+ *(int *)arg1 = rexmit_count; /* store the clamped value; *arg1 is only written on this path */
+ }
+
+ return (error);
+}
+
+/*
+ * Callout handler: retransmit a Gratuitous ARP (GARP) and, if necessary,
+ * re-arm the callout to send another. A pending callout owns an ifa reference.
+ */
+static void
+garp_rexmit(void *arg)
+{
+ struct in_ifaddr *ia = arg; /* the address passed to callout_reset() */
+
+ if (callout_pending(&ia->ia_garp_timer) || /* timer is pending again ... */
+ !callout_active(&ia->ia_garp_timer)) { /* ... or no longer active: do not send */
+ IFA_UNLOCK(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa); /* drop the reference this invocation was holding */
+ return;
+ }
+
+ /*
+ * Drop the ifa lock (held on entry) while the ARP request is generated.
+ */
+ IFA_UNLOCK(&ia->ia_ifa);
+
+ arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr, /* source and target IP are both our own address */
+ &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
+
+ /*
+ * Increment the count of retransmissions. If the count has reached the
+ * maximum value, stop sending the GARP packets. Otherwise, schedule
+ * the callout to retransmit another GARP packet.
+ */
+ ++ia->ia_garp_count; /* NOTE(review): updated after the ifa lock was dropped above */
+ if (ia->ia_garp_count >= garp_rexmit_count) {
+ ifa_free(&ia->ia_ifa); /* finished: release the callout's reference */
+ } else {
+ int rescheduled;
+ IFA_LOCK(&ia->ia_ifa); /* callout_reset() is issued with the ifa lock held */
+ rescheduled = callout_reset(&ia->ia_garp_timer,
+ (1 << ia->ia_garp_count) * hz, /* delay doubles with each retransmission */
+ garp_rexmit, ia);
+ IFA_UNLOCK(&ia->ia_ifa);
+ if (rescheduled) { /* nonzero presumably means a pending callout was replaced -- TODO confirm */
+ ifa_free(&ia->ia_ifa); /* that callout already owned a reference; drop this one */
+ }
+ }
+}
+
+/*
+ * Start (or restart) the GARP retransmit timer for an interface address.
+ *
+ * A single GARP is always transmitted when an IPv4 address is added
+ * to an interface and that is usually sufficient. However, in some
+ * circumstances, such as when a shared address is passed between
+ * cluster nodes, this single GARP may occasionally be dropped or
+ * lost. This can lead to neighbors on the network link working with a
+ * stale ARP cache and sending packets destined for that address to
+ * the node that previously owned the address, which may not respond.
+ *
+ * To avoid this situation, GARP retransmits can be enabled by setting
+ * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
+ * than zero. The setting represents the maximum number of
+ * retransmissions. The interval between retransmissions is calculated
+ * using an exponential backoff algorithm, doubling each time, so the
+ * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
+ */
+static void
+garp_timer_start(struct ifaddr *ifa)
+{
+ struct in_ifaddr *ia = (struct in_ifaddr *) ifa; /* NOTE(review): assumes ifa is really an in_ifaddr -- confirm callers */
+
+ IFA_LOCK(ifa);
+ ia->ia_garp_count = 0; /* restart the backoff sequence from the beginning */
+ if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz, /* count is 0, so the delay is 1 * hz */
+ garp_rexmit, ia) == 0) { /* 0 presumably means no callout was already pending -- TODO confirm */
+ ifa_ref(ifa); /* the newly armed callout takes its own reference */
+ }
+ IFA_UNLOCK(ifa);
+}
+
void
arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
struct llentry *lle;
+ if (ifa->ifa_carp != NULL)
+ return;
+
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
+ if (garp_rexmit_count > 0) {
+ garp_timer_start(ifa);
+ }
+
/*
* interface address is considered static entry
* because the output of the arp utility shows
Modified: trunk/sys/netinet/if_ether.h
===================================================================
--- trunk/sys/netinet/if_ether.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/if_ether.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)if_ether.h 8.3 (Berkeley) 5/2/95
- * $FreeBSD: stable/9/sys/netinet/if_ether.h 196995 2009-09-08 21:17:17Z np $
+ * $FreeBSD: stable/10/sys/netinet/if_ether.h 249925 2013-04-26 12:50:32Z glebius $
*/
#ifndef _NETINET_IF_ETHER_H_
@@ -49,9 +49,9 @@
(enaddr)[0] = 0x01; \
(enaddr)[1] = 0x00; \
(enaddr)[2] = 0x5e; \
- (enaddr)[3] = ((u_char *)ipaddr)[1] & 0x7f; \
- (enaddr)[4] = ((u_char *)ipaddr)[2]; \
- (enaddr)[5] = ((u_char *)ipaddr)[3]; \
+ (enaddr)[3] = ((const u_char *)ipaddr)[1] & 0x7f; \
+ (enaddr)[4] = ((const u_char *)ipaddr)[2]; \
+ (enaddr)[5] = ((const u_char *)ipaddr)[3]; \
}
/*
* Macro to map an IP6 multicast address to an Ethernet multicast address.
@@ -64,10 +64,10 @@
{ \
(enaddr)[0] = 0x33; \
(enaddr)[1] = 0x33; \
- (enaddr)[2] = ((u_char *)ip6addr)[12]; \
- (enaddr)[3] = ((u_char *)ip6addr)[13]; \
- (enaddr)[4] = ((u_char *)ip6addr)[14]; \
- (enaddr)[5] = ((u_char *)ip6addr)[15]; \
+ (enaddr)[2] = ((const u_char *)ip6addr)[12]; \
+ (enaddr)[3] = ((const u_char *)ip6addr)[13]; \
+ (enaddr)[4] = ((const u_char *)ip6addr)[14]; \
+ (enaddr)[5] = ((const u_char *)ip6addr)[15]; \
}
/*
@@ -90,6 +90,7 @@
#define arp_pln ea_hdr.ar_pln
#define arp_op ea_hdr.ar_op
+#ifndef BURN_BRIDGES /* Can be used by third party software. */
struct sockaddr_inarp {
u_char sin_len;
u_char sin_family;
@@ -100,6 +101,8 @@
u_short sin_other;
#define SIN_PROXY 1
};
+#endif /* !BURN_BRIDGES */
+
/*
* IP and ethernet specific routing flags
*/
@@ -113,16 +116,13 @@
struct llentry;
struct ifaddr;
-int arpresolve(struct ifnet *ifp, struct rtentry *rt,
- struct mbuf *m, struct sockaddr *dst, u_char *desten,
- struct llentry **lle);
+int arpresolve(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
+ const struct sockaddr *dst, u_char *desten, struct llentry **lle);
+void arprequest(struct ifnet *, const struct in_addr *,
+ const struct in_addr *, u_char *);
void arp_ifinit(struct ifnet *, struct ifaddr *);
void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
-
-#include <sys/eventhandler.h>
-typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
-EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
-
+void arp_ifscrub(struct ifnet *, uint32_t);
#endif
#endif
Modified: trunk/sys/netinet/igmp.c
===================================================================
--- trunk/sys/netinet/igmp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -49,7 +49,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/igmp.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/igmp.c 305558 2016-09-07 19:25:08Z dim $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -290,7 +290,7 @@
{
#ifdef VIMAGE
- m->m_pkthdr.header = ifp->if_vnet;
+ m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
m->m_pkthdr.flowid = ifp->if_index;
}
@@ -299,7 +299,7 @@
igmp_scrub_context(struct mbuf *m)
{
- m->m_pkthdr.header = NULL;
+ m->m_pkthdr.PH_loc.ptr = NULL;
m->m_pkthdr.flowid = 0;
}
@@ -327,7 +327,7 @@
#ifdef notyet
#if defined(VIMAGE) && defined(INVARIANTS)
- KASSERT(curvnet == (m->m_pkthdr.header),
+ KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr),
("%s: called when curvnet was not restored", __func__));
#endif
#endif
@@ -524,13 +524,13 @@
struct mbuf *m;
struct ipoption *p;
- MGET(m, M_DONTWAIT, MT_DATA);
+ m = m_get(M_WAITOK, MT_DATA);
p = mtod(m, struct ipoption *);
p->ipopt_dst.s_addr = INADDR_ANY;
- p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */
- p->ipopt_list[1] = 0x04; /* 4 bytes long */
- p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */
- p->ipopt_list[3] = 0x00; /* pad byte */
+ p->ipopt_list[0] = (char)IPOPT_RA; /* Router Alert Option */
+ p->ipopt_list[1] = 0x04; /* 4 bytes long */
+ p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */
+ p->ipopt_list[3] = 0x00; /* pad byte */
m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
return (m);
@@ -655,16 +655,12 @@
void
igmp_domifdetach(struct ifnet *ifp)
{
- struct igmp_ifinfo *igi;
CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
__func__, ifp, ifp->if_xname);
IGMP_LOCK();
-
- igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
igi_delete_locked(ifp);
-
IGMP_UNLOCK();
}
@@ -1443,7 +1439,7 @@
ip = mtod(m, struct ip *);
iphlen = off;
- igmplen = ip->ip_len;
+ igmplen = ntohs(ip->ip_len) - off;
/*
* Validate lengths.
@@ -1534,8 +1530,7 @@
case IGMP_VERSION_3: {
struct igmpv3 *igmpv3;
uint16_t igmpv3len;
- uint16_t srclen;
- int nsrc;
+ uint16_t nsrc;
IGMPSTAT_INC(igps_rcv_v3_queries);
igmpv3 = (struct igmpv3 *)igmp;
@@ -1543,8 +1538,8 @@
* Validate length based on source count.
*/
nsrc = ntohs(igmpv3->igmp_numsrc);
- srclen = sizeof(struct in_addr) * nsrc;
- if (nsrc * sizeof(in_addr_t) > srclen) {
+ if (nsrc * sizeof(in_addr_t) >
+ UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
IGMPSTAT_INC(igps_rcv_tooshort);
return;
}
@@ -1553,7 +1548,7 @@
* this scope.
*/
igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
- srclen;
+ sizeof(struct in_addr) * nsrc;
if ((m->m_flags & M_EXT ||
m->m_len < igmpv3len) &&
(m = m_pullup(m, igmpv3len)) == NULL) {
@@ -2122,6 +2117,7 @@
__func__, igi->igi_version, IGMP_VERSION_2,
igi->igi_ifp, igi->igi_ifp->if_xname);
igi->igi_version = IGMP_VERSION_2;
+ igmp_v3_cancel_link_timers(igi);
}
}
} else if (igi->igi_v1_timer > 0) {
@@ -2204,7 +2200,7 @@
ifp = inm->inm_ifp;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOMEM);
MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
@@ -2226,7 +2222,7 @@
ip = mtod(m, struct ip *);
ip->ip_tos = 0;
- ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
+ ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp));
ip->ip_off = 0;
ip->ip_p = IPPROTO_IGMP;
ip->ip_src.s_addr = INADDR_ANY;
@@ -2781,12 +2777,12 @@
m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
if (!is_state_change && !is_group_query) {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
MH_ALIGN(m, IGMP_LEADINGSPACE);
}
@@ -2906,11 +2902,11 @@
CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
MH_ALIGN(m, IGMP_LEADINGSPACE);
}
@@ -3062,11 +3058,11 @@
CTR1(KTR_IGMPV3,
"%s: use previous packet", __func__);
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m)
m->m_data += IGMP_LEADINGSPACE;
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m)
MH_ALIGN(m, IGMP_LEADINGSPACE);
}
@@ -3328,6 +3324,15 @@
KASSERT(igi->igi_version == IGMP_VERSION_3,
("%s: called when version %d", __func__, igi->igi_version));
+ /*
+ * Check whether there are packets already queued. If so, send them first.
+ * For a large number of groups the reply to a general query can take
+ * many packets; we should finish sending them before starting to
+ * queue the new reply.
+ */
+ if (igi->igi_gq.ifq_head != NULL)
+ goto send;
+
ifp = igi->igi_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3363,6 +3368,7 @@
}
IF_ADDR_RUNLOCK(ifp);
+send:
loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
@@ -3403,7 +3409,7 @@
* indexes to guard against interface detach, they are
* unique to each VIMAGE and must be retrieved.
*/
- CURVNET_SET((struct vnet *)(m->m_pkthdr.header));
+ CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr));
ifindex = igmp_restore_context(m);
/*
@@ -3450,7 +3456,7 @@
}
igmp_scrub_context(m0);
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m0->m_pkthdr.rcvif = V_loif;
#ifdef MAC
mac_netinet_igmp_send(ifp, m0);
@@ -3498,7 +3504,7 @@
if (m->m_flags & M_IGMPV3_HDR) {
igmpreclen -= hdrlen;
} else {
- M_PREPEND(m, hdrlen, M_DONTWAIT);
+ M_PREPEND(m, hdrlen, M_NOWAIT);
if (m == NULL)
return (NULL);
m->m_flags |= M_IGMPV3_HDR;
@@ -3523,8 +3529,8 @@
ip = mtod(m, struct ip *);
ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
- ip->ip_len = hdrlen + igmpreclen;
- ip->ip_off = IP_DF;
+ ip->ip_len = htons(hdrlen + igmpreclen);
+ ip->ip_off = htons(IP_DF);
ip->ip_p = IPPROTO_IGMP;
ip->ip_sum = 0;
Modified: trunk/sys/netinet/igmp.h
===================================================================
--- trunk/sys/netinet/igmp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* @(#)igmp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/igmp.h 193938 2009-06-10 18:12:15Z imp $
+ * $FreeBSD: stable/10/sys/netinet/igmp.h 193938 2009-06-10 18:12:15Z imp $
*/
#ifndef _NETINET_IGMP_H_
Modified: trunk/sys/netinet/igmp_var.h
===================================================================
--- trunk/sys/netinet/igmp_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/igmp_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
- * $FreeBSD: stable/9/sys/netinet/igmp_var.h 190965 2009-04-12 13:41:13Z rwatson $
+ * $FreeBSD: stable/10/sys/netinet/igmp_var.h 254925 2013-08-26 18:16:05Z jhb $
*/
#ifndef _NETINET_IGMP_VAR_H_
@@ -219,8 +219,4 @@
#define IGMPCTL_STATS 1 /* statistics (read-only) */
#define IGMPCTL_MAXID 2
-#define IGMPCTL_NAMES { \
- { 0, 0 }, \
- { "stats", CTLTYPE_STRUCT } \
-}
#endif
Modified: trunk/sys/netinet/in.c
===================================================================
--- trunk/sys/netinet/in.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in.c 240313 2012-09-10 12:25:57Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in.c 309340 2016-11-30 22:20:23Z vangyzen $");
#include "opt_mpath.h"
@@ -57,10 +57,12 @@
#include <net/route.h>
#include <net/vnet.h>
+#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_carp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
@@ -70,25 +72,20 @@
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
struct ifnet *, struct thread *);
-static int in_addprefix(struct in_ifaddr *, int);
-static int in_scrubprefix(struct in_ifaddr *, u_int);
static void in_socktrim(struct sockaddr_in *);
-static int in_ifinit(struct ifnet *,
- struct in_ifaddr *, struct sockaddr_in *, int);
+static int in_ifinit(struct ifnet *, struct in_ifaddr *,
+ struct sockaddr_in *, int, int);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, sameprefixcarponly);
-#define V_sameprefixcarponly VNET(sameprefixcarponly)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
- &VNET_NAME(sameprefixcarponly), 0,
+static VNET_DEFINE(int, nosameprefix);
+#define V_nosameprefix VNET(nosameprefix)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
+ &VNET_NAME(nosameprefix), 0,
"Refuse to create same prefixes on different interfaces");
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define V_ripcbinfo VNET(ripcbinfo)
-VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
-#define V_arpstat VNET(arpstat)
-
/*
* Return 1 if an internet address is for a ``local'' host
* (one to which we have a connection).
@@ -221,10 +218,17 @@
struct in_addr dst;
struct in_ifinfo *ii;
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
- struct sockaddr_in oldaddr;
int error, hostIsNew, iaIsNew, maskIsNew;
int iaIsFirst;
+ u_long ocmd = cmd;
+ /*
+ * Pre-10.x compat: OSIOCAIFADDR passes a shorter
+ * struct in_aliasreq, without ifra_vhid.
+ */
+ if (cmd == OSIOCAIFADDR)
+ cmd = SIOCAIFADDR;
+
ia = NULL;
iaIsFirst = 0;
iaIsNew = 0;
@@ -235,17 +239,44 @@
* in_lifaddr_ioctl() and ifp->if_ioctl().
*/
switch (cmd) {
- case SIOCAIFADDR:
- case SIOCDIFADDR:
case SIOCGIFADDR:
case SIOCGIFBRDADDR:
case SIOCGIFDSTADDR:
case SIOCGIFNETMASK:
+ case SIOCDIFADDR:
+ break;
+ case SIOCAIFADDR:
+ /*
+ * ifra_addr must be present and be of INET family.
+ * ifra_broadaddr and ifra_mask are optional.
+ */
+ if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
+ ifra->ifra_addr.sin_family != AF_INET)
+ return (EINVAL);
+ if (ifra->ifra_broadaddr.sin_len != 0 &&
+ (ifra->ifra_broadaddr.sin_len !=
+ sizeof(struct sockaddr_in) ||
+ ifra->ifra_broadaddr.sin_family != AF_INET))
+ return (EINVAL);
+#if 0
+ /*
+ * ifconfig(8) in pre-10.x doesn't set sin_family for the
+ * mask. The code is disabled for the 10.x timeline, to
+ * make SIOCAIFADDR compatible with 9.x ifconfig(8).
+ * The code should be enabled in 11.x
+ */
+ if (ifra->ifra_mask.sin_len != 0 &&
+ (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
+ ifra->ifra_mask.sin_family != AF_INET))
+ return (EINVAL);
+#endif
+ break;
case SIOCSIFADDR:
case SIOCSIFBRDADDR:
case SIOCSIFDSTADDR:
case SIOCSIFNETMASK:
- break;
+ /* We no longer support these old commands. */
+ return (EINVAL);
case SIOCALIFADDR:
if (td != NULL) {
@@ -286,10 +317,6 @@
*/
switch (cmd) {
case SIOCAIFADDR:
- case SIOCSIFADDR:
- case SIOCSIFBRDADDR:
- case SIOCSIFNETMASK:
- case SIOCSIFDSTADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
@@ -377,10 +404,6 @@
error = EADDRNOTAVAIL;
goto out;
}
- /* FALLTHROUGH */
- case SIOCSIFADDR:
- case SIOCSIFNETMASK:
- case SIOCSIFDSTADDR:
if (ia == NULL) {
ia = (struct in_ifaddr *)
malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
@@ -395,6 +418,8 @@
ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+ callout_init_mtx(&ia->ia_garp_timer, &ifa->ifa_mtx,
+ CALLOUT_RETURNUNLOCKED);
ia->ia_sockmask.sin_len = 8;
ia->ia_sockmask.sin_family = AF_INET;
@@ -416,7 +441,6 @@
}
break;
- case SIOCSIFBRDADDR:
case SIOCGIFADDR:
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
@@ -457,72 +481,13 @@
*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
goto out;
- case SIOCSIFDSTADDR:
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
- error = EINVAL;
- goto out;
- }
- oldaddr = ia->ia_dstaddr;
- ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
- if (ifp->if_ioctl != NULL) {
- error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
- (caddr_t)ia);
- if (error) {
- ia->ia_dstaddr = oldaddr;
- goto out;
- }
- }
- if (ia->ia_flags & IFA_ROUTE) {
- ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
- rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
- ia->ia_ifa.ifa_dstaddr =
- (struct sockaddr *)&ia->ia_dstaddr;
- rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
- }
- goto out;
-
- case SIOCSIFBRDADDR:
- if ((ifp->if_flags & IFF_BROADCAST) == 0) {
- error = EINVAL;
- goto out;
- }
- ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
- goto out;
-
- case SIOCSIFADDR:
- error = in_ifinit(ifp, ia,
- (struct sockaddr_in *) &ifr->ifr_addr, 1);
- if (error != 0 && iaIsNew)
- break;
- if (error == 0) {
- ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
- if (iaIsFirst &&
- (ifp->if_flags & IFF_MULTICAST) != 0) {
- error = in_joingroup(ifp, &allhosts_addr,
- NULL, &ii->ii_allhosts);
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
- }
- error = 0;
- goto out;
-
- case SIOCSIFNETMASK:
- ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
- ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
- goto out;
-
case SIOCAIFADDR:
maskIsNew = 0;
hostIsNew = 1;
error = 0;
- if (ia->ia_addr.sin_family == AF_INET) {
- if (ifra->ifra_addr.sin_len == 0) {
- ifra->ifra_addr = ia->ia_addr;
- hostIsNew = 0;
- } else if (ifra->ifra_addr.sin_addr.s_addr ==
- ia->ia_addr.sin_addr.s_addr)
- hostIsNew = 0;
- }
+ if (ifra->ifra_addr.sin_addr.s_addr ==
+ ia->ia_addr.sin_addr.s_addr)
+ hostIsNew = 0;
if (ifra->ifra_mask.sin_len) {
/*
* QL: XXX
@@ -546,14 +511,14 @@
ia->ia_dstaddr = ifra->ifra_dstaddr;
maskIsNew = 1; /* We lie; but the effect's the same */
}
- if (ifra->ifra_addr.sin_family == AF_INET &&
- (hostIsNew || maskIsNew))
- error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+ if (hostIsNew || maskIsNew)
+ error = in_ifinit(ifp, ia, &ifra->ifra_addr, maskIsNew,
+ (ocmd == cmd ? ifra->ifra_vhid : 0));
if (error != 0 && iaIsNew)
break;
if ((ifp->if_flags & IFF_BROADCAST) &&
- (ifra->ifra_broadaddr.sin_family == AF_INET))
+ ifra->ifra_broadaddr.sin_len)
ia->ia_broadaddr = ifra->ifra_broadaddr;
if (error == 0) {
ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
@@ -587,6 +552,9 @@
panic("in_control: unsupported ioctl");
}
+ if (ia->ia_ifa.ifa_carp)
+ (*carp_detach_p)(&ia->ia_ifa);
+
IF_ADDR_WLOCK(ifp);
/* Re-check that ia is still part of the list. */
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -605,35 +573,34 @@
}
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
IF_ADDR_WUNLOCK(ifp);
- ifa_free(&ia->ia_ifa); /* if_addrhead */
+ ifa_free(&ia->ia_ifa); /* if_addrhead */
IN_IFADDR_WLOCK();
TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
- if (ia->ia_addr.sin_family == AF_INET) {
- struct in_ifaddr *if_ia;
- LIST_REMOVE(ia, ia_hash);
- IN_IFADDR_WUNLOCK();
- /*
- * If this is the last IPv4 address configured on this
- * interface, leave the all-hosts group.
- * No state-change report need be transmitted.
- */
- if_ia = NULL;
- IFP_TO_IA(ifp, if_ia);
- if (if_ia == NULL) {
- ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
- IN_MULTI_LOCK();
- if (ii->ii_allhosts) {
- (void)in_leavegroup_locked(ii->ii_allhosts,
- NULL);
- ii->ii_allhosts = NULL;
- }
- IN_MULTI_UNLOCK();
- } else
- ifa_free(&if_ia->ia_ifa);
+ LIST_REMOVE(ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+ /*
+ * If this is the last IPv4 address configured on this
+ * interface, leave the all-hosts group.
+ * No state-change report need be transmitted.
+ */
+ IFP_TO_IA(ifp, iap);
+ if (iap == NULL) {
+ ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
+ IN_MULTI_LOCK();
+ if (ii->ii_allhosts) {
+ (void)in_leavegroup_locked(ii->ii_allhosts, NULL);
+ ii->ii_allhosts = NULL;
+ }
+ IN_MULTI_UNLOCK();
} else
- IN_IFADDR_WUNLOCK();
+ ifa_free(&iap->ia_ifa);
+
+ IFA_LOCK(&ia->ia_ifa);
+ if (callout_stop(&ia->ia_garp_timer))
+ ifa_free(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
out:
if (ia != NULL)
@@ -760,7 +727,8 @@
continue;
if (match.s_addr == 0)
break;
- candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
+ sin = (struct sockaddr_in *)&ifa->ifa_addr;
+ candidate.s_addr = sin->sin_addr.s_addr;
candidate.s_addr &= mask.s_addr;
if (candidate.s_addr == match.s_addr)
break;
@@ -832,60 +800,43 @@
*/
static int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
- int scrub)
+ int masksupplied, int vhid)
{
register u_long i = ntohl(sin->sin_addr.s_addr);
- struct sockaddr_in oldaddr;
- int s = splimp(), flags = RTF_UP, error = 0;
+ int flags, error = 0;
- oldaddr = ia->ia_addr;
- if (oldaddr.sin_family == AF_INET)
+ IN_IFADDR_WLOCK();
+ if (ia->ia_addr.sin_family == AF_INET)
LIST_REMOVE(ia, ia_hash);
ia->ia_addr = *sin;
- if (ia->ia_addr.sin_family == AF_INET) {
- IN_IFADDR_WLOCK();
- LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
- ia, ia_hash);
- IN_IFADDR_WUNLOCK();
+ LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+ ia, ia_hash);
+ IN_IFADDR_WUNLOCK();
+
+ if (vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa, vhid);
+ else
+ error = EPROTONOSUPPORT;
}
+ if (error)
+ return (error);
+
/*
* Give the interface a chance to initialize
* if this is its first address,
* and to validate the address if necessary.
*/
- if (ifp->if_ioctl != NULL) {
- error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
- if (error) {
- splx(s);
+ if (ifp->if_ioctl != NULL &&
+ (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
/* LIST_REMOVE(ia, ia_hash) is done in in_control */
- ia->ia_addr = oldaddr;
- IN_IFADDR_WLOCK();
- if (ia->ia_addr.sin_family == AF_INET)
- LIST_INSERT_HEAD(INADDR_HASH(
- ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
- else
- /*
- * If oldaddr family is not AF_INET (e.g.
- * interface has been just created) in_control
- * does not call LIST_REMOVE, and we end up
- * with bogus ia entries in hash
- */
- LIST_REMOVE(ia, ia_hash);
- IN_IFADDR_WUNLOCK();
return (error);
- }
- }
- splx(s);
- if (scrub) {
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
- in_ifscrub(ifp, ia, LLE_STATIC);
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
- }
+
/*
* Be compatible with network classes, if netmask isn't supplied,
* guess it based on classes.
*/
- if (ia->ia_subnetmask == 0) {
+ if (!masksupplied) {
if (IN_CLASSA(i))
ia->ia_subnetmask = IN_CLASSA_NET;
else if (IN_CLASSB(i))
@@ -896,14 +847,11 @@
}
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
+
/*
- * XXX: carp(4) does not have interface route
- */
- if (ifp->if_type == IFT_CARP)
- return (0);
- /*
* Add route for the network.
*/
+ flags = RTF_UP;
ia->ia_ifa.ifa_metric = ifp->if_metric;
if (ifp->if_flags & IFF_BROADCAST) {
if (ia->ia_subnetmask == IN_RFC3021_MASK)
@@ -919,22 +867,20 @@
return (0);
flags |= RTF_HOST;
}
- if ((error = in_addprefix(ia, flags)) != 0)
+ if (!vhid && (error = in_addprefix(ia, flags)) != 0)
return (error);
if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
return (0);
- if (ifp->if_flags & IFF_POINTOPOINT) {
- if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
+ if (ifp->if_flags & IFF_POINTOPOINT &&
+ ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
return (0);
- }
-
/*
* add a loopback route to self
*/
- if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
+ if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
struct route ia_ro;
bzero(&ia_ro, sizeof(ia_ro));
@@ -962,48 +908,9 @@
? RTF_HOST : 0)
/*
- * Generate a routing message when inserting or deleting
- * an interface address alias.
- */
-static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
- struct in_ifaddr *target)
-{
- struct route pfx_ro;
- struct sockaddr_in *pfx_addr;
- struct rtentry msg_rt;
-
- /* QL: XXX
- * This is a bit questionable because there is no
- * additional route entry added/deleted for an address
- * alias. Therefore this route report is inaccurate.
- */
- bzero(&pfx_ro, sizeof(pfx_ro));
- pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
- pfx_addr->sin_len = sizeof(*pfx_addr);
- pfx_addr->sin_family = AF_INET;
- pfx_addr->sin_addr = *prefix;
- rtalloc_ign_fib(&pfx_ro, 0, 0);
- if (pfx_ro.ro_rt != NULL) {
- msg_rt = *pfx_ro.ro_rt;
-
- /* QL: XXX
- * Point the gateway to the new interface
- * address as if a new prefix route entry has
- * been added through the new address alias.
- * All other parts of the rtentry is accurate,
- * e.g., rt_key, rt_mask, rt_ifp etc.
- */
- msg_rt.rt_gateway = (struct sockaddr *)&target->ia_addr;
- rt_newaddrmsg(cmd, (struct ifaddr *)target, 0, &msg_rt);
- RTFREE(pfx_ro.ro_rt);
- }
- return;
-}
-
-/*
* Check if we have a route for the given prefix already or add one accordingly.
*/
-static int
+int
in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
@@ -1020,6 +927,7 @@
}
IN_IFADDR_RLOCK();
+ /* Look for an existing address with the same prefix, mask, and fib */
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (rtinitflags(ia)) {
p = ia->ia_dstaddr.sin_addr;
@@ -1035,6 +943,8 @@
mask.s_addr != m.s_addr)
continue;
}
+ if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
+ continue;
/*
* If we got a matching prefix route inserted by other
@@ -1049,13 +959,15 @@
} else
break;
#endif
- if (V_sameprefixcarponly &&
- target->ia_ifp->if_type != IFT_CARP &&
- ia->ia_ifp->if_type != IFT_CARP) {
+ if (V_nosameprefix) {
IN_IFADDR_RUNLOCK();
return (EEXIST);
} else {
- in_addralias_rtmsg(RTM_ADD, &prefix, target);
+ int fibnum;
+
+ fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+ target->ia_ifp->if_fib;
+ rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
IN_IFADDR_RUNLOCK();
return (0);
}
@@ -1072,18 +984,16 @@
return (error);
}
-extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
/*
* If there is no other address in the system that can serve a route to the
* same prefix, remove the route. Hand over the route to the new address
* otherwise.
*/
-static int
+int
in_scrubprefix(struct in_ifaddr *target, u_int flags)
{
struct in_ifaddr *ia;
- struct in_addr prefix, mask, p;
+ struct in_addr prefix, mask, p, m;
int error = 0;
struct sockaddr_in prefix0, mask0;
@@ -1102,10 +1012,12 @@
(target->ia_flags & IFA_RTSELF)) {
struct route ia_ro;
int freeit = 0;
+ int fib;
bzero(&ia_ro, sizeof(ia_ro));
*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
- rtalloc_ign_fib(&ia_ro, 0, 0);
+ fib = target->ia_ifa.ifa_ifp->if_fib;
+ rtalloc_ign_fib(&ia_ro, 0, fib);
if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
(ia_ro.ro_rt->rt_ifp == V_loif)) {
RT_LOCK(ia_ro.ro_rt);
@@ -1129,9 +1041,10 @@
arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
}
- if (rtinitflags(target))
+ if (rtinitflags(target)) {
prefix = target->ia_dstaddr.sin_addr;
- else {
+ mask.s_addr = 0;
+ } else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
@@ -1138,21 +1051,32 @@
}
if ((target->ia_flags & IFA_ROUTE) == 0) {
- in_addralias_rtmsg(RTM_DELETE, &prefix, target);
+ int fibnum;
+
+ fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
+ target->ia_ifp->if_fib;
+ rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
return (0);
}
IN_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- if (rtinitflags(ia))
+ if (rtinitflags(ia)) {
p = ia->ia_dstaddr.sin_addr;
- else {
+
+ if (prefix.s_addr != p.s_addr)
+ continue;
+ } else {
p = ia->ia_addr.sin_addr;
- p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
+ m = ia->ia_sockmask.sin_addr;
+ p.s_addr &= m.s_addr;
+
+ if (prefix.s_addr != p.s_addr ||
+ mask.s_addr != m.s_addr)
+ continue;
}
- if ((prefix.s_addr != p.s_addr) ||
- !(ia->ia_ifp->if_flags & IFF_UP))
+ if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
continue;
/*
@@ -1159,13 +1083,8 @@
* If we got a matching prefix address, move IFA_ROUTE and
* the route itself to it. Make sure that routing daemons
* get a heads-up.
- *
- * XXX: a special case for carp(4) interface - this should
- * be more generally specified as an interface that
- * doesn't support such action.
*/
- if ((ia->ia_flags & IFA_ROUTE) == 0
- && (ia->ia_ifp->if_type != IFT_CARP)) {
+ if ((ia->ia_flags & IFA_ROUTE) == 0) {
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
@@ -1264,6 +1183,7 @@
in_pcbpurgeif0(&V_ripcbinfo, ifp);
in_pcbpurgeif0(&V_udbinfo, ifp);
+ in_pcbpurgeif0(&V_ulitecbinfo, ifp);
in_purgemaddrs(ifp);
}
@@ -1313,9 +1233,6 @@
IN_MULTI_UNLOCK();
}
-#include <net/if_dl.h>
-#include <netinet/if_ether.h>
-
struct in_llentry {
struct llentry base;
struct sockaddr_in l3_addr4;
@@ -1340,7 +1257,7 @@
{
struct in_llentry *lle;
- lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
+ lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
@@ -1353,8 +1270,7 @@
lle->base.lle_refcnt = 1;
lle->base.lle_free = in_lltable_free;
LLE_LOCK_INIT(&lle->base);
- callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
- CALLOUT_RETURNUNLOCKED);
+ callout_init(&lle->base.la_timer, 1);
return (&lle->base);
}
@@ -1402,8 +1318,9 @@
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
- /* XXX rtalloc1 should take a const param */
- rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
+ /* XXX rtalloc1_fib should take a const param */
+ rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0,
+ ifp->if_fib);
if (rt == NULL)
return (EINVAL);
@@ -1500,6 +1417,7 @@
#endif
if (!(flags & LLE_CREATE))
return (NULL);
+ IF_AFDATA_WLOCK_ASSERT(ifp);
/*
* A route that covers the given address must have
* been installed 1st because we are doing a resolution,
@@ -1528,7 +1446,7 @@
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
lle->la_flags |= LLE_DELETED;
- EVENTHANDLER_INVOKE(arp_update_event, lle);
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
@@ -1560,7 +1478,7 @@
/* XXX stack use */
struct {
struct rt_msghdr rtm;
- struct sockaddr_inarp sin;
+ struct sockaddr_in sin;
struct sockaddr_dl sdl;
} arpc;
int error, i;
@@ -1581,7 +1499,7 @@
/*
* produce a msg made of:
* struct rt_msghdr;
- * struct sockaddr_inarp; (IPv4)
+ * struct sockaddr_in; (IPv4)
* struct sockaddr_dl;
*/
bzero(&arpc, sizeof(arpc));
@@ -1595,12 +1513,8 @@
arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
/* publish */
- if (lle->la_flags & LLE_PUB) {
+ if (lle->la_flags & LLE_PUB)
arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
- /* proxy only */
- if (lle->la_flags & LLE_PROXY)
- arpc.sin.sin_other = SIN_PROXY;
- }
sdl = &arpc.sdl;
sdl->sdl_family = AF_LINK;
Modified: trunk/sys/netinet/in.h
===================================================================
--- trunk/sys/netinet/in.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
- * $FreeBSD: stable/9/sys/netinet/in.h 239430 2012-08-20 14:42:29Z emaste $
+ * $FreeBSD: stable/10/sys/netinet/in.h 273841 2014-10-29 23:10:48Z jilles $
*/
#ifndef _NETINET_IN_H_
@@ -101,7 +101,7 @@
char sin_zero[8];
};
-#if !defined(_KERNEL) && __BSD_VISIBLE
+#if !defined(_KERNEL) && __POSIX_VISIBLE >= 200112
#ifndef _BYTEORDER_PROTOTYPED
#define _BYTEORDER_PROTOTYPED
@@ -121,7 +121,7 @@
#define ntohs(x) __ntohs(x)
#endif
-#endif /* !_KERNEL && __BSD_VISIBLE */
+#endif /* !_KERNEL && __POSIX_VISIBLE >= 200112 */
#if __POSIX_VISIBLE >= 200112
#define IPPROTO_IPV6 41 /* IP6 header */
@@ -238,6 +238,9 @@
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
#define IPPROTO_SCTP 132 /* SCTP */
#define IPPROTO_MH 135 /* IPv6 Mobility Header */
+#define IPPROTO_UDPLITE 136 /* UDP-Lite */
+#define IPPROTO_HIP 139 /* IP6 Host Identity Protocol */
+#define IPPROTO_SHIM6 140 /* IP6 Shim6 Protocol */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
@@ -244,6 +247,8 @@
#define IPPROTO_PGM 113 /* PGM */
#define IPPROTO_MPLS 137 /* MPLS-in-IP */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
+#define IPPROTO_RESERVED_253 253 /* Reserved */
+#define IPPROTO_RESERVED_254 254 /* Reserved */
/* 255: Reserved */
/* BSD Private, local use, namespace incursion, no longer used */
#define IPPROTO_OLD_DIVERT 254 /* OLD divert pseudo-proto */
@@ -700,24 +705,6 @@
#define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */
#define IPCTL_MAXID 17
-#define IPCTL_NAMES { \
- { 0, 0 }, \
- { "forwarding", CTLTYPE_INT }, \
- { "redirect", CTLTYPE_INT }, \
- { "ttl", CTLTYPE_INT }, \
- { "mtu", CTLTYPE_INT }, \
- { "rtexpire", CTLTYPE_INT }, \
- { "rtminexpire", CTLTYPE_INT }, \
- { "rtmaxcache", CTLTYPE_INT }, \
- { "sourceroute", CTLTYPE_INT }, \
- { "directed-broadcast", CTLTYPE_INT }, \
- { "intr-queue-maxlen", CTLTYPE_INT }, \
- { "intr-queue-drops", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "accept_sourceroute", CTLTYPE_INT }, \
- { "fastforwarding", CTLTYPE_INT }, \
-}
-
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
@@ -742,33 +729,6 @@
#define satosin(sa) ((struct sockaddr_in *)(sa))
#define sintosa(sin) ((struct sockaddr *)(sin))
#define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
-
-/*
- * Historically, BSD keeps ip_len and ip_off in host format
- * when doing layer 3 processing, and this often requires
- * to translate the format back and forth.
- * To make the process explicit, we define a couple of macros
- * that also take into account the fact that at some point
- * we may want to keep those fields always in net format.
- */
-
-#if (BYTE_ORDER == BIG_ENDIAN) || defined(HAVE_NET_IPLEN)
-#define SET_NET_IPLEN(p) do {} while (0)
-#define SET_HOST_IPLEN(p) do {} while (0)
-#else
-#define SET_NET_IPLEN(p) do { \
- struct ip *h_ip = (p); \
- h_ip->ip_len = htons(h_ip->ip_len); \
- h_ip->ip_off = htons(h_ip->ip_off); \
- } while (0)
-
-#define SET_HOST_IPLEN(p) do { \
- struct ip *h_ip = (p); \
- h_ip->ip_len = ntohs(h_ip->ip_len); \
- h_ip->ip_off = ntohs(h_ip->ip_off); \
- } while (0)
-#endif /* !HAVE_NET_IPLEN */
-
#endif /* _KERNEL */
/* INET6 stuff */
Modified: trunk/sys/netinet/in_cksum.c
===================================================================
--- trunk/sys/netinet/in_cksum.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_cksum.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_cksum.c 172467 2007-10-07 20:44:24Z silby $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_cksum.c 238941 2012-07-31 08:04:49Z luigi $");
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -89,7 +89,7 @@
/*
* Force to even boundary.
*/
- if ((1 & (int) w) && (mlen > 0)) {
+ if ((1 & (uintptr_t) w) && (mlen > 0)) {
REDUCE;
sum <<= 8;
s_util.c[0] = *(u_char *)w;
Modified: trunk/sys/netinet/in_debug.c
===================================================================
--- trunk/sys/netinet/in_debug.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_debug.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_debug.c 226572 2011-10-20 15:58:05Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_debug.c 226401 2011-10-15 16:28:06Z glebius $");
#include "opt_ddb.h"
Modified: trunk/sys/netinet/in_gif.c
===================================================================
--- trunk/sys/netinet/in_gif.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_gif.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,6 +1,4 @@
/* $MidnightBSD$ */
-/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */
-
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -28,16 +26,19 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_gif.c 223637 2011-06-28 11:57:25Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_gif.c 284072 2015-06-06 13:26:13Z ae $");
-#include "opt_mrouting.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -49,6 +50,7 @@
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -56,7 +58,6 @@
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
-#include <netinet/in_gif.h>
#include <netinet/in_var.h>
#include <netinet/ip_encap.h>
#include <netinet/ip_ecn.h>
@@ -65,153 +66,50 @@
#include <netinet/ip6.h>
#endif
-#ifdef MROUTING
-#include <netinet/ip_mroute.h>
-#endif /* MROUTING */
+#include <net/if_gif.h>
-#include <net/if_gif.h>
-
static int gif_validate4(const struct ip *, struct gif_softc *,
struct ifnet *);
+static int in_gif_input(struct mbuf **, int *, int);
+static void in_gif_input10(struct mbuf *, int);
extern struct domain inetdomain;
-struct protosw in_gif_protosw = {
+static struct protosw in_gif_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = 0/* IPPROTO_IPV[46] */,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in_gif_input,
+ .pr_input = in_gif_input10,
.pr_output = (pr_output_t*)rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
};
-VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
+#define GIF_TTL 30
+static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
#define V_ip_gif_ttl VNET(ip_gif_ttl)
SYSCTL_VNET_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
&VNET_NAME(ip_gif_ttl), 0, "");
int
-in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
+in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
- struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
- struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
- struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
- struct ip iphdr; /* capsule IP header, host byte ordered */
- struct etherip_header eiphdr;
- int error, len, proto;
- u_int8_t tos;
+ struct ip *ip;
+ int len;
- GIF_LOCK_ASSERT(sc);
-
- if (sin_src == NULL || sin_dst == NULL ||
- sin_src->sin_family != AF_INET ||
- sin_dst->sin_family != AF_INET) {
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- {
- struct ip *ip;
-
- proto = IPPROTO_IPV4;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return ENOBUFS;
- }
- ip = mtod(m, struct ip *);
- tos = ip->ip_tos;
- break;
- }
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- {
- struct ip6_hdr *ip6;
- proto = IPPROTO_IPV6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return ENOBUFS;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- break;
- }
-#endif /* INET6 */
- case AF_LINK:
- proto = IPPROTO_ETHERIP;
-
- /*
- * GIF_SEND_REVETHIP (disabled by default) intentionally
- * sends an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- eiphdr.eip_ver = 0;
- eiphdr.eip_resvl = ETHERIP_VERSION;
- eiphdr.eip_resvh = 0;
- } else {
- eiphdr.eip_ver = ETHERIP_VERSION;
- eiphdr.eip_resvl = 0;
- eiphdr.eip_resvh = 0;
- }
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
- sizeof(struct etherip_header));
- break;
-
- default:
-#ifdef DEBUG
- printf("in_gif_output: warning: unknown family %d passed\n",
- family);
-#endif
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- bzero(&iphdr, sizeof(iphdr));
- iphdr.ip_src = sin_src->sin_addr;
- /* bidirectional configured tunnel mode */
- if (sin_dst->sin_addr.s_addr != INADDR_ANY)
- iphdr.ip_dst = sin_dst->sin_addr;
- else {
- m_freem(m);
- return ENETUNREACH;
- }
- iphdr.ip_p = proto;
- /* version will be set in ip_output() */
- iphdr.ip_ttl = V_ip_gif_ttl;
- iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
- ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
- &iphdr.ip_tos, &tos);
-
/* prepend new IP header */
len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK)
+ if (proto == IPPROTO_ETHERIP)
len += ETHERIP_ALIGN;
#endif
- M_PREPEND(m, len, M_DONTWAIT);
- if (m != NULL && m->m_len < len)
- m = m_pullup(m, len);
- if (m == NULL) {
- printf("ENOBUFS in in_gif_output %d\n", __LINE__);
- return ENOBUFS;
- }
+ M_PREPEND(m, len, M_NOWAIT);
+ if (m == NULL)
+ return (ENOBUFS);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK) {
+ if (proto == IPPROTO_ETHERIP) {
len = mtod(m, vm_offset_t) & 3;
KASSERT(len == 0 || len == ETHERIP_ALIGN,
("in_gif_output: unexpected misalignment"));
@@ -219,146 +117,60 @@
m->m_len -= ETHERIP_ALIGN;
}
#endif
- bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
-
- M_SETFIB(m, sc->gif_fibnum);
-
- if (dst->sin_family != sin_dst->sin_family ||
- dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
- /* cache route doesn't match */
- bzero(dst, sizeof(*dst));
- dst->sin_family = sin_dst->sin_family;
- dst->sin_len = sizeof(struct sockaddr_in);
- dst->sin_addr = sin_dst->sin_addr;
- if (sc->gif_ro.ro_rt) {
- RTFREE(sc->gif_ro.ro_rt);
- sc->gif_ro.ro_rt = NULL;
- }
-#if 0
- GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
+ ip = mtod(m, struct ip *);
+ GIF_RLOCK(sc);
+ if (sc->gif_family != AF_INET) {
+ m_freem(m);
+ GIF_RUNLOCK(sc);
+ return (ENETDOWN);
}
+ bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
+ GIF_RUNLOCK(sc);
- if (sc->gif_ro.ro_rt == NULL) {
- in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
- if (sc->gif_ro.ro_rt == NULL) {
- m_freem(m);
- return ENETUNREACH;
- }
+ ip->ip_p = proto;
+ /* version will be set in ip_output() */
+ ip->ip_ttl = V_ip_gif_ttl;
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_tos = ecn;
- /* if it constitutes infinite encapsulation, punt. */
- if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
- m_freem(m);
- return ENETUNREACH; /* XXX */
- }
-#if 0
- ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
- - sizeof(struct ip);
-#endif
- }
+ return (ip_output(m, NULL, NULL, 0, NULL, NULL));
+}
- m_addr_changed(m);
+static void
+in_gif_input10(struct mbuf *m, int off)
+{
+ int proto;
- error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
-
- if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
- sc->gif_ro.ro_rt != NULL) {
- RTFREE(sc->gif_ro.ro_rt);
- sc->gif_ro.ro_rt = NULL;
- }
-
- return (error);
+ proto = (mtod(m, struct ip *))->ip_p;
+ in_gif_input(&m, &off, proto);
}
-void
-in_gif_input(struct mbuf *m, int off)
+static int
+in_gif_input(struct mbuf **mp, int *offp, int proto)
{
- struct ifnet *gifp = NULL;
+ struct mbuf *m = *mp;
struct gif_softc *sc;
+ struct ifnet *gifp;
struct ip *ip;
- int af;
- u_int8_t otos;
- int proto;
+ uint8_t ecn;
- ip = mtod(m, struct ip *);
- proto = ip->ip_p;
-
- sc = (struct gif_softc *)encap_getarg(m);
+ sc = encap_getarg(m);
if (sc == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
- return;
+ return (IPPROTO_DONE);
}
-
gifp = GIF2IFP(sc);
- if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+ if ((gifp->if_flags & IFF_UP) != 0) {
+ ip = mtod(m, struct ip *);
+ ecn = ip->ip_tos;
+ m_adj(m, *offp);
+ gif_input(m, gifp, proto, ecn);
+ } else {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
- return;
}
-
- otos = ip->ip_tos;
- m_adj(m, off);
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
- {
- struct ip *ip;
- af = AF_INET;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return;
- }
- ip = mtod(m, struct ip *);
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &ip->ip_tos) == 0) {
- m_freem(m);
- return;
- }
- break;
- }
-#endif
-#ifdef INET6
- case IPPROTO_IPV6:
- {
- struct ip6_hdr *ip6;
- u_int8_t itos, oitos;
-
- af = AF_INET6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &itos) == 0) {
- m_freem(m);
- return;
- }
- if (itos != oitos) {
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
- }
- break;
- }
-#endif /* INET6 */
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
-
- default:
- KMOD_IPSTAT_INC(ips_nogif);
- m_freem(m);
- return;
- }
- gif_input(m, af, gifp);
- return;
+ return (IPPROTO_DONE);
}
/*
@@ -367,38 +179,30 @@
static int
gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
{
- struct sockaddr_in *src, *dst;
- struct in_ifaddr *ia4;
+ int ret;
- src = (struct sockaddr_in *)sc->gif_psrc;
- dst = (struct sockaddr_in *)sc->gif_pdst;
+ GIF_RLOCK_ASSERT(sc);
/* check for address match */
- if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
- dst->sin_addr.s_addr != ip->ip_src.s_addr)
- return 0;
+ if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
+ return (0);
+ ret = 32;
+ if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
+ if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+ return (0);
+ } else
+ ret += 32;
/* martian filters on outer source - NOT done in ip_input! */
if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
- return 0;
+ return (0);
switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
- case 0: case 127: case 255:
- return 0;
+ case 0:
+ case 127:
+ case 255:
+ return (0);
}
- /* reject packets with broadcast on source */
- /* XXXRW: should use hash lists? */
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) {
- if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
- continue;
- if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
- IN_IFADDR_RUNLOCK();
- return 0;
- }
- }
- IN_IFADDR_RUNLOCK();
-
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
struct sockaddr_in sin;
@@ -412,19 +216,13 @@
rt = in_rtalloc1((struct sockaddr *)&sin, 0,
0UL, sc->gif_fibnum);
if (!rt || rt->rt_ifp != ifp) {
-#if 0
- log(LOG_WARNING, "%s: packet from 0x%x dropped "
- "due to ingress filter\n", if_name(GIF2IFP(sc)),
- (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
if (rt)
RTFREE_LOCKED(rt);
- return 0;
+ return (0);
}
RTFREE_LOCKED(rt);
}
-
- return 32 * 2;
+ return (ret);
}
/*
@@ -432,7 +230,7 @@
* matched the physical addr family. see gif_encapcheck().
*/
int
-gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
+in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip ip;
struct gif_softc *sc;
@@ -440,31 +238,21 @@
/* sanity check done in caller */
sc = (struct gif_softc *)arg;
+ GIF_RLOCK_ASSERT(sc);
- /* LINTED const cast */
m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
- return gif_validate4(&ip, sc, ifp);
+ return (gif_validate4(&ip, sc, ifp));
}
int
in_gif_attach(struct gif_softc *sc)
{
- sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
+
+ KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+ sc->gif_ecookie = encap_attach_func(AF_INET, -1, gif_encapcheck,
&in_gif_protosw, sc);
- if (sc->encap_cookie4 == NULL)
- return EEXIST;
- return 0;
+ if (sc->gif_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
-
-int
-in_gif_detach(struct gif_softc *sc)
-{
- int error;
-
- error = encap_detach(sc->encap_cookie4);
- if (error == 0)
- sc->encap_cookie4 = NULL;
- return error;
-}
Added: trunk/sys/netinet/in_kdtrace.c
===================================================================
--- trunk/sys/netinet/in_kdtrace.c (rev 0)
+++ trunk/sys/netinet/in_kdtrace.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,136 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Mark Johnston <markj at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/in_kdtrace.c 318438 2017-05-18 03:32:01Z smh $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_kdtrace.c 318438 2017-05-18 03:32:01Z smh $");
+
+#include "opt_kdtrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sdt.h>
+
+SDT_PROVIDER_DEFINE(ip);
+SDT_PROVIDER_DEFINE(tcp);
+SDT_PROVIDER_DEFINE(udp);
+
+SDT_PROBE_DEFINE6_XLATE(ip, , , receive,
+ "void *", "pktinfo_t *",
+ "void *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct ifnet *", "ifinfo_t *",
+ "struct ip *", "ipv4info_t *",
+ "struct ip6_hdr *", "ipv6info_t *");
+
+SDT_PROBE_DEFINE6_XLATE(ip, , , send,
+ "void *", "pktinfo_t *",
+ "void *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct ifnet *", "ifinfo_t *",
+ "struct ip *", "ipv4info_t *",
+ "struct ip6_hdr *", "ipv6info_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__established,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__refused,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__established,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__refused,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__request,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , receive,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfoh_t *");
+
+SDT_PROBE_DEFINE5_XLATE(tcp, , , send,
+ "void *", "pktinfo_t *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfo_t *");
+
+SDT_PROBE_DEFINE6_XLATE(tcp, , , state__change,
+ "void *", "void *",
+ "struct tcpcb *", "csinfo_t *",
+ "void *", "void *",
+ "struct tcpcb *", "tcpsinfo_t *",
+ "void *", "void *",
+ "int", "tcplsinfo_t *");
+
+SDT_PROBE_DEFINE6_XLATE(tcp, , , receive__autoresize,
+ "void *", "void *",
+ "struct tcpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct tcpcb *", "tcpsinfo_t *" ,
+ "struct tcphdr *", "tcpinfoh_t *",
+ "int", "int");
+
+SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
+ "void *", "pktinfo_t *",
+ "struct inpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct inpcb *", "udpsinfo_t *",
+ "struct udphdr *", "udpinfo_t *");
+
+SDT_PROBE_DEFINE5_XLATE(udp, , , send,
+ "void *", "pktinfo_t *",
+ "struct inpcb *", "csinfo_t *",
+ "uint8_t *", "ipinfo_t *",
+ "struct inpcb *", "udpsinfo_t *",
+ "struct udphdr *", "udpinfo_t *");
Property changes on: trunk/sys/netinet/in_kdtrace.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/in_kdtrace.h
===================================================================
--- trunk/sys/netinet/in_kdtrace.h (rev 0)
+++ trunk/sys/netinet/in_kdtrace.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,61 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 Mark Johnston <markj at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/in_kdtrace.h 317375 2017-04-24 16:31:28Z smh $
+ */
+
+#ifndef _SYS_IN_KDTRACE_H_
+#define _SYS_IN_KDTRACE_H_
+
+#define IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+#define UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
+ SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
+
+SDT_PROVIDER_DECLARE(ip);
+SDT_PROVIDER_DECLARE(tcp);
+SDT_PROVIDER_DECLARE(udp);
+
+SDT_PROBE_DECLARE(ip, , , receive);
+SDT_PROBE_DECLARE(ip, , , send);
+
+SDT_PROBE_DECLARE(tcp, , , accept__established);
+SDT_PROBE_DECLARE(tcp, , , accept__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__established);
+SDT_PROBE_DECLARE(tcp, , , connect__refused);
+SDT_PROBE_DECLARE(tcp, , , connect__request);
+SDT_PROBE_DECLARE(tcp, , , receive);
+SDT_PROBE_DECLARE(tcp, , , send);
+SDT_PROBE_DECLARE(tcp, , , state__change);
+SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
+
+SDT_PROBE_DECLARE(udp, , , receive);
+SDT_PROBE_DECLARE(udp, , , send);
+
+#endif
Property changes on: trunk/sys/netinet/in_kdtrace.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/in_mcast.c
===================================================================
--- trunk/sys/netinet/in_mcast.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_mcast.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_mcast.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_mcast.c 321134 2017-07-18 16:58:52Z ngie $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -47,6 +47,7 @@
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
+#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <net/if.h>
@@ -139,12 +140,16 @@
struct in_multi **);
static int inm_get_source(struct in_multi *inm, const in_addr_t haddr,
const int noalloc, struct ip_msource **pims);
+#ifdef KTR
static int inm_is_ifp_detached(const struct in_multi *);
+#endif
static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void inm_purge(struct in_multi *);
static void inm_reap(struct in_multi *);
static struct ip_moptions *
inp_findmoptions(struct inpcb *);
+static void inp_freemoptions_internal(struct ip_moptions *);
+static void inp_gcmoptions(void *, int);
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
@@ -180,6 +185,11 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
"Per-interface stack-wide source filters");
+static STAILQ_HEAD(, ip_moptions) imo_gc_list =
+ STAILQ_HEAD_INITIALIZER(imo_gc_list);
+static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
+
+#ifdef KTR
/*
* Inline function which wraps assertions for a valid ifp.
* The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -202,6 +212,7 @@
return (ifp == NULL);
}
+#endif
/*
* Initialize an in_mfilter structure to a known state at t0, t1
@@ -1004,9 +1015,10 @@
/* Decrement ASM listener count on transition out of ASM mode. */
if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
- (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+ (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
--inm->inm_st[1].iss_asm;
+ }
}
/* Increment ASM listener count on transition to ASM mode. */
@@ -1172,11 +1184,8 @@
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
- struct ifnet *ifp;
int error;
- ifp = inm->inm_ifp;
-
IN_MULTI_LOCK();
error = in_leavegroup_locked(inm, imf);
IN_MULTI_UNLOCK();
@@ -1233,7 +1242,9 @@
KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
+ CURVNET_SET(inm->inm_ifp->if_vnet);
error = igmp_change_state(inm);
+ CURVNET_RESTORE();
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
@@ -1443,7 +1454,7 @@
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
- goto out_imf_rollback;
+ goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -1451,6 +1462,8 @@
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
IN_MULTI_UNLOCK();
out_imf_rollback:
@@ -1519,7 +1532,10 @@
}
/*
- * Discard the IP multicast options (and source filters).
+ * Discard the IP multicast options (and source filters). To minimize
+ * the amount of work done while holding locks such as the INP's
+ * pcbinfo lock (which is used in the receive path), the free
+ * operation is performed asynchronously in a separate task.
*
* SMPng: NOTE: assumes INP write lock is held.
*/
@@ -1526,11 +1542,20 @@
void
inp_freemoptions(struct ip_moptions *imo)
{
+
+ KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+ IN_MULTI_LOCK();
+ STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
+ IN_MULTI_UNLOCK();
+ taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
+}
+
+static void
+inp_freemoptions_internal(struct ip_moptions *imo)
+{
struct in_mfilter *imf;
size_t idx, nmships;
- KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
-
nmships = imo->imo_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
@@ -1547,6 +1572,22 @@
free(imo, M_IPMOPTS);
}
+static void
+inp_gcmoptions(void *context, int pending)
+{
+ struct ip_moptions *imo;
+
+ IN_MULTI_LOCK();
+ while (!STAILQ_EMPTY(&imo_gc_list)) {
+ imo = STAILQ_FIRST(&imo_gc_list);
+ STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
+ IN_MULTI_UNLOCK();
+ inp_freemoptions_internal(imo);
+ IN_MULTI_LOCK();
+ }
+ IN_MULTI_UNLOCK();
+}
+
/*
* Atomically get source filters on a socket for an IPv4 multicast group.
* Called with INP lock held; returns with lock released.
@@ -1615,6 +1656,8 @@
* has asked for, but we always tell userland how big the
* buffer really needs to be.
*/
+ if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
+ msfr.msfr_nsrcs = in_mcast_maxsocksrc;
tss = NULL;
if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
@@ -2089,8 +2132,12 @@
if (is_new) {
error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
&inm);
- if (error)
+ if (error) {
+ CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
+ __func__);
+ IN_MULTI_UNLOCK();
goto out_imo_free;
+ }
imo->imo_membership[idx] = inm;
} else {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
@@ -2098,7 +2145,7 @@
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
- goto out_imf_rollback;
+ goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
error = igmp_change_state(inm);
@@ -2105,13 +2152,14 @@
if (error) {
CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
__func__);
- goto out_imf_rollback;
+ goto out_in_multi_locked;
}
}
+out_in_multi_locked:
+
IN_MULTI_UNLOCK();
-out_imf_rollback:
INP_WLOCK_ASSERT(inp);
if (error) {
imf_rollback(imf);
@@ -2315,7 +2363,7 @@
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
__func__);
- goto out_imf_rollback;
+ goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2326,9 +2374,10 @@
}
}
+out_in_multi_locked:
+
IN_MULTI_UNLOCK();
-out_imf_rollback:
if (error)
imf_rollback(imf);
else
@@ -2562,7 +2611,7 @@
error = inm_merge(inm, imf);
if (error) {
CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
- goto out_imf_rollback;
+ goto out_in_multi_locked;
}
CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
@@ -2570,6 +2619,8 @@
if (error)
CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
+out_in_multi_locked:
+
IN_MULTI_UNLOCK();
out_imf_rollback:
Modified: trunk/sys/netinet/in_pcb.c
===================================================================
--- trunk/sys/netinet/in_pcb.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcb.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_pcb.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_pcb.c 314667 2017-03-04 13:03:31Z avg $");
#include "opt_ddb.h"
#include "opt_ipsec.h"
@@ -145,11 +145,7 @@
{
int error;
-#ifdef VIMAGE
- error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
-#else
error = sysctl_handle_int(oidp, arg1, arg2, req);
-#endif
if (error == 0) {
RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
@@ -197,7 +193,7 @@
&VNET_NAME(ipport_randomtime), 0,
"Minimum time to keep sequental port "
"allocation before switching to a random one");
-#endif
+#endif /* INET */
/*
* in_pcb.c: manage the Protocol Control Blocks.
@@ -220,6 +216,7 @@
INP_INFO_LOCK_INIT(pcbinfo, name);
INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
+ INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
@@ -237,6 +234,8 @@
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
+ uma_zone_set_warning(pcbinfo->ipi_zone,
+ "kern.ipc.maxsockets limit reached");
}
/*
@@ -256,6 +255,7 @@
in_pcbgroup_destroy(pcbinfo);
#endif
uma_zdestroy(pcbinfo->ipi_zone);
+ INP_LIST_LOCK_DESTROY(pcbinfo);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
}
@@ -270,7 +270,14 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
error = 0;
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
@@ -302,6 +309,8 @@
inp->inp_flags |= IN6P_IPV6_V6ONLY;
}
#endif
+ INP_WLOCK(inp);
+ INP_LIST_WLOCK(pcbinfo);
LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
@@ -309,9 +318,9 @@
if (V_ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
- INP_WLOCK(inp);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
+ INP_LIST_WUNLOCK(pcbinfo);
#if defined(IPSEC) || defined(MAC)
out:
if (error != 0) {
@@ -333,8 +342,7 @@
if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
return (EINVAL);
- anonport = inp->inp_lport == 0 && (nam == NULL ||
- ((struct sockaddr_in *)nam)->sin_port == 0);
+ anonport = nam == NULL || ((struct sockaddr_in *)nam)->sin_port == 0;
error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
&inp->inp_lport, cred);
if (error)
@@ -390,13 +398,14 @@
lastport = &pcbinfo->ipi_lastport;
}
/*
- * For UDP, use random port allocation as long as the user
+ * For UDP(-Lite), use random port allocation as long as the user
* allows it. For TCP (and as of yet unknown) connections,
* use random port allocation only if the user allows it AND
* ipport_tick() allows it.
*/
if (V_ipport_randomized &&
- (!V_ipport_stoprandom || pcbinfo == &V_udbinfo))
+ (!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
+ pcbinfo == &V_ulitecbinfo))
dorandom = 1;
else
dorandom = 0;
@@ -406,8 +415,8 @@
*/
if (first == last)
dorandom = 0;
- /* Make sure to not include UDP packets in the count. */
- if (pcbinfo != &V_udbinfo)
+ /*
+ * Make sure to not include UDP(-Lite) packets in the count: only
+ * bump the counter when pcbinfo is neither the UDP nor the UDP-Lite
+ * table. (With "||" the test is always true, because pcbinfo cannot
+ * be equal to both tables at once.)
+ */
+ if (pcbinfo != &V_udbinfo && pcbinfo != &V_ulitecbinfo)
+ V_ipport_tcpallocs++;
/*
* Instead of having two loops further down counting up or down
@@ -467,6 +476,23 @@
return (0);
}
+
+/*
+ * Return cached socket options.
+ */
+short
+inp_so_options(const struct inpcb *inp)
+{
+ short so_options;
+
+ so_options = 0;
+
+ if ((inp->inp_flags2 & INP_REUSEPORT) != 0)
+ so_options |= SO_REUSEPORT;
+ if ((inp->inp_flags2 & INP_REUSEADDR) != 0)
+ so_options |= SO_REUSEADDR;
+ return (so_options);
+}
#endif /* INET || INET6 */
#ifdef INET
@@ -537,7 +563,7 @@
* and a multicast address is bound on both
* new and duplicated sockets.
*/
- if (so->so_options & SO_REUSEADDR)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
} else if (sin->sin_addr.s_addr != INADDR_ANY) {
sin->sin_port = 0; /* yech... */
@@ -595,8 +621,7 @@
if (tw == NULL ||
(reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else if (t && (reuseport == 0 ||
- (t->inp_flags2 & INP_REUSEPORT) == 0)) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0) {
#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
@@ -681,7 +706,7 @@
* Do proper source address selection on an unbound socket in case
* of connect. Take jails into account as well.
*/
-static int
+int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct ucred *cred)
{
@@ -1039,7 +1064,7 @@
inp->inp_fport = 0;
in_pcbrehash(inp);
}
-#endif
+#endif /* INET */
/*
* in_pcbdetach() is responsible for disassociating a socket from an inpcb.
@@ -1135,8 +1160,17 @@
INP_WLOCK_ASSERT(inp);
- if (refcount_release(&inp->inp_refcount) == 0)
+ if (refcount_release(&inp->inp_refcount) == 0) {
+ /*
+ * If the inpcb has been freed, let the caller know, even if
+ * this isn't the last reference.
+ */
+ if (inp->inp_flags2 & INP_FREED) {
+ INP_WUNLOCK(inp);
+ return (1);
+ }
return (0);
+ }
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
@@ -1172,7 +1206,13 @@
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_LOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
INP_WLOCK_ASSERT(inp);
/* XXXRW: Do as much as possible here. */
@@ -1179,9 +1219,11 @@
#ifdef IPSEC
if (inp->inp_sp != NULL)
ipsec_delete_pcbpolicy(inp);
-#endif /* IPSEC */
+#endif
+ INP_LIST_WLOCK(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
in_pcbremlists(inp);
+ INP_LIST_WUNLOCK(pcbinfo);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
ip6_freepcbopts(inp->in6p_outputopts);
@@ -1338,7 +1380,7 @@
struct ip_moptions *imo;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(inp);
imo = inp->inp_moptions;
@@ -1368,7 +1410,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
@@ -1597,7 +1639,7 @@
if (inp->inp_vflag & INP_IPV6PROTO)
local_wild_mapped = inp;
else
-#endif /* INET6 */
+#endif
if (injail)
jail_wild = inp;
else
@@ -1612,7 +1654,7 @@
#ifdef INET6
if (inp == NULL)
inp = local_wild_mapped;
-#endif /* defined(INET6) */
+#endif
if (inp != NULL)
goto found;
} /* if (lookupflags & INPLOOKUP_WILDCARD) */
@@ -1742,7 +1784,7 @@
if (inp->inp_vflag & INP_IPV6PROTO)
local_wild_mapped = inp;
else
-#endif /* INET6 */
+#endif
if (injail)
jail_wild = inp;
else
@@ -1758,7 +1800,7 @@
#ifdef INET6
if (local_wild_mapped != NULL)
return (local_wild_mapped);
-#endif /* defined(INET6) */
+#endif
} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
return (NULL);
@@ -1882,7 +1924,7 @@
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
else
-#endif /* INET6 */
+#endif
hashkey_faddr = inp->inp_faddr.s_addr;
pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -1969,7 +2011,7 @@
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
else
-#endif /* INET6 */
+#endif
hashkey_faddr = inp->inp_faddr.s_addr;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
@@ -2001,8 +2043,16 @@
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
INP_WLOCK_ASSERT(inp);
+ INP_LIST_WLOCK_ASSERT(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
if (inp->inp_flags & INP_INHASHLIST) {
@@ -2090,7 +2140,7 @@
{
/* Start ipport_tick. */
- callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
+ callout_init(&ipport_tick_callout, 1);
callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
@@ -2147,13 +2197,13 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
}
struct socket *
@@ -2237,14 +2287,13 @@
/* IPv6. */
ip6_sprintf(laddr_str, &inc->inc6_laddr);
ip6_sprintf(faddr_str, &inc->inc6_faddr);
- } else {
+ } else
#endif
+ {
/* IPv4. */
inet_ntoa_r(inc->inc_laddr, laddr_str);
inet_ntoa_r(inc->inc_faddr, faddr_str);
-#ifdef INET6
}
-#endif
db_print_indent(indent);
db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
ntohs(inc->inc_lport));
@@ -2461,4 +2510,4 @@
db_print_inpcb(inp, "inpcb", 0);
}
-#endif
+#endif /* DDB */
Modified: trunk/sys/netinet/in_pcb.h
===================================================================
--- trunk/sys/netinet/in_pcb.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcb.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/in_pcb.h 241435 2012-10-11 01:32:51Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/in_pcb.h 309108 2016-11-24 14:48:46Z jch $
*/
#ifndef _NETINET_IN_PCB_H_
@@ -129,23 +129,35 @@
struct icmp6_filter;
/*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets. In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets. In the case of TCP and UDP, further per-connection state is
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on). This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple. Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below. For these fields, all of the listed locks must be write-locked for
+ * any modifications. However, these fields can be safely read while any one of
+ * the listed locks are read-locked. This model can permit greater concurrency
+ * for read operations. For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock. This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
*
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field. Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
+ *
* Key:
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
@@ -160,15 +172,21 @@
* socket has been freed), or there may be close(2)-related races.
*
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification; this model can be applied to other
+ * protocols (especially SCTP).
*/
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
- LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
+ LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (p[w]) for list iteration */
+ /* (p[r]/l) for addition/removal */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
- LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -181,11 +199,12 @@
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
void *inp_pspare[5]; /* (x) route caching / general use */
- u_int inp_ispare[6]; /* (x) route caching / user cookie /
+ uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
+ u_int inp_ispare[5]; /* (x) route caching / user cookie /
* general use */
/* Local and foreign ports, local and foreign addr. */
- struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */
+ struct in_conninfo inp_inc; /* (i) list for PCB's local port */
/* MAC and IPSEC policy information. */
struct label *inp_label; /* (i) MAC label */
@@ -210,8 +229,8 @@
int inp6_cksum;
short inp6_hops;
} inp_depend6;
- LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */
- struct inpcbport *inp_phd; /* (i/p) head of this list */
+ LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+ struct inpcbport *inp_phd; /* (i/h) head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
@@ -275,16 +294,25 @@
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ * - ipi_lock covering the global pcb list stability during loop iteration,
+ * - ipi_hash_lock covering the hashed lookup tables,
+ * - ipi_list_lock covering mutable global fields (such as the global
+ * pcb list)
*
- * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ * The lock order is:
*
+ * ipi_lock (before)
+ * inpcb locks (before)
+ * ipi_list locks (before)
+ * {ipi_hash_lock, pcbgroup locks}
+ *
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
* (h) Read using either ipi_hash_lock or inpcb lock; write requires both
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
@@ -291,7 +319,7 @@
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, etc.
+ * Global lock protecting full inpcb list traversal
*/
struct rwlock ipi_lock;
@@ -298,14 +326,14 @@
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g) */
- u_int ipi_count; /* (g) */
+ struct inpcbhead *ipi_listhead; /* (g/l) */
+ u_int ipi_count; /* (l) */
/*
* Generation count -- incremented each time a connection is allocated
* or freed.
*/
- u_quad_t ipi_gencnt; /* (g) */
+ u_quad_t ipi_gencnt; /* (l) */
/*
* Fields associated with port lookup and allocation.
@@ -363,6 +391,11 @@
* general use 2
*/
void *ipi_pspare[2];
+
+ /*
+ * Global lock protecting global inpcb list, inpcb count, etc.
+ */
+ struct rwlock ipi_list_lock;
};
#ifdef _KERNEL
@@ -443,6 +476,7 @@
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
+short inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
@@ -454,6 +488,7 @@
#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
+#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
@@ -461,6 +496,25 @@
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_LIST_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
#define INP_HASH_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
@@ -524,8 +578,8 @@
#define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */
#define INP_DROPPED 0x04000000 /* protocol drop flag */
#define INP_SOCKREF 0x08000000 /* strong socket reference */
-#define INP_SW_FLOWID 0x10000000 /* software generated flow id */
-#define INP_HW_FLOWID 0x20000000 /* hardware generated flow id */
+#define INP_RESERVED_0 0x10000000 /* reserved field */
+#define INP_RESERVED_1 0x20000000 /* reserved field */
#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU 0x80000000 /* receive path MTU */
@@ -544,6 +598,7 @@
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */
+#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
/*
* Flags passed to in_pcblookup*() functions.
@@ -635,6 +690,8 @@
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
+int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
+ struct ucred *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
Modified: trunk/sys/netinet/in_pcbgroup.c
===================================================================
--- trunk/sys/netinet/in_pcbgroup.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_pcbgroup.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
#include "opt_inet6.h"
Modified: trunk/sys/netinet/in_proto.c
===================================================================
--- trunk/sys/netinet/in_proto.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_proto.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_proto.c 230868 2012-02-01 15:57:49Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_proto.c 265946 2014-05-13 06:05:53Z kevlo $");
#include "opt_ipx.h"
#include "opt_mrouting.h"
@@ -185,6 +185,20 @@
},
#endif /* SCTP */
{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_UDPLITE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp_input,
+ .pr_ctlinput = udplite_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+ .pr_init = udplite_init,
+#ifdef VIMAGE
+ .pr_destroy = udplite_destroy,
+#endif
+ .pr_usrreqs = &udp_usrreqs
+},
+{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_RAW,
Modified: trunk/sys/netinet/in_rmx.c
===================================================================
--- trunk/sys/netinet/in_rmx.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_rmx.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/in_rmx.c 242646 2012-11-06 01:18:53Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/in_rmx.c 295389 2016-02-08 00:07:01Z bz $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -94,8 +94,8 @@
if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
rt->rt_flags |= RTF_MULTICAST;
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
- rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+ if (rt->rt_mtu == 0 && rt->rt_ifp != NULL)
+ rt->rt_mtu = rt->rt_ifp->if_mtu;
return (rn_addroute(v_arg, n_arg, head, treenodes));
}
@@ -115,7 +115,7 @@
RT_LOCK(rt);
if (rt->rt_flags & RTPRF_OURS) {
rt->rt_flags &= ~RTPRF_OURS;
- rt->rt_rmx.rmx_expire = 0;
+ rt->rt_expire = 0;
}
RT_UNLOCK(rt);
}
@@ -168,7 +168,7 @@
*/
if (V_rtq_reallyold != 0) {
rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold;
+ rt->rt_expire = time_uptime + V_rtq_reallyold;
} else {
rtexpunge(rt);
}
@@ -200,7 +200,7 @@
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
+ if (ap->draining || rt->rt_expire <= time_uptime) {
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
@@ -216,13 +216,9 @@
}
} else {
if (ap->updating &&
- (rt->rt_rmx.rmx_expire - time_uptime >
- V_rtq_reallyold)) {
- rt->rt_rmx.rmx_expire =
- time_uptime + V_rtq_reallyold;
- }
- ap->nextstop = lmin(ap->nextstop,
- rt->rt_rmx.rmx_expire);
+ (rt->rt_expire - time_uptime > V_rtq_reallyold))
+ rt->rt_expire = time_uptime + V_rtq_reallyold;
+ ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
}
}
@@ -378,7 +374,7 @@
{
callout_drain(&V_rtq_timer);
- return (1);
+ return (rn_detachhead(head));
}
#endif
Modified: trunk/sys/netinet/in_systm.h
===================================================================
--- trunk/sys/netinet/in_systm.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_systm.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)in_systm.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/in_systm.h 188578 2009-02-13 15:14:43Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/in_systm.h 188578 2009-02-13 15:14:43Z luigi $
*/
#ifndef _NETINET_IN_SYSTM_H_
Modified: trunk/sys/netinet/in_var.h
===================================================================
--- trunk/sys/netinet/in_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/in_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,12 +28,13 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/netinet/in_var.h 242640 2012-11-06 00:49:52Z melifaro $
+ * $FreeBSD: stable/10/sys/netinet/in_var.h 309340 2016-11-30 22:20:23Z vangyzen $
*/
#ifndef _NETINET_IN_VAR_H_
#define _NETINET_IN_VAR_H_
+#include <sys/callout.h>
#include <sys/queue.h>
#include <sys/fnv_hash.h>
#include <sys/tree.h>
@@ -70,6 +71,8 @@
struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
#define ia_broadaddr ia_dstaddr
struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
+ struct callout ia_garp_timer; /* timer for retransmitting GARPs */
+ int ia_garp_count; /* count of retransmitted GARPs */
};
struct in_aliasreq {
@@ -78,6 +81,7 @@
struct sockaddr_in ifra_broadaddr;
#define ifra_dstaddr ifra_broadaddr
struct sockaddr_in ifra_mask;
+ int ifra_vhid;
};
/*
* Given a pointer to an in_ifaddr (ifaddr),
@@ -85,6 +89,7 @@
*/
#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+#define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask))
/*
 * Return the bits of address `in' (converted to host byte order with
 * ntohl()) that are NOT covered by ia_subnetmask of the in_ifaddr that
 * `ifa' points to. `ifa' is cast to struct in_ifaddr * before the member
 * access, so any pointer type may be passed.
 * NOTE(review): presumes ia_subnetmask is kept in host byte order -- confirm.
 */
#define IN_LNAOF(in, ifa) \
((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa))->ia_subnetmask))
@@ -446,6 +451,8 @@
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
void in_rtqdrain(void);
+int in_addprefix(struct in_ifaddr *, int);
+int in_scrubprefix(struct in_ifaddr *, u_int);
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int);
Modified: trunk/sys/netinet/ip.h
===================================================================
--- trunk/sys/netinet/ip.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)ip.h 8.2 (Berkeley) 6/1/94
- * $FreeBSD: stable/9/sys/netinet/ip.h 235805 2012-05-22 19:53:25Z delphij $
+ * $FreeBSD: stable/10/sys/netinet/ip.h 286105 2015-07-31 03:40:09Z kevlo $
*/
#ifndef _NETINET_IP_H_
@@ -68,7 +68,7 @@
u_char ip_p; /* protocol */
u_short ip_sum; /* checksum */
struct in_addr ip_src,ip_dst; /* source and dest address */
-} __packed __aligned(4);
+} __packed __aligned(2);
#define IP_MAXPACKET 65535 /* maximum packet size */
@@ -81,19 +81,19 @@
#define IPTOS_MINCOST 0x02
/*
- * Definitions for IP precedence (also in ip_tos) (hopefully unused).
+ * Definitions for IP precedence (also in ip_tos) (deprecated).
*/
-#define IPTOS_PREC_NETCONTROL 0xe0
-#define IPTOS_PREC_INTERNETCONTROL 0xc0
-#define IPTOS_PREC_CRITIC_ECP 0xa0
-#define IPTOS_PREC_FLASHOVERRIDE 0x80
-#define IPTOS_PREC_FLASH 0x60
-#define IPTOS_PREC_IMMEDIATE 0x40
-#define IPTOS_PREC_PRIORITY 0x20
-#define IPTOS_PREC_ROUTINE 0x00
+#define IPTOS_PREC_NETCONTROL IPTOS_DSCP_CS7
+#define IPTOS_PREC_INTERNETCONTROL IPTOS_DSCP_CS6
+#define IPTOS_PREC_CRITIC_ECP IPTOS_DSCP_CS5
+#define IPTOS_PREC_FLASHOVERRIDE IPTOS_DSCP_CS4
+#define IPTOS_PREC_FLASH IPTOS_DSCP_CS3
+#define IPTOS_PREC_IMMEDIATE IPTOS_DSCP_CS2
+#define IPTOS_PREC_PRIORITY IPTOS_DSCP_CS1
+#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0
/*
- * Definitions for DiffServ Codepoints as per RFC2474
+ * Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
*/
#define IPTOS_DSCP_CS0 0x00
#define IPTOS_DSCP_CS1 0x20
@@ -113,6 +113,7 @@
#define IPTOS_DSCP_AF42 0x90
#define IPTOS_DSCP_AF43 0x98
#define IPTOS_DSCP_CS5 0xa0
+#define IPTOS_DSCP_VA 0xb0
#define IPTOS_DSCP_EF 0xb8
#define IPTOS_DSCP_CS6 0xc0
#define IPTOS_DSCP_CS7 0xe0
Modified: trunk/sys/netinet/ip6.h
===================================================================
--- trunk/sys/netinet/ip6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/ip6.h 215956 2010-11-27 21:51:39Z brucec $ */
+/* $FreeBSD: stable/10/sys/netinet/ip6.h 249294 2013-04-09 07:11:22Z ae $ */
/* $KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $ */
/*-
Modified: trunk/sys/netinet/ip_carp.c
===================================================================
--- trunk/sys/netinet/ip_carp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_carp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,7 +1,9 @@
/* $MidnightBSD$ */
-/*
- * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
- * Copyright (c) 2003 Ryan McBride. All rights reserved.
+/*-
+ * Copyright (c) 2002 Michael Shalayeff.
+ * Copyright (c) 2003 Ryan McBride.
+ * Copyright (c) 2011 Gleb Smirnoff <glebius at FreeBSD.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,44 +28,38 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_carp.c 233200 2012-03-19 20:49:16Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_carp.c 278075 2015-02-02 11:42:35Z loos $");
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"
-#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
-#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/signalvar.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <sys/counter.h>
-#include <sys/socket.h>
-#include <sys/vnode.h>
-
-#include <machine/stdarg.h>
-
-#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
-#include <net/iso88025.h>
#include <net/if.h>
-#include <net/if_clone.h>
#include <net/if_dl.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
+#include <net/iso88025.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -72,12 +68,9 @@
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
-
#include <machine/in_cksum.h>
#endif
-
#ifdef INET
-#include <netinet/in_systm.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif
@@ -86,88 +79,72 @@
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6protosw.h>
+#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif
#include <crypto/sha1.h>
-#define CARP_IFNAME "carp"
-static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
-SYSCTL_DECL(_net_inet_carp);
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
struct carp_softc {
- struct ifnet *sc_ifp; /* Interface clue */
- struct ifnet *sc_carpdev; /* Pointer to parent interface */
- struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */
+ struct ifaddr **sc_ifas; /* Our ifaddrs. */
+ struct sockaddr_dl sc_addr; /* Our link level address. */
+ struct callout sc_ad_tmo; /* Advertising timeout. */
#ifdef INET
- struct ip_moptions sc_imo;
+ struct callout sc_md_tmo; /* Master down timeout. */
#endif
#ifdef INET6
- struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
- struct ip6_moptions sc_im6o;
-#endif /* INET6 */
- TAILQ_ENTRY(carp_softc) sc_list;
+ struct callout sc_md6_tmo; /* XXX: Master down timeout. */
+#endif
+ struct mtx sc_mtx;
+ int sc_vhid;
+ int sc_advskew;
+ int sc_advbase;
+
+ int sc_naddrs;
+ int sc_naddrs6;
+ int sc_ifasiz;
enum { INIT = 0, BACKUP, MASTER } sc_state;
-
- int sc_flags_backup;
- int sc_suppress;
-
- int sc_sendad_errors;
+ int sc_suppress;
+ int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS 3
- int sc_sendad_success;
+ int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3
- int sc_vhid;
- int sc_advskew;
- int sc_naddrs;
- int sc_naddrs6;
- int sc_advbase; /* seconds */
- int sc_init_counter;
- u_int64_t sc_counter;
+ int sc_init_counter;
+ uint64_t sc_counter;
/* authentication */
-#define CARP_HMAC_PAD 64
+#define CARP_HMAC_PAD 64
unsigned char sc_key[CARP_KEY_LEN];
unsigned char sc_pad[CARP_HMAC_PAD];
SHA1_CTX sc_sha1;
- struct callout sc_ad_tmo; /* advertisement timeout */
- struct callout sc_md_tmo; /* master down timeout */
- struct callout sc_md6_tmo; /* master down timeout */
-
- LIST_ENTRY(carp_softc) sc_next; /* Interface clue */
+ TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */
+ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */
};
-#define SC2IFP(sc) ((sc)->sc_ifp)
-int carp_suppress_preempt = 0;
-int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
-SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
- &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
- &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
-SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
- &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
- &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
-SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
- &carp_suppress_preempt, 0, "Preemption is suppressed");
-
-struct carpstats carpstats;
-SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
- &carpstats, carpstats,
- "CARP statistics (struct carpstats, netinet/ip_carp.h)");
-
struct carp_if {
- TAILQ_HEAD(, carp_softc) vhif_vrs;
- int vhif_nvrs;
-
- struct ifnet *vhif_ifp;
- struct mtx vhif_mtx;
+#ifdef INET
+ int cif_naddrs;
+#endif
+#ifdef INET6
+ int cif_naddrs6;
+#endif
+ TAILQ_HEAD(, carp_softc) cif_vrs;
+#ifdef INET
+ struct ip_moptions cif_imo;
+#endif
+#ifdef INET6
+ struct ip6_moptions cif_im6o;
+#endif
+ struct ifnet *cif_ifp;
+ struct mtx cif_mtx;
};
#define CARP_INET 0
@@ -174,93 +151,179 @@
#define CARP_INET6 1
static int proto_reg[] = {-1, -1};
-/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
-#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
+/*
+ * Brief design of carp(4).
+ *
+ * Any carp-capable ifnet may have a list of carp softcs hanging off
+ * its ifp->if_carp pointer. Each softc represents one unique virtual
+ * host id, or vhid. The softc has a back pointer to the ifnet. All
+ * softcs are joined in a global list, which has quite limited use.
+ *
+ * Any interface address that takes part in CARP negotiation has a
+ * pointer to the softc of its vhid, ifa->ifa_carp. The address can be
+ * either an AF_INET or an AF_INET6 address.
+ *
+ * Although one can follow the softc's back pointer to the ifnet and
+ * traverse its ifp->if_addrhead queue to find all interface addresses
+ * involved in CARP, we keep a growable array of ifaddr pointers. This
+ * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
+ * call into the network stack, thus avoiding lock order reversals (LORs).
+ *
+ * Locking:
+ *
+ * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
+ * callout-driven events and ioctl()s.
+ *
+ * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
+ * traverse the global list we use the mutex carp_mtx.
+ *
+ * Known issues with locking:
+ *
+ * - There is no protection against races between two ioctl() requests,
+ * neither for SIOCSVH nor for SIOCAIFADDR & SIOCAIFADDR_IN6. I think that
+ * all interface ioctl()s should be serialized right in net/if.c.
+ * - When sending an advertisement, we store a pointer to the softc in an
+ * mtag, and no reference counting is done on the softc.
+ * - On module unload we may race (?) with a packet processing thread
+ * dereferencing our function pointers.
+ */
-/* lock per carp_if queue */
-#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
+/* Accept incoming CARP packets. */
+static VNET_DEFINE(int, carp_allow) = 1;
+#define V_carp_allow VNET(carp_allow)
+
+/* Preempt slower nodes. */
+static VNET_DEFINE(int, carp_preempt) = 0;
+#define V_carp_preempt VNET(carp_preempt)
+
+/* Log level. */
+static VNET_DEFINE(int, carp_log) = 1;
+#define V_carp_log VNET(carp_log)
+
+/* Global advskew demotion. */
+static VNET_DEFINE(int, carp_demotion) = 0;
+#define V_carp_demotion VNET(carp_demotion)
+
+/* Send error demotion factor. */
+static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
+#define V_carp_senderr_adj VNET(carp_senderr_adj)
+
+/* Iface down demotion factor. */
+static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
+#define V_carp_ifdown_adj VNET(carp_ifdown_adj)
+
+static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW,
+ &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW,
+ &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW,
+ &VNET_NAME(carp_log), 0, "CARP log level");
+SYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
+ 0, 0, carp_demote_adj_sysctl, "I",
+ "Adjust demotion factor (skew of advskew)");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
+ &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
+SYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
+ &VNET_NAME(carp_ifdown_adj), 0,
+ "Interface down demotion factor adjustment");
+
+VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
+VNET_PCPUSTAT_SYSINIT(carpstats);
+VNET_PCPUSTAT_SYSUNINIT(carpstats);
+
+#define CARPSTATS_ADD(name, val) \
+ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
+ sizeof(uint64_t)], (val))
+#define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1)
+
+SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
+ carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+#define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \
NULL, MTX_DEF)
-#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
-#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
-#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
-#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
+#define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx)
+#define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx)
+#define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
+#define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \
+ NULL, MTX_DEF)
+#define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx)
+#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED)
+#define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx)
+#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx)
+#define CIF_FREE(cif) do { \
+ CIF_LOCK_ASSERT(cif); \
+ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \
+ carp_free_if(cif); \
+ else \
+ CIF_UNLOCK(cif); \
+} while (0)
-#define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
-
#define CARP_LOG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 0) \
- log(LOG_INFO, __VA_ARGS__); \
+ if (V_carp_log > 0) \
+ log(LOG_INFO, "carp: " __VA_ARGS__); \
} while (0)
#define CARP_DEBUG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 1) \
+ if (V_carp_log > 1) \
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
-static void carp_hmac_prepare(struct carp_softc *);
-static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static void carp_setroute(struct carp_softc *, int);
+#define IFNET_FOREACH_IFA(ifp, ifa) \
+ IF_ADDR_LOCK_ASSERT(ifp); \
+ TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
+ if ((ifa)->ifa_carp != NULL)
+
+#define CARP_FOREACH_IFA(sc, ifa) \
+ CARP_LOCK_ASSERT(sc); \
+ for (int _i = 0; \
+ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \
+ ((ifa) = sc->sc_ifas[_i]) != NULL; \
+ ++_i)
+
+#define IFNET_FOREACH_CARP(ifp, sc) \
+ CIF_LOCK_ASSERT(ifp->if_carp); \
+ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
+
+#define DEMOTE_ADVSKEW(sc) \
+ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \
+ CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
+
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int carp_clone_create(struct if_clone *, int, caddr_t);
-static void carp_clone_destroy(struct ifnet *);
-static void carpdetach(struct carp_softc *, int);
-static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
- struct carp_header *);
-static void carp_send_ad_all(void);
+static struct carp_softc
+ *carp_alloc(struct ifnet *);
+static void carp_detach_locked(struct ifaddr *);
+static void carp_destroy(struct carp_softc *);
+static struct carp_if
+ *carp_alloc_if(struct ifnet *);
+static void carp_free_if(struct carp_if *);
+static void carp_set_state(struct carp_softc *, int);
+static void carp_sc_state(struct carp_softc *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
+static void carp_master_down(void *);
+static void carp_master_down_locked(struct carp_softc *);
static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
-#ifdef INET
-static void carp_send_arp(struct carp_softc *);
-#endif
-static void carp_master_down(void *);
-static void carp_master_down_locked(struct carp_softc *);
-static int carp_ioctl(struct ifnet *, u_long, caddr_t);
-static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
-static void carp_start(struct ifnet *);
-static void carp_setrun(struct carp_softc *, sa_family_t);
-static void carp_set_state(struct carp_softc *, int);
-#ifdef INET
-static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
-#endif
-enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+static void carp_addroute(struct carp_softc *);
+static void carp_ifa_addroute(struct ifaddr *);
+static void carp_delroute(struct carp_softc *);
+static void carp_ifa_delroute(struct ifaddr *);
+static void carp_send_ad_all(void *, int);
+static void carp_demote_adj(int, char *);
-#ifdef INET
-static void carp_multicast_cleanup(struct carp_softc *, int dofree);
-static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
-static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
-#endif
-static void carp_carpdev_state_locked(struct carp_if *);
-static void carp_sc_state_locked(struct carp_softc *);
-#ifdef INET6
-static void carp_send_na(struct carp_softc *);
-static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static void carp_multicast6_cleanup(struct carp_softc *, int dofree);
-#endif
-
-static LIST_HEAD(, carp_softc) carpif_list;
+static LIST_HEAD(, carp_softc) carp_list;
static struct mtx carp_mtx;
-IFC_SIMPLE_DECLARE(carp, 0);
+static struct task carp_sendall_task =
+ TASK_INITIALIZER(0, carp_send_ad_all, NULL);
-static eventhandler_tag if_detach_event_tag;
-
-static __inline u_int16_t
-carp_cksum(struct mbuf *m, int len)
-{
- return (in_cksum(m, len));
-}
-
static void
carp_hmac_prepare(struct carp_softc *sc)
{
- u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
- u_int8_t vhid = sc->sc_vhid & 0xff;
+ uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+ uint8_t vhid = sc->sc_vhid & 0xff;
struct ifaddr *ifa;
int i, found;
#ifdef INET
@@ -270,18 +333,15 @@
struct in6_addr last6, cur6, in6;
#endif
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
- /* XXX: possible race here */
-
- /* compute ipad from key */
+ /* Compute ipad from key. */
bzero(sc->sc_pad, sizeof(sc->sc_pad));
bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36;
- /* precompute first part of inner hash */
+ /* Precompute first part of inner hash. */
SHA1Init(&sc->sc_sha1);
SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
@@ -293,8 +353,7 @@
found = 0;
last = cur;
cur.s_addr = 0xffffffff;
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
if (ifa->ifa_addr->sa_family == AF_INET &&
ntohl(in.s_addr) > ntohl(last.s_addr) &&
@@ -303,7 +362,6 @@
found++;
}
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
} while (found);
@@ -314,8 +372,7 @@
found = 0;
last6 = cur6;
memset(&cur6, 0xff, sizeof(cur6));
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
if (IN6_IS_SCOPE_EMBED(&in6))
in6.s6_addr16[1] = 0;
@@ -326,7 +383,6 @@
found++;
}
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
} while (found);
@@ -335,17 +391,16 @@
/* convert ipad to opad */
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36 ^ 0x5c;
-
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
}
static void
-carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
SHA1_CTX sha1ctx;
+ CARP_LOCK_ASSERT(sc);
+
/* fetch first half of inner hash */
bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
@@ -360,12 +415,12 @@
}
static int
-carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
unsigned char md2[20];
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
carp_hmac_generate(sc, counter, md2);
@@ -372,192 +427,7 @@
return (bcmp(md, md2, sizeof(md2)));
}
-static void
-carp_setroute(struct carp_softc *sc, int cmd)
-{
- struct ifaddr *ifa;
- int s;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
-
- s = splnet();
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET &&
- sc->sc_carpdev != NULL) {
- int count = carp_addrcount(
- (struct carp_if *)sc->sc_carpdev->if_carp,
- ifatoia(ifa), CARP_COUNT_MASTER);
-
- if ((cmd == RTM_ADD && count == 1) ||
- (cmd == RTM_DELETE && count == 0))
- rtinit(ifa, cmd, RTF_UP | RTF_HOST);
- }
-#endif
- }
- splx(s);
-}
-
-static int
-carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
-
- struct carp_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- free(sc, M_CARP);
- return (ENOSPC);
- }
-
- sc->sc_flags_backup = 0;
- sc->sc_suppress = 0;
- sc->sc_advbase = CARP_DFLTINTV;
- sc->sc_vhid = -1; /* required setting */
- sc->sc_advskew = 0;
- sc->sc_init_counter = 1;
- sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
-#ifdef INET
- sc->sc_imo.imo_membership = (struct in_multi **)malloc(
- (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_imo.imo_mfilters = NULL;
- sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
- sc->sc_imo.imo_multicast_vif = -1;
-#endif
-#ifdef INET6
- sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
- (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_im6o.im6o_mfilters = NULL;
- sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
- sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
-#endif
-
- callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
-
- ifp->if_softc = sc;
- if_initname(ifp, CARP_IFNAME, unit);
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = IFF_LOOPBACK;
- ifp->if_ioctl = carp_ioctl;
- ifp->if_output = carp_looutput;
- ifp->if_start = carp_start;
- ifp->if_type = IFT_CARP;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_hdrlen = 0;
- if_attach(ifp);
- bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&carp_mtx);
- LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
- mtx_unlock(&carp_mtx);
- return (0);
-}
-
-static void
-carp_clone_destroy(struct ifnet *ifp)
-{
- struct carp_softc *sc = ifp->if_softc;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carpdetach(sc, 1); /* Returns unlocked. */
-
- mtx_lock(&carp_mtx);
- LIST_REMOVE(sc, sc_next);
- mtx_unlock(&carp_mtx);
- bpfdetach(ifp);
- if_detach(ifp);
- if_free_type(ifp, IFT_ETHER);
-#ifdef INET
- free(sc->sc_imo.imo_membership, M_CARP);
-#endif
-#ifdef INET6
- free(sc->sc_im6o.im6o_membership, M_CARP);
-#endif
- free(sc, M_CARP);
-}
-
/*
- * This function can be called on CARP interface destroy path,
- * and in case of the removal of the underlying interface as
- * well. We differentiate these two cases: in case of destruction
- * of the underlying interface, we do not cleanup our multicast
- * memberships, since they are already freed. But we purge pointers
- * to multicast structures, since they are no longer valid, to
- * avoid panic in future calls to carpdetach(). Also, we do not
- * release the lock on return, because the function will be
- * called once more, for another CARP instance on the same
- * interface.
- */
-static void
-carpdetach(struct carp_softc *sc, int unlock)
-{
- struct carp_if *cif;
-
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
-
- if (sc->sc_suppress)
- carp_suppress_preempt--;
- sc->sc_suppress = 0;
-
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
-
- carp_set_state(sc, INIT);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- carp_setrun(sc, 0);
-#ifdef INET
- carp_multicast_cleanup(sc, unlock);
-#endif
-#ifdef INET6
- carp_multicast6_cleanup(sc, unlock);
-#endif
-
- if (sc->sc_carpdev != NULL) {
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- CARP_LOCK_ASSERT(cif);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- ifpromisc(sc->sc_carpdev, 0);
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else if (unlock)
- CARP_UNLOCK(cif);
- sc->sc_carpdev = NULL;
- }
-}
-
-/* Detach an interface from the carp. */
-static void
-carp_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct carp_if *cif = (struct carp_if *)ifp->if_carp;
- struct carp_softc *sc, *nextsc;
-
- if (cif == NULL)
- return;
-
- /*
- * XXX: At the end of for() cycle the lock will be destroyed.
- */
- CARP_LOCK(cif);
- for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
- nextsc = TAILQ_NEXT(sc, sc_list);
- carpdetach(sc, 0);
- }
-}
-
-/*
* process input packet.
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
@@ -572,25 +442,15 @@
CARPSTATS_INC(carps_ipackets);
- if (!carp_opts[CARPCTL_ALLOW]) {
+ if (!V_carp_allow) {
m_freem(m);
return;
}
- /* check if received on a valid carp interface */
- if (m->m_pkthdr.rcvif->if_carp == NULL) {
- CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
- m_freem(m);
- return;
- }
-
/* verify that the IP TTL is 255. */
if (ip->ip_ttl != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
ip->ip_ttl,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
@@ -601,9 +461,8 @@
if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: received len %zd < "
- "sizeof(struct carp_header) on %s\n",
- m->m_len - sizeof(struct ip),
+ CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
+ "on %s\n", __func__, m->m_len - sizeof(struct ip),
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
@@ -612,7 +471,7 @@
if (iplen + sizeof(*ch) < m->m_len) {
if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
CARPSTATS_INC(carps_hdrops);
- CARP_DEBUG("carp_input: pullup failed\n");
+ CARP_DEBUG("%s: pullup failed\n", __func__);
return;
}
ip = mtod(m, struct ip *);
@@ -626,7 +485,7 @@
len = iplen + sizeof(*ch);
if (len > m->m_pkthdr.len) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: packet too short %d on %s\n",
+ CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
m->m_pkthdr.len,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
@@ -642,9 +501,9 @@
/* verify the CARP checksum */
m->m_data += iplen;
- if (carp_cksum(m, len - iplen)) {
+ if (in_cksum(m, len - iplen)) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp_input: checksum failed on %s\n",
+ CARP_DEBUG("%s: checksum failed on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
@@ -666,7 +525,7 @@
CARPSTATS_INC(carps_ipackets6);
- if (!carp_opts[CARPCTL_ALLOW]) {
+ if (!V_carp_allow) {
m_freem(m);
return (IPPROTO_DONE);
}
@@ -674,9 +533,8 @@
/* check if received on a valid carp interface */
if (m->m_pkthdr.rcvif->if_carp == NULL) {
CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp6_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
+ __func__, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -684,9 +542,8 @@
/* verify that the IP TTL is 255 */
if (ip6->ip6_hlim != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
- ip6->ip6_hlim,
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+ ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -696,7 +553,7 @@
IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
if (ch == NULL) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp6_input: packet size %u too small\n", len);
+ CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
return (IPPROTO_DONE);
}
@@ -703,9 +560,9 @@
/* verify the CARP checksum */
m->m_data += *offp;
- if (carp_cksum(m, sizeof(*ch))) {
+ if (in_cksum(m, sizeof(*ch))) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp6_input: checksum failed, on %s\n",
+ CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
@@ -721,62 +578,46 @@
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct ifaddr *ifa;
struct carp_softc *sc;
- u_int64_t tmp_counter;
+ uint64_t tmp_counter;
struct timeval sc_tv, ch_tv;
/* verify that the VHID is valid on the receiving interface */
- CARP_LOCK(ifp->if_carp);
- TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
- if (sc->sc_vhid == ch->carp_vhid)
+ IF_ADDR_RLOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == af &&
+ ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
+ ifa_ref(ifa);
break;
+ }
+ IF_ADDR_RUNLOCK(ifp);
- if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+ if (ifa == NULL) {
CARPSTATS_INC(carps_badvhid);
- CARP_UNLOCK(ifp->if_carp);
m_freem(m);
return;
}
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_ipackets++;
- SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
- uint32_t af1 = af;
-#ifdef INET
- struct ip *ip = mtod(m, struct ip *);
-
- /* BPF wants net byte order */
- if (af == AF_INET) {
- ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
- ip->ip_off = htons(ip->ip_off);
- }
-#endif
- bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
- }
-
/* verify the CARP version. */
if (ch->carp_version != CARP_VERSION) {
CARPSTATS_INC(carps_badver);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s; invalid version %d\n",
- SC2IFP(sc)->if_xname,
+ CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
ch->carp_version);
+ ifa_free(ifa);
m_freem(m);
return;
}
- /* verify the hash */
+ sc = ifa->ifa_carp;
+ CARP_LOCK(sc);
+ ifa_free(ifa);
+
if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
CARPSTATS_INC(carps_badauth);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
- m_freem(m);
- return;
+ CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
+ sc->sc_vhid, ifp->if_xname);
+ goto out;
}
tmp_counter = ntohl(ch->carp_counter[0]);
@@ -789,10 +630,7 @@
sc->sc_counter = tmp_counter;
sc_tv.tv_sec = sc->sc_advbase;
- if (carp_suppress_preempt && sc->sc_advskew < 240)
- sc_tv.tv_usec = 240 * 1000000 / 256;
- else
- sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
ch_tv.tv_sec = ch->carp_advbase;
ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
@@ -807,12 +645,13 @@
if (timevalcmp(&sc_tv, &ch_tv, >) ||
timevalcmp(&sc_tv, &ch_tv, ==)) {
callout_stop(&sc->sc_ad_tmo);
- CARP_LOG("%s: MASTER -> BACKUP "
- "(more frequent advertisement received)\n",
- SC2IFP(sc)->if_xname);
+ CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
+ "(more frequent advertisement received)\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
}
break;
case BACKUP:
@@ -820,11 +659,11 @@
* If we're pre-empting masters who advertise slower than us,
* and this one claims to be slower, treat him as down.
*/
- if (carp_opts[CARPCTL_PREEMPT] &&
- timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
+ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(preempting a slower master)\n",
- SC2IFP(sc)->if_xname);
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_master_down_locked(sc);
break;
}
@@ -836,9 +675,10 @@
*/
sc_tv.tv_sec = sc->sc_advbase * 3;
if (timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(master timed out)\n",
- SC2IFP(sc)->if_xname);
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_master_down_locked(sc);
break;
}
@@ -851,10 +691,9 @@
break;
}
- CARP_UNLOCK(ifp->if_carp);
-
+out:
+ CARP_UNLOCK(sc);
m_freem(m);
- return;
}
static int
@@ -861,7 +700,6 @@
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
struct m_tag *mtag;
- struct ifnet *ifp = SC2IFP(sc);
if (sc->sc_init_counter) {
/* this could also be seconds since unix epoch */
@@ -877,95 +715,117 @@
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
/* Tag packet for carp_output */
- mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
+ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
+ M_NOWAIT)) == NULL) {
m_freem(m);
- SC2IFP(sc)->if_oerrors++;
+ CARPSTATS_INC(carps_onomem);
return (ENOMEM);
}
- bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ bcopy(&sc, mtag + 1, sizeof(sc));
m_tag_prepend(m, mtag);
return (0);
}
+/*
+ * To avoid LORs and possible recursions this function shouldn't
+ * be called directly, but scheduled via taskqueue.
+ */
static void
-carp_send_ad_all(void)
+carp_send_ad_all(void *ctx __unused, int pending __unused)
{
struct carp_softc *sc;
mtx_lock(&carp_mtx);
- LIST_FOREACH(sc, &carpif_list, sc_next) {
- if (sc->sc_carpdev == NULL)
- continue;
- CARP_SCLOCK(sc);
- if ((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
- sc->sc_state == MASTER)
+ LIST_FOREACH(sc, &carp_list, sc_next)
+ if (sc->sc_state == MASTER) {
+ CARP_LOCK(sc);
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
- }
+ CURVNET_RESTORE();
+ CARP_UNLOCK(sc);
+ }
mtx_unlock(&carp_mtx);
}
+/* Send a periodic advertisement, executed in callout context. */
static void
carp_send_ad(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
+ CURVNET_RESTORE();
+ CARP_UNLOCK(sc);
}
static void
+carp_send_ad_error(struct carp_softc *sc, int error)
+{
+
+ if (error) {
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ static const char fmt[] = "send error %d on %s";
+ char msg[sizeof(fmt) + IFNAMSIZ];
+
+ sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
+ carp_demote_adj(V_carp_senderr_adj, msg);
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
+ ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
+ static const char fmt[] = "send ok on %s";
+ char msg[sizeof(fmt) + IFNAMSIZ];
+
+ sprintf(msg, fmt, sc->sc_carpdev->if_xname);
+ carp_demote_adj(-V_carp_senderr_adj, msg);
+ sc->sc_sendad_errors = 0;
+ } else
+ sc->sc_sendad_errors = 0;
+ }
+}
+
+static void
carp_send_ad_locked(struct carp_softc *sc)
{
struct carp_header ch;
struct timeval tv;
+ struct sockaddr sa;
+ struct ifaddr *ifa;
struct carp_header *ch_ptr;
struct mbuf *m;
- int len, advbase, advskew;
+ int len, advskew;
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
- /* bow out if we've lost our UPness or RUNNINGuiness */
- if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
- advbase = 255;
- advskew = 255;
- } else {
- advbase = sc->sc_advbase;
- if (!carp_suppress_preempt || sc->sc_advskew > 240)
- advskew = sc->sc_advskew;
- else
- advskew = 240;
- tv.tv_sec = advbase;
- tv.tv_usec = advskew * 1000000 / 256;
- }
+ advskew = DEMOTE_ADVSKEW(sc);
+ tv.tv_sec = sc->sc_advbase;
+ tv.tv_usec = advskew * 1000000 / 256;
ch.carp_version = CARP_VERSION;
ch.carp_type = CARP_ADVERTISEMENT;
ch.carp_vhid = sc->sc_vhid;
- ch.carp_advbase = advbase;
+ ch.carp_advbase = sc->sc_advbase;
ch.carp_advskew = advskew;
ch.carp_authlen = 7; /* XXX DEFINE */
ch.carp_pad1 = 0; /* must be zero */
ch.carp_cksum = 0;
+ /* XXXGL: OpenBSD picks first ifaddr with needed family. */
+
#ifdef INET
- if (sc->sc_ia) {
+ if (sc->sc_naddrs) {
struct ip *ip;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
- /* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
- return;
+ goto resched;
}
len = sizeof(*ip) + sizeof(ch);
m->m_pkthdr.len = len;
@@ -977,67 +837,47 @@
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = len;
+ ip->ip_len = htons(len);
ip->ip_id = ip_newid();
- ip->ip_off = IP_DF;
+ ip->ip_off = htons(IP_DF);
ip->ip_ttl = CARP_DFLTTL;
ip->ip_p = IPPROTO_CARP;
ip->ip_sum = 0;
- ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+
+ bzero(&sa, sizeof(sa));
+ sa.sa_family = AF_INET;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ ip->ip_src.s_addr =
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ ifa_free(ifa);
+ } else
+ ip->ip_src.s_addr = 0;
ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
ch_ptr = (struct carp_header *)(&ip[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
if (carp_prepare_ad(m, sc, ch_ptr))
- return;
+ goto resched;
m->m_data += sizeof(*ip);
- ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+ ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
m->m_data -= sizeof(*ip);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets);
- if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
- SC2IFP(sc)->if_oerrors++;
- if (sc->sc_sendad_errors < INT_MAX)
- sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
- sc->sc_sendad_success = 0;
- } else {
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
- if (++sc->sc_sendad_success >=
- CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
- }
- } else
- sc->sc_sendad_errors = 0;
- }
+ carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+ &sc->sc_carpdev->if_carp->cif_imo, NULL));
}
#endif /* INET */
#ifdef INET6
- if (sc->sc_ia6) {
+ if (sc->sc_naddrs6) {
struct ip6_hdr *ip6;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
- /* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
- return;
+ goto resched;
}
len = sizeof(*ip6) + sizeof(ch);
m->m_pkthdr.len = len;
@@ -1050,66 +890,119 @@
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
- bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
- sizeof(struct in6_addr));
- /* set the multicast destination */
+ bzero(&sa, sizeof(sa));
+ /* set the source address */
+ sa.sa_family = AF_INET6;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ ifa_free(ifa);
+ } else
+ /* This should never happen with IPv6. */
+ bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+
+ /* Set the multicast destination. */
ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
- SC2IFP(sc)->if_oerrors++;
m_freem(m);
CARP_DEBUG("%s: in6_setscope failed\n", __func__);
- return;
+ goto resched;
}
ch_ptr = (struct carp_header *)(&ip6[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
if (carp_prepare_ad(m, sc, ch_ptr))
- return;
+ goto resched;
m->m_data += sizeof(*ip6);
- ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+ ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
m->m_data -= sizeof(*ip6);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets6);
- if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
- SC2IFP(sc)->if_oerrors++;
- if (sc->sc_sendad_errors < INT_MAX)
- sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
- sc->sc_sendad_success = 0;
- } else {
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
- if (++sc->sc_sendad_success >=
- CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
- }
- } else
- sc->sc_sendad_errors = 0;
- }
+ carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
+ &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
}
#endif /* INET6 */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+resched:
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
+}
+static void
+carp_addroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ CARP_FOREACH_IFA(sc, ifa)
+ carp_ifa_addroute(ifa);
}
+static void
+carp_ifa_addroute(struct ifaddr *ifa)
+{
+
+ switch (ifa->ifa_addr->sa_family) {
#ifdef INET
+ case AF_INET:
+ in_addprefix(ifatoia(ifa), RTF_UP);
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ in6_ifaddloop(ifa);
+ break;
+#endif
+ }
+}
+
+static void
+carp_delroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ CARP_FOREACH_IFA(sc, ifa)
+ carp_ifa_delroute(ifa);
+}
+
+static void
+carp_ifa_delroute(struct ifaddr *ifa)
+{
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ in_scrubprefix(ifatoia(ifa), LLE_STATIC);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ in6_ifremloop(ifa);
+ break;
+#endif
+ }
+}
+
+int
+carp_master(struct ifaddr *ifa)
+{
+ struct carp_softc *sc = ifa->ifa_carp;
+
+ return (sc->sc_state == MASTER);
+}
+
+#ifdef INET
/*
* Broadcast a gratuitous ARP request containing
* the virtual router MAC address for each IP address
@@ -1120,16 +1013,22 @@
{
struct ifaddr *ifa;
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
+}
- if (ifa->ifa_addr->sa_family != AF_INET)
- continue;
+int
+carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
+{
+ struct carp_softc *sc = ifa->ifa_carp;
-/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
- arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
+ if (sc->sc_state == MASTER) {
+ *enaddr = LLADDR(&sc->sc_addr);
+ return (1);
+ }
- DELAY(1000); /* XXX */
- }
+ return (0);
}
#endif
@@ -1137,251 +1036,131 @@
static void
carp_send_na(struct carp_softc *sc)
{
+ static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
struct ifaddr *ifa;
struct in6_addr *in6;
- static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-
+ CARP_FOREACH_IFA(sc, ifa) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
+ in6 = IFA_IN6(ifa);
nd6_na_output(sc->sc_carpdev, &mcast, in6,
ND_NA_FLAG_OVERRIDE, 1, NULL);
DELAY(1000); /* XXX */
}
}
-#endif /* INET6 */
-#ifdef INET
-static int
-carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
-{
- struct carp_softc *vh;
- struct ifaddr *ifa;
- int count = 0;
-
- CARP_LOCK_ASSERT(cif);
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((type == CARP_COUNT_RUNNING &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
- (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family == AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr)
- count++;
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- }
- }
- return (count);
-}
-
-int
-carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
- struct in_addr *isaddr, u_int8_t **enaddr)
-{
- struct carp_if *cif;
- struct carp_softc *vh;
- int index, count = 0;
- struct ifaddr *ifa;
-
- cif = ifp->if_carp;
- CARP_LOCK(cif);
-
- if (carp_opts[CARPCTL_ARPBALANCE]) {
- /*
- * XXX proof of concept implementation.
- * We use the source ip to decide which virtual host should
- * handle the request. If we're master of that virtual host,
- * then we respond, otherwise, just drop the arp packet on
- * the floor.
- */
- count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
- if (count == 0) {
- /* should never reach this */
- CARP_UNLOCK(cif);
- return (0);
- }
-
- /* this should be a hash, like pf_hash() */
- index = ntohl(isaddr->s_addr) % count;
- count = 0;
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family ==
- AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
- if (count == index) {
- if (vh->sc_state ==
- MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (1);
- } else {
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (0);
- }
- }
- count++;
- }
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- }
- }
- } else {
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- ia->ia_ifp == SC2IFP(vh) &&
- vh->sc_state == MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- CARP_UNLOCK(cif);
- return (1);
- }
- }
- }
- CARP_UNLOCK(cif);
- return (0);
-}
-#endif
-
-#ifdef INET6
+/*
+ * Returns ifa in case it's a carp address and it is MASTER, or if the address
+ * matches and is not a carp address. Returns NULL otherwise.
+ */
struct ifaddr *
carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
{
- struct carp_if *cif;
- struct carp_softc *vh;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- IF_ADDR_RLOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER) {
- ifa_ref(ifa);
- IF_ADDR_RUNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (ifa);
- }
- }
- IF_ADDR_RUNLOCK(SC2IFP(vh));
+ ifa = NULL;
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
+ continue;
+ if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
+ ifa = NULL;
+ else
+ ifa_ref(ifa);
+ break;
}
- CARP_UNLOCK(cif);
-
- return (NULL);
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (ifa);
}
caddr_t
carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
{
- struct m_tag *mtag;
- struct carp_if *cif;
- struct carp_softc *sc;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
- IF_ADDR_RLOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
- struct ifnet *ifp = SC2IFP(sc);
- mtag = m_tag_get(PACKET_TAG_CARP,
- sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
- /* better a bit than nothing */
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
- bcopy(&ifp, (caddr_t)(mtag + 1),
- sizeof(struct ifnet *));
- m_tag_prepend(m, mtag);
+ IF_ADDR_RLOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
+ struct carp_softc *sc = ifa->ifa_carp;
+ struct m_tag *mtag;
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
+ IF_ADDR_RUNLOCK(ifp);
+
+ mtag = m_tag_get(PACKET_TAG_CARP,
+ sizeof(struct carp_softc *), M_NOWAIT);
+ if (mtag == NULL)
+ /* Better a bit than nothing. */
+ return (LLADDR(&sc->sc_addr));
+
+ bcopy(&sc, mtag + 1, sizeof(sc));
+ m_tag_prepend(m, mtag);
+
+ return (LLADDR(&sc->sc_addr));
}
- IF_ADDR_RUNLOCK(SC2IFP(sc));
- }
- CARP_UNLOCK(cif);
+ IF_ADDR_RUNLOCK(ifp);
return (NULL);
}
-#endif
+#endif /* INET6 */
-struct ifnet *
+int
carp_forus(struct ifnet *ifp, u_char *dhost)
{
- struct carp_if *cif;
- struct carp_softc *vh;
- u_int8_t *ena = dhost;
+ struct carp_softc *sc;
+ uint8_t *ena = dhost;
if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
- return (NULL);
+ return (0);
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER &&
- !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
- CARP_UNLOCK(cif);
- return (SC2IFP(vh));
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
+ ETHER_ADDR_LEN)) {
+ CARP_UNLOCK(sc);
+ CIF_UNLOCK(ifp->if_carp);
+ return (1);
}
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
- CARP_UNLOCK(cif);
- return (NULL);
+ return (0);
}
+/* Master down timeout event, executed in callout context. */
static void
carp_master_down(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
- carp_master_down_locked(sc);
- CARP_SCUNLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
+
+ CURVNET_SET(sc->sc_carpdev->if_vnet);
+ if (sc->sc_state == BACKUP) {
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
+ carp_master_down_locked(sc);
+ }
+ CURVNET_RESTORE();
+
+ CARP_UNLOCK(sc);
}
static void
carp_master_down_locked(struct carp_softc *sc)
{
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
+
switch (sc->sc_state) {
- case INIT:
- printf("%s: master_down event in INIT state\n",
- SC2IFP(sc)->if_xname);
- break;
- case MASTER:
- break;
case BACKUP:
carp_set_state(sc, MASTER);
carp_send_ad_locked(sc);
@@ -1390,10 +1169,19 @@
#endif
#ifdef INET6
carp_send_na(sc);
-#endif /* INET6 */
+#endif
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_ADD);
+ carp_addroute(sc);
break;
+ case INIT:
+ case MASTER:
+#ifdef INVARIANTS
+ panic("carp: VHID %u@%s: master_down event in %s state\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname,
+ sc->sc_state ? "MASTER" : "INIT");
+#endif
+ break;
}
}
@@ -1406,28 +1194,19 @@
{
struct timeval tv;
- if (sc->sc_carpdev == NULL) {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_set_state(sc, INIT);
- return;
- } else
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
- if (SC2IFP(sc)->if_flags & IFF_UP &&
- sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
- sc->sc_carpdev->if_link_state == LINK_STATE_UP)
- SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
- else {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_setroute(sc, RTM_DELETE);
+ if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
+ sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
+ (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
return;
- }
switch (sc->sc_state) {
case INIT:
- CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
+ CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_set_state(sc, BACKUP);
- carp_setroute(sc, RTM_DELETE);
carp_setrun(sc, 0);
break;
case BACKUP:
@@ -1440,20 +1219,24 @@
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET6 */
+#endif
default:
+#ifdef INET
if (sc->sc_naddrs)
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
+#ifdef INET6
if (sc->sc_naddrs6)
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
break;
}
break;
@@ -1466,546 +1249,476 @@
}
}
-#ifdef INET
-static void
-carp_multicast_cleanup(struct carp_softc *sc, int dofree)
+/*
+ * Setup multicast structures.
+ */
+static int
+carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
{
- struct ip_moptions *imo = &sc->sc_imo;
- u_int16_t n = imo->imo_num_memberships;
+ struct ifnet *ifp = cif->cif_ifp;
+ int error = 0;
- /* Clean up our own multicast memberships */
- while (n-- > 0) {
- if (imo->imo_membership[n] != NULL) {
- if (dofree)
- in_delmulti(imo->imo_membership[n]);
- imo->imo_membership[n] = NULL;
+ CIF_LOCK_ASSERT(cif);
+
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct ip_moptions *imo = &cif->cif_imo;
+ struct in_addr addr;
+
+ if (imo->imo_membership)
+ return (0);
+
+ imo->imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_NOWAIT);
+ if (imo->imo_membership == NULL)
+ return (ENOMEM);
+ imo->imo_mfilters = NULL;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_multicast_vif = -1;
+
+ addr.s_addr = htonl(INADDR_CARP_GROUP);
+ if ((error = in_joingroup(ifp, &addr, NULL,
+ &imo->imo_membership[0])) != 0) {
+ free(imo->imo_membership, M_CARP);
+ break;
}
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_ttl = CARP_DFLTTL;
+ imo->imo_multicast_loop = 0;
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+ struct in6_addr in6;
+ struct in6_multi *in6m;
+
+ if (im6o->im6o_membership)
+ return (0);
+
+ im6o->im6o_membership = (struct in6_multi **)malloc(
+ (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+ M_ZERO | M_NOWAIT);
+ if (im6o->im6o_membership == NULL)
+ return (ENOMEM);
+ im6o->im6o_mfilters = NULL;
+ im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+ im6o->im6o_multicast_hlim = CARP_DFLTTL;
+ im6o->im6o_multicast_ifp = ifp;
+
+ /* Join IPv6 CARP multicast group. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr8[15] = 0x12;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ im6o->im6o_membership[0] = in6m;
+ im6o->im6o_num_memberships++;
+
+ /* Join solicited multicast address. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr32[1] = 0;
+ in6.s6_addr32[2] = htonl(1);
+ in6.s6_addr32[3] = 0;
+ in6.s6_addr8[12] = 0xff;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ im6o->im6o_membership[1] = in6m;
+ im6o->im6o_num_memberships++;
+ break;
+ }
+#endif
}
- KASSERT(imo->imo_mfilters == NULL,
- ("%s: imo_mfilters != NULL", __func__));
- imo->imo_num_memberships = 0;
- imo->imo_multicast_ifp = NULL;
+
+ return (error);
}
-#endif
-#ifdef INET6
+/*
+ * Free multicast structures.
+ */
static void
-carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
+carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
{
- struct ip6_moptions *im6o = &sc->sc_im6o;
- u_int16_t n = im6o->im6o_num_memberships;
- while (n-- > 0) {
- if (im6o->im6o_membership[n] != NULL) {
- if (dofree)
- in6_mc_leave(im6o->im6o_membership[n], NULL);
- im6o->im6o_membership[n] = NULL;
+ CIF_LOCK_ASSERT(cif);
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ if (cif->cif_naddrs == 0) {
+ struct ip_moptions *imo = &cif->cif_imo;
+
+ in_leavegroup(imo->imo_membership[0], NULL);
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ free(imo->imo_membership, M_CARP);
+ imo->imo_membership = NULL;
+
}
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (cif->cif_naddrs6 == 0) {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ in6_mc_leave(im6o->im6o_membership[1], NULL);
+ KASSERT(im6o->im6o_mfilters == NULL,
+ ("%s: im6o_mfilters != NULL", __func__));
+ free(im6o->im6o_membership, M_CARP);
+ im6o->im6o_membership = NULL;
+ }
+ break;
+#endif
}
- KASSERT(im6o->im6o_mfilters == NULL,
- ("%s: im6o_mfilters != NULL", __func__));
- im6o->im6o_num_memberships = 0;
- im6o->im6o_multicast_ifp = NULL;
}
-#endif
-#ifdef INET
-static int
-carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
{
- struct ifnet *ifp;
- struct carp_if *cif;
- struct in_ifaddr *ia, *ia_if;
- struct ip_moptions *imo = &sc->sc_imo;
- struct in_addr addr;
- u_long iaddr = htonl(sin->sin_addr.s_addr);
- int own, error;
+ struct m_tag *mtag;
+ struct carp_softc *sc;
- if (sin->sin_addr.s_addr == 0) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
+ if (!sa)
return (0);
- }
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
- if (!ia_if)
- ia_if = ia;
- if (sin->sin_addr.s_addr ==
- ia->ia_addr.sin_addr.s_addr)
- own++;
- }
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+ default:
+ return (0);
}
- if (!ia_if) {
- IN_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
- }
+ mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+ if (mtag == NULL)
+ return (0);
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ bcopy(mtag + 1, &sc, sizeof(sc));
- ifp = ia->ia_ifp;
+ /* Set the source MAC address to the Virtual Router MAC Address. */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_BRIDGE:
+ case IFT_L2VLAN: {
+ struct ether_header *eh;
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ eh = mtod(m, struct ether_header *);
+ eh->ether_shost[0] = 0;
+ eh->ether_shost[1] = 0;
+ eh->ether_shost[2] = 0x5e;
+ eh->ether_shost[3] = 0;
+ eh->ether_shost[4] = 1;
+ eh->ether_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_FDDI: {
+ struct fddi_header *fh;
- if (imo->imo_num_memberships == 0) {
- addr.s_addr = htonl(INADDR_CARP_GROUP);
- if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
- NULL) {
- ifa_free(&ia->ia_ifa);
- return (ENOBUFS);
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_shost[0] = 0;
+ fh->fddi_shost[1] = 0;
+ fh->fddi_shost[2] = 0x5e;
+ fh->fddi_shost[3] = 0;
+ fh->fddi_shost[4] = 1;
+ fh->fddi_shost[5] = sc->sc_vhid;
}
- imo->imo_num_memberships++;
- imo->imo_multicast_ifp = ifp;
- imo->imo_multicast_ttl = CARP_DFLTTL;
- imo->imo_multicast_loop = 0;
+ break;
+ case IFT_ISO88025: {
+ struct iso88025_header *th;
+ th = mtod(m, struct iso88025_header *);
+ th->iso88025_shost[0] = 3;
+ th->iso88025_shost[1] = 0;
+ th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[4] = 0;
+ th->iso88025_shost[5] = 0;
+ }
+ break;
+ default:
+ printf("%s: carp is not supported for the %d interface type\n",
+ ifp->if_xname, ifp->if_type);
+ return (EOPNOTSUPP);
}
- if (!ifp->if_carp) {
+ return (0);
+}
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
- }
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
- }
-
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
+static struct carp_softc*
+carp_alloc(struct ifnet *ifp)
+{
+ struct carp_softc *sc;
+ struct carp_if *cif;
- } else {
- struct carp_softc *vr;
-
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EEXIST;
- goto cleanup;
- }
+ if ((cif = ifp->if_carp) == NULL) {
+ cif = carp_alloc_if(ifp);
+ if (cif == NULL)
+ return (NULL);
}
- sc->sc_ia = ia;
- sc->sc_carpdev = ifp;
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
+ sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- /* XXX: cif should not change, right? So we still hold the lock */
- CARP_LOCK_ASSERT(cif);
+ sc->sc_advbase = CARP_DFLTINTV;
+ sc->sc_vhid = -1; /* required setting */
+ sc->sc_init_counter = 1;
+ sc->sc_state = INIT;
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
- }
+ sc->sc_ifasiz = sizeof(struct ifaddr *);
+ sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
+ sc->sc_carpdev = ifp;
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
- }
- cif->vhif_nvrs++;
- }
- }
+ CARP_LOCK_INIT(sc);
+#ifdef INET
+ callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+#ifdef INET6
+ callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+ callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
- sc->sc_naddrs++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ CIF_LOCK(cif);
+ TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
+ CIF_UNLOCK(cif);
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+ mtx_lock(&carp_mtx);
+ LIST_INSERT_HEAD(&carp_list, sc, sc_next);
+ mtx_unlock(&carp_mtx);
- return (0);
-
-cleanup:
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- ifa_free(&ia->ia_ifa);
- return (error);
+ return (sc);
}
static int
-carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+carp_grow_ifas(struct carp_softc *sc)
{
- int error = 0;
+ struct ifaddr **new;
- if (!--sc->sc_naddrs) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- struct ip_moptions *imo = &sc->sc_imo;
+ CARP_LOCK_ASSERT(sc);
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_multicast_ifp = NULL;
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else {
- CARP_UNLOCK(cif);
- }
- }
+ new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
+ if (new == NULL)
+ return (ENOMEM);
+ bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
+ free(sc->sc_ifas, M_CARP);
+ sc->sc_ifas = new;
+ sc->sc_ifasiz *= 2;
- return (error);
+ return (0);
}
-#endif
-#ifdef INET6
-static int
-carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_destroy(struct carp_softc *sc)
{
- struct ifnet *ifp;
- struct carp_if *cif;
- struct in6_ifaddr *ia, *ia_if;
- struct ip6_moptions *im6o = &sc->sc_im6o;
- struct in6_addr in6;
- int own, error;
+ struct ifnet *ifp = sc->sc_carpdev;
+ struct carp_if *cif = ifp->if_carp;
- error = 0;
+ CIF_LOCK_ASSERT(cif);
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs6)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
- return (0);
- }
+ TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- int i;
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
- for (i = 0; i < 4; i++) {
- if ((sin6->sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
- (ia->ia_addr.sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
- break;
- }
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (i == 4)) {
- if (!ia_if)
- ia_if = ia;
- if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
- &ia->ia_addr.sin6_addr))
- own++;
- }
- }
+ CARP_LOCK(sc);
+ if (sc->sc_suppress)
+ carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
+ callout_drain(&sc->sc_ad_tmo);
+#ifdef INET
+ callout_drain(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
+ callout_drain(&sc->sc_md6_tmo);
+#endif
+ CARP_LOCK_DESTROY(sc);
- if (!ia_if) {
- IN6_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
- }
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN6_IFADDR_RUNLOCK();
- ifp = ia->ia_ifp;
+ free(sc->sc_ifas, M_CARP);
+ free(sc, M_CARP);
+}
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+static struct carp_if*
+carp_alloc_if(struct ifnet *ifp)
+{
+ struct carp_if *cif;
- if (!sc->sc_naddrs6) {
- struct in6_multi *in6m;
+ cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
- im6o->im6o_multicast_ifp = ifp;
+ if (ifpromisc(ifp, 1) != 0)
+ goto cleanup;
- /* join CARP multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr8[15] = 0x12;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[0] = in6m;
- im6o->im6o_num_memberships++;
+ CIF_LOCK_INIT(cif);
+ cif->cif_ifp = ifp;
+ TAILQ_INIT(&cif->cif_vrs);
- /* join solicited multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr32[1] = 0;
- in6.s6_addr32[2] = htonl(1);
- in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
- in6.s6_addr8[12] = 0xff;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[1] = in6m;
- im6o->im6o_num_memberships++;
- }
+ IF_ADDR_WLOCK(ifp);
+ ifp->if_carp = cif;
+ if_ref(ifp);
+ IF_ADDR_WUNLOCK(ifp);
- if (!ifp->if_carp) {
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
- }
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
- }
+ return (cif);
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
+cleanup:
+ free(cif, M_CARP);
- } else {
- struct carp_softc *vr;
+ return (NULL);
+}
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EINVAL;
- goto cleanup;
- }
- }
- sc->sc_ia6 = ia;
- sc->sc_carpdev = ifp;
+static void
+carp_free_if(struct carp_if *cif)
+{
+ struct ifnet *ifp = cif->cif_ifp;
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK_ASSERT(cif);
+ CIF_LOCK_ASSERT(cif);
+ KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
+ __func__));
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
- }
+ IF_ADDR_WLOCK(ifp);
+ ifp->if_carp = NULL;
+ IF_ADDR_WUNLOCK(ifp);
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
- }
- cif->vhif_nvrs++;
- }
- }
+ CIF_LOCK_DESTROY(cif);
- sc->sc_naddrs6++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ ifpromisc(ifp, 0);
+ if_rele(ifp);
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
-
- return (0);
-
-cleanup:
- if (!sc->sc_naddrs6)
- carp_multicast6_cleanup(sc, 1);
- ifa_free(&ia->ia_ifa);
- return (error);
+ free(cif, M_CARP);
}
-static int
-carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
{
- int error = 0;
- if (!--sc->sc_naddrs6) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- carp_multicast6_cleanup(sc, 1);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- CARP_LOCK_DESTROY(cif);
- sc->sc_carpdev->if_carp = NULL;
- free(cif, M_CARP);
- } else
- CARP_UNLOCK(cif);
- }
-
- return (error);
+ CARP_LOCK(sc);
+ carpr->carpr_state = sc->sc_state;
+ carpr->carpr_vhid = sc->sc_vhid;
+ carpr->carpr_advbase = sc->sc_advbase;
+ carpr->carpr_advskew = sc->sc_advskew;
+ if (priv)
+ bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
+ else
+ bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
+ CARP_UNLOCK(sc);
}
-#endif /* INET6 */
-static int
-carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+int
+carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
{
- struct carp_softc *sc = ifp->if_softc, *vr;
struct carpreq carpr;
- struct ifaddr *ifa;
- struct ifreq *ifr;
- struct ifaliasreq *ifra;
- int locked = 0, error = 0;
+ struct ifnet *ifp;
+ struct carp_softc *sc = NULL;
+ int error = 0, locked = 0;
- ifa = (struct ifaddr *)addr;
- ifra = (struct ifaliasreq *)addr;
- ifr = (struct ifreq *)addr;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ return (error);
- switch (cmd) {
- case SIOCSIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(ifa->ifa_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
- }
+ ifp = ifunit_ref(ifr->ifr_name);
+ if (ifp == NULL)
+ return (ENXIO);
+
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ case IFT_BRIDGE:
+ case IFT_FDDI:
+ case IFT_ISO88025:
break;
+ default:
+ error = EOPNOTSUPP;
+ goto out;
+ }
- case SIOCAIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case SIOCSVH:
+ if ((error = priv_check(td, PRIV_NETINET_CARP)))
break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+ if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
+ carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
+ error = EINVAL;
break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
}
- break;
- case SIOCDIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
+ if (ifp->if_carp) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
}
- break;
+ if (sc == NULL) {
+ sc = carp_alloc(ifp);
+ if (sc == NULL) {
+ error = EINVAL; /* XXX: ifpromisc failed */
+ break;
+ }
- case SIOCSIFFLAGS:
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
+ CARP_LOCK(sc);
+ sc->sc_vhid = carpr.carpr_vhid;
+ LLADDR(&sc->sc_addr)[0] = 0;
+ LLADDR(&sc->sc_addr)[1] = 0;
+ LLADDR(&sc->sc_addr)[2] = 0x5e;
+ LLADDR(&sc->sc_addr)[3] = 0;
+ LLADDR(&sc->sc_addr)[4] = 1;
+ LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
+ } else
+ CARP_LOCK(sc);
+ locked = 1;
+ if (carpr.carpr_advbase > 0) {
+ if (carpr.carpr_advbase > 255 ||
+ carpr.carpr_advbase < CARP_DFLTINTV) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
}
- if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
- if (sc->sc_state == MASTER)
- carp_send_ad_locked(sc);
- carp_set_state(sc, INIT);
- carp_setrun(sc, 0);
- } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
- SC2IFP(sc)->if_flags |= IFF_UP;
- carp_setrun(sc, 0);
- }
- break;
-
- case SIOCSVH:
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error)
+ if (carpr.carpr_advskew >= 255) {
+ error = EINVAL;
break;
- if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
- break;
- error = 1;
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
}
- if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+ sc->sc_advskew = carpr.carpr_advskew;
+ if (carpr.carpr_key[0] != '\0') {
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ carp_hmac_prepare(sc);
+ }
+ if (sc->sc_state != INIT &&
+ carpr.carpr_state != sc->sc_state) {
switch (carpr.carpr_state) {
case BACKUP:
callout_stop(&sc->sc_ad_tmo);
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
break;
case MASTER:
carp_master_down_locked(sc);
@@ -2014,296 +1727,325 @@
break;
}
}
- if (carpr.carpr_vhid > 0) {
- if (carpr.carpr_vhid > 255) {
- error = EINVAL;
- break;
- }
- if (sc->sc_carpdev) {
- struct carp_if *cif;
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc &&
- vr->sc_vhid == carpr.carpr_vhid) {
- error = EEXIST;
- break;
- }
- if (error == EEXIST)
+ break;
+
+ case SIOCGVH:
+ {
+ int priveleged;
+
+ if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
+ error = EINVAL;
+ break;
+ }
+ if (carpr.carpr_count < 1) {
+ error = EMSGSIZE;
+ break;
+ }
+ if (ifp->if_carp == NULL) {
+ error = ENOENT;
+ break;
+ }
+
+ priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
+ if (carpr.carpr_vhid != 0) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
break;
- }
- sc->sc_vhid = carpr.carpr_vhid;
- IF_LLADDR(sc->sc_ifp)[0] = 0;
- IF_LLADDR(sc->sc_ifp)[1] = 0;
- IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
- IF_LLADDR(sc->sc_ifp)[3] = 0;
- IF_LLADDR(sc->sc_ifp)[4] = 1;
- IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
- error--;
- }
- if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
- if (carpr.carpr_advskew >= 255) {
- error = EINVAL;
+ CIF_UNLOCK(ifp->if_carp);
+ if (sc == NULL) {
+ error = ENOENT;
break;
}
- if (carpr.carpr_advbase > 255) {
- error = EINVAL;
+ carp_carprcp(&carpr, sc, priveleged);
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ } else {
+ int i, count;
+
+ count = 0;
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ count++;
+
+ if (count > carpr.carpr_count) {
+ CIF_UNLOCK(ifp->if_carp);
+ error = EMSGSIZE;
break;
}
- sc->sc_advbase = carpr.carpr_advbase;
- sc->sc_advskew = carpr.carpr_advskew;
- error--;
+
+ i = 0;
+ IFNET_FOREACH_CARP(ifp, sc) {
+ carp_carprcp(&carpr, sc, priveleged);
+ carpr.carpr_count = count;
+ error = copyout(&carpr, ifr->ifr_data +
+ (i * sizeof(carpr)), sizeof(carpr));
+ if (error) {
+ CIF_UNLOCK(ifp->if_carp);
+ break;
+ }
+ i++;
+ }
+ CIF_UNLOCK(ifp->if_carp);
}
- bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
- if (error > 0)
- error = EINVAL;
- else {
- error = 0;
- carp_setrun(sc, 0);
- }
break;
-
- case SIOCGVH:
- /* XXX: lockless read */
- bzero(&carpr, sizeof(carpr));
- carpr.carpr_state = sc->sc_state;
- carpr.carpr_vhid = sc->sc_vhid;
- carpr.carpr_advbase = sc->sc_advbase;
- carpr.carpr_advskew = sc->sc_advskew;
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error == 0)
- bcopy(sc->sc_key, carpr.carpr_key,
- sizeof(carpr.carpr_key));
- error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
- break;
-
+ }
default:
error = EINVAL;
}
+out:
if (locked)
- CARP_SCUNLOCK(sc);
+ CARP_UNLOCK(sc);
+ if_rele(ifp);
- carp_hmac_prepare(sc);
-
return (error);
}
-/*
- * XXX: this is looutput. We should eventually use it from there.
- */
static int
-carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+carp_get_vhid(struct ifaddr *ifa)
{
- u_int32_t af;
- struct rtentry *rt = NULL;
- M_ASSERTPKTHDR(m); /* check if we have the packet header */
+ if (ifa == NULL || ifa->ifa_carp == NULL)
+ return (0);
- if (ro != NULL)
- rt = ro->ro_rt;
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- }
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
-
-#if 1 /* XXX */
- switch (dst->sa_family) {
- case AF_INET:
- case AF_INET6:
- case AF_IPX:
- case AF_APPLETALK:
- break;
- default:
- printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
- m_freem(m);
- return (EAFNOSUPPORT);
- }
-#endif
- return(if_simloop(ifp, m, dst->sa_family, 0));
+ return (ifa->ifa_carp->sc_vhid);
}
-/*
- * Start output on carp interface. This function should never be called.
- */
-static void
-carp_start(struct ifnet *ifp)
-{
-#ifdef DEBUG
- printf("%s: start called\n", ifp->if_xname);
-#endif
-}
-
int
-carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
- struct rtentry *rt)
+carp_attach(struct ifaddr *ifa, int vhid)
{
- struct m_tag *mtag;
+ struct ifnet *ifp = ifa->ifa_ifp;
+ struct carp_if *cif = ifp->if_carp;
struct carp_softc *sc;
- struct ifnet *carp_ifp;
+ int index, error;
- if (!sa)
- return (0);
+ if (ifp->if_carp == NULL)
+ return (ENOPROTOOPT);
- switch (sa->sa_family) {
+ switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
- break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
+#endif
break;
-#endif /* INET6 */
default:
- return (0);
+ return (EPROTOTYPE);
}
- mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
- if (mtag == NULL)
- return (0);
+ CIF_LOCK(cif);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == vhid)
+ break;
+ if (sc == NULL) {
+ CIF_UNLOCK(cif);
+ return (ENOENT);
+ }
- bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
- sc = carp_ifp->if_softc;
+ if (ifa->ifa_carp) {
+ if (ifa->ifa_carp->sc_vhid != vhid)
+ carp_detach_locked(ifa);
+ else {
+ CIF_UNLOCK(cif);
+ return (0);
+ }
+ }
- /* Set the source MAC address to Virtual Router MAC Address */
- switch (ifp->if_type) {
- case IFT_ETHER:
- case IFT_L2VLAN: {
- struct ether_header *eh;
+ error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
+ if (error) {
+ CIF_FREE(cif);
+ return (error);
+ }
- eh = mtod(m, struct ether_header *);
- eh->ether_shost[0] = 0;
- eh->ether_shost[1] = 0;
- eh->ether_shost[2] = 0x5e;
- eh->ether_shost[3] = 0;
- eh->ether_shost[4] = 1;
- eh->ether_shost[5] = sc->sc_vhid;
+ CARP_LOCK(sc);
+ index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
+ if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
+ if ((error = carp_grow_ifas(sc)) != 0) {
+ carp_multicast_cleanup(cif,
+ ifa->ifa_addr->sa_family);
+ CARP_UNLOCK(sc);
+ CIF_FREE(cif);
+ return (error);
}
- break;
- case IFT_FDDI: {
- struct fddi_header *fh;
- fh = mtod(m, struct fddi_header *);
- fh->fddi_shost[0] = 0;
- fh->fddi_shost[1] = 0;
- fh->fddi_shost[2] = 0x5e;
- fh->fddi_shost[3] = 0;
- fh->fddi_shost[4] = 1;
- fh->fddi_shost[5] = sc->sc_vhid;
- }
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ cif->cif_naddrs++;
+ sc->sc_naddrs++;
break;
- case IFT_ISO88025: {
- struct iso88025_header *th;
- th = mtod(m, struct iso88025_header *);
- th->iso88025_shost[0] = 3;
- th->iso88025_shost[1] = 0;
- th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
- th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
- th->iso88025_shost[4] = 0;
- th->iso88025_shost[5] = 0;
- }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ cif->cif_naddrs6++;
+ sc->sc_naddrs6++;
break;
- default:
- printf("%s: carp is not supported for this interface type\n",
- ifp->if_xname);
- return (EOPNOTSUPP);
+#endif
}
+ ifa_ref(ifa);
+ sc->sc_ifas[index - 1] = ifa;
+ ifa->ifa_carp = sc;
+
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+
+ CARP_UNLOCK(sc);
+ CIF_UNLOCK(cif);
+
return (0);
}
+void
+carp_detach(struct ifaddr *ifa)
+{
+ struct ifnet *ifp = ifa->ifa_ifp;
+ struct carp_if *cif = ifp->if_carp;
+
+ CIF_LOCK(cif);
+ carp_detach_locked(ifa);
+ CIF_FREE(cif);
+}
+
static void
-carp_set_state(struct carp_softc *sc, int state)
+carp_detach_locked(struct ifaddr *ifa)
{
- int link_state;
+ struct ifnet *ifp = ifa->ifa_ifp;
+ struct carp_if *cif = ifp->if_carp;
+ struct carp_softc *sc = ifa->ifa_carp;
+ int i, index;
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+ KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
- if (sc->sc_state == state)
- return;
+ CIF_LOCK_ASSERT(cif);
+ CARP_LOCK(sc);
- sc->sc_state = state;
- switch (state) {
- case BACKUP:
- link_state = LINK_STATE_DOWN;
+ /* Shift array. */
+ index = sc->sc_naddrs + sc->sc_naddrs6;
+ for (i = 0; i < index; i++)
+ if (sc->sc_ifas[i] == ifa)
+ break;
+ KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
+ for (; i < index - 1; i++)
+ sc->sc_ifas[i] = sc->sc_ifas[i+1];
+ sc->sc_ifas[index - 1] = NULL;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ cif->cif_naddrs--;
+ sc->sc_naddrs--;
break;
- case MASTER:
- link_state = LINK_STATE_UP;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ cif->cif_naddrs6--;
+ sc->sc_naddrs6--;
break;
- default:
- link_state = LINK_STATE_UNKNOWN;
- break;
+#endif
}
- if_link_state_change(SC2IFP(sc), link_state);
+
+ carp_ifa_delroute(ifa);
+ carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
+
+ ifa->ifa_carp = NULL;
+ ifa_free(ifa);
+
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+
+ if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
+ CARP_UNLOCK(sc);
+ carp_destroy(sc);
+ } else
+ CARP_UNLOCK(sc);
}
-void
-carp_carpdev_state(struct ifnet *ifp)
+static void
+carp_set_state(struct carp_softc *sc, int state)
{
- struct carp_if *cif;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- carp_carpdev_state_locked(cif);
- CARP_UNLOCK(cif);
+ CARP_LOCK_ASSERT(sc);
+
+ if (sc->sc_state != state) {
+ const char *carp_states[] = { CARP_STATES };
+ char subsys[IFNAMSIZ+5];
+
+ sc->sc_state = state;
+
+ snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
+ devctl_notify("CARP", subsys, carp_states[state], NULL);
+ }
}
static void
-carp_carpdev_state_locked(struct carp_if *cif)
+carp_linkstate(struct ifnet *ifp)
{
struct carp_softc *sc;
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
- carp_sc_state_locked(sc);
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ carp_sc_state(sc);
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
}
static void
-carp_sc_state_locked(struct carp_softc *sc)
+carp_sc_state(struct carp_softc *sc)
{
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
+
if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
!(sc->sc_carpdev->if_flags & IFF_UP)) {
- sc->sc_flags_backup = SC2IFP(sc)->if_flags;
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
callout_stop(&sc->sc_ad_tmo);
+#ifdef INET
callout_stop(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
callout_stop(&sc->sc_md6_tmo);
+#endif
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
- if (!sc->sc_suppress) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
- }
+ if (!sc->sc_suppress)
+ carp_demote_adj(V_carp_ifdown_adj, "interface down");
sc->sc_suppress = 1;
} else {
- SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (sc->sc_suppress)
- carp_suppress_preempt--;
+ carp_demote_adj(-V_carp_ifdown_adj, "interface up");
sc->sc_suppress = 0;
}
+}
- return;
+static void
+carp_demote_adj(int adj, char *reason)
+{
+ atomic_add_int(&V_carp_demotion, adj);
+ CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
+ taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
}
+static int
+carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ int new, error;
+
+ new = V_carp_demotion;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ carp_demote_adj(new, "sysctl");
+
+ return (0);
+}
+
#ifdef INET
extern struct domain inetdomain;
static struct protosw in_carp_protosw = {
@@ -2336,10 +2078,6 @@
carp_mod_cleanup(void)
{
- if (if_detach_event_tag == NULL)
- return;
- EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
- if_clone_detach(&carp_cloner);
#ifdef INET
if (proto_reg[CARP_INET] == 0) {
(void)ipproto_unregister(IPPROTO_CARP);
@@ -2357,9 +2095,17 @@
carp_iamatch6_p = NULL;
carp_macmatch6_p = NULL;
#endif
+ carp_ioctl_p = NULL;
+ carp_attach_p = NULL;
+ carp_detach_p = NULL;
+ carp_get_vhid_p = NULL;
carp_linkstate_p = NULL;
carp_forus_p = NULL;
carp_output_p = NULL;
+ carp_demote_adj_p = NULL;
+ carp_master_p = NULL;
+ mtx_unlock(&carp_mtx);
+ taskqueue_drain(taskqueue_swi, &carp_sendall_task);
mtx_destroy(&carp_mtx);
}
@@ -2368,22 +2114,23 @@
{
int err;
- if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
- carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
- if (if_detach_event_tag == NULL)
- return (ENOMEM);
mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
- LIST_INIT(&carpif_list);
- if_clone_attach(&carp_cloner);
- carp_linkstate_p = carp_carpdev_state;
+ LIST_INIT(&carp_list);
+ carp_get_vhid_p = carp_get_vhid;
carp_forus_p = carp_forus;
carp_output_p = carp_output;
+ carp_linkstate_p = carp_linkstate;
+ carp_ioctl_p = carp_ioctl;
+ carp_attach_p = carp_attach;
+ carp_detach_p = carp_detach;
+ carp_demote_adj_p = carp_demote_adj;
+ carp_master_p = carp_master;
#ifdef INET6
carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6;
proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
(struct protosw *)&in6_carp_protosw);
- if (proto_reg[CARP_INET6] != 0) {
+ if (proto_reg[CARP_INET6]) {
printf("carp: error %d attaching to PF_INET6\n",
proto_reg[CARP_INET6]);
carp_mod_cleanup();
@@ -2399,7 +2146,7 @@
#ifdef INET
carp_iamatch_p = carp_iamatch;
proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
- if (proto_reg[CARP_INET] != 0) {
+ if (proto_reg[CARP_INET]) {
printf("carp: error %d attaching to PF_INET\n",
proto_reg[CARP_INET]);
carp_mod_cleanup();
@@ -2412,7 +2159,7 @@
return (err);
}
#endif
- return 0;
+ return (0);
}
static int
@@ -2423,17 +2170,13 @@
return carp_mod_load();
/* NOTREACHED */
case MOD_UNLOAD:
- /*
- * XXX: For now, disallow module unloading by default due to
- * a race condition where a thread may dereference one of the
- * function pointer hooks after the module has been
- * unloaded, during processing of a packet, causing a panic.
- */
-#ifdef CARPMOD_CAN_UNLOAD
- carp_mod_cleanup();
-#else
- return (EBUSY);
-#endif
+ mtx_lock(&carp_mtx);
+ if (LIST_EMPTY(&carp_list))
+ carp_mod_cleanup();
+ else {
+ mtx_unlock(&carp_mtx);
+ return (EBUSY);
+ }
break;
default:
Modified: trunk/sys/netinet/ip_carp.h
===================================================================
--- trunk/sys/netinet/ip_carp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_carp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/ip_carp.h 211193 2010-08-11 20:18:19Z will $ */
+/* $FreeBSD: stable/10/sys/netinet/ip_carp.h 253087 2013-07-09 10:02:51Z ae $ */
/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
/*
@@ -118,20 +118,18 @@
uint64_t carps_preempt; /* if enabled, preemptions */
};
-#ifdef _KERNEL
-#define CARPSTATS_ADD(name, val) carpstats.name += (val)
-#define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1)
-#endif
-
/*
* Configuration structure for SIOCSVH SIOCGVH
*/
struct carpreq {
+ int carpr_count;
+ int carpr_vhid;
+#define CARP_MAXVHID 255
int carpr_state;
#define CARP_STATES "INIT", "BACKUP", "MASTER"
#define CARP_MAXSTATE 2
- int carpr_vhid;
int carpr_advskew;
+#define CARP_MAXSKEW 240
int carpr_advbase;
unsigned char carpr_key[CARP_KEY_LEN];
};
@@ -138,49 +136,39 @@
#define SIOCSVH _IOWR('i', 245, struct ifreq)
#define SIOCGVH _IOWR('i', 246, struct ifreq)
-/*
- * Names for CARP sysctl objects
- */
-#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
-#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
-#define CARPCTL_LOG 3 /* log bad packets */
-#define CARPCTL_STATS 4 /* statistics (read-only) */
-#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
-#define CARPCTL_MAXID 6
-
-#define CARPCTL_NAMES { \
- { 0, 0 }, \
- { "allow", CTLTYPE_INT }, \
- { "preempt", CTLTYPE_INT }, \
- { "log", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "arpbalance", CTLTYPE_INT }, \
-}
-
#ifdef _KERNEL
-void carp_carpdev_state(struct ifnet *);
-void carp_input (struct mbuf *, int);
-int carp6_input (struct mbuf **, int *, int);
-int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct rtentry *);
-int carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int carp_ioctl(struct ifreq *, u_long, struct thread *);
+int carp_attach(struct ifaddr *, int);
+void carp_detach(struct ifaddr *);
+void carp_carpdev_state(struct ifnet *);
+void carp_input (struct mbuf *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *,
+ const struct sockaddr *);
+int carp_master(struct ifaddr *);
+int carp_iamatch(struct ifaddr *, uint8_t **);
struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
-struct ifnet *carp_forus (struct ifnet *, u_char *);
+int carp_forus(struct ifnet *, u_char *);
/* These are external networking stack hooks for CARP */
/* net/if.c */
+extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+extern int (*carp_attach_p)(struct ifaddr *, int);
+extern void (*carp_detach_p)(struct ifaddr *);
extern void (*carp_linkstate_p)(struct ifnet *);
+extern void (*carp_demote_adj_p)(int, char *);
+extern int (*carp_master_p)(struct ifaddr *);
/* net/if_bridge.c net/if_ethersubr.c */
-extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
+extern int (*carp_forus_p)(struct ifnet *, u_char *);
/* net/if_ethersubr.c */
extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct rtentry *);
+ const struct sockaddr *);
+/* net/rtsock.c */
+extern int (*carp_get_vhid_p)(struct ifaddr *);
#ifdef INET
/* netinet/if_ether.c */
-extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
- struct in_addr *, u_int8_t **);
+extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
/* netinet6/nd6_nbr.c */
Modified: trunk/sys/netinet/ip_divert.c
===================================================================
--- trunk/sys/netinet/ip_divert.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_divert.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,16 +29,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_divert.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_divert.c 241913 2012-10-22 21:09:03Z glebius $");
-#if !defined(KLD_MODULE)
#include "opt_inet.h"
+#include "opt_inet6.h"
#include "opt_sctp.h"
#ifndef INET
-#error "IPDIVERT requires INET."
+#error "IPDIVERT requires INET"
#endif
-#endif
-#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -211,17 +209,13 @@
/* Delayed checksums are currently not compatible with divert. */
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- ip->ip_len = ntohs(ip->ip_len);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- ip->ip_len = htons(ip->ip_len);
}
#ifdef SCTP
if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
- ip->ip_len = ntohs(ip->ip_len);
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
- ip->ip_len = htons(ip->ip_len);
}
#endif
bzero(&divsrc, sizeof(divsrc));
@@ -393,10 +387,6 @@
INP_RUNLOCK(inp);
goto cantsend;
}
-
- /* Convert fields to host order for ip_output() */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
break;
#ifdef INET6
case IPV6_VERSION >> 4:
@@ -409,8 +399,6 @@
INP_RUNLOCK(inp);
goto cantsend;
}
-
- ip6->ip6_plen = ntohs(ip6->ip6_plen);
break;
}
#endif
Modified: trunk/sys/netinet/ip_divert.h
===================================================================
--- trunk/sys/netinet/ip_divert.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_divert.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGES.
*
- * $FreeBSD: stable/9/sys/netinet/ip_divert.h 201527 2010-01-04 19:01:22Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/ip_divert.h 201527 2010-01-04 19:01:22Z luigi $
*/
#ifndef _NETINET_IP_DIVERT_H_
Modified: trunk/sys/netinet/ip_dummynet.h
===================================================================
--- trunk/sys/netinet/ip_dummynet.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_dummynet.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -25,12 +25,12 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/ip_dummynet.h 206845 2010-04-19 16:17:30Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/ip_dummynet.h 301772 2016-06-10 00:00:25Z truckman $
*/
#ifndef _IP_DUMMYNET_H
#define _IP_DUMMYNET_H
-
+#define NEW_AQM
/*
* Definition of the kernel-userland API for dummynet.
*
@@ -86,7 +86,13 @@
/* special commands for emulation of sysctl variables */
DN_SYSCTL_GET,
DN_SYSCTL_SET,
-
+#ifdef NEW_AQM
+ /* subtypes used for setting/getting extra parameters.
+ * these subtypes used with IP_DUMMYNET3 command (get)
+ * and DN_TEXT (set). */
+ DN_AQM_PARAMS, /* AQM extra params */
+ DN_SCH_PARAMS, /* scheduler extra params */
+#endif
DN_LAST,
};
@@ -105,6 +111,10 @@
DN_HAS_PROFILE = 0x0010, /* a link has a profile */
DN_IS_RED = 0x0020,
DN_IS_GENTLE_RED= 0x0040,
+ DN_IS_ECN = 0x0080,
+ #ifdef NEW_AQM
+ DN_IS_AQM = 0x0100, /* AQMs: e.g Codel & PIE */
+ #endif
DN_PIPE_CMD = 0x1000, /* pipe config... */
};
@@ -172,8 +182,8 @@
struct ipfw_flow_id fid;
uint64_t tot_pkts; /* statistics counters */
uint64_t tot_bytes;
- uint32_t length; /* Queue lenght, in packets */
- uint32_t len_bytes; /* Queue lenght, in bytes */
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
uint32_t drops;
};
@@ -210,8 +220,20 @@
int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
};
+#ifdef NEW_AQM
+/* Extra parameters for AQM and scheduler.
+ * This struct is used to pass and retrieve parameters (configurations)
+ * to/from AQM and Scheduler.
+ */
+struct dn_extra_parms {
+ struct dn_id oid;
+ char name[16];
+ uint32_t nr;
+#define DN_MAX_EXTRA_PARM 10
+ int64_t par[DN_MAX_EXTRA_PARM];
+};
+#endif
-
/*
* Overall structure of dummynet
Modified: trunk/sys/netinet/ip_ecn.c
===================================================================
--- trunk/sys/netinet/ip_ecn.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ecn.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_ecn.c 172467 2007-10-07 20:44:24Z silby $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_ecn.c 172467 2007-10-07 20:44:24Z silby $");
#include "opt_inet.h"
#include "opt_inet6.h"
Modified: trunk/sys/netinet/ip_ecn.h
===================================================================
--- trunk/sys/netinet/ip_ecn.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ecn.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/ip_ecn.h 139823 2005-01-07 01:45:51Z imp $ */
+/* $FreeBSD: stable/10/sys/netinet/ip_ecn.h 139823 2005-01-07 01:45:51Z imp $ */
/* $KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $ */
/*-
Modified: trunk/sys/netinet/ip_encap.c
===================================================================
--- trunk/sys/netinet/ip_encap.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_encap.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -58,7 +58,7 @@
/* XXX is M_NETADDR correct? */
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_encap.c 201145 2009-12-28 22:56:30Z antoine $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_encap.c 201145 2009-12-28 22:56:30Z antoine $");
#include "opt_mrouting.h"
#include "opt_inet.h"
Modified: trunk/sys/netinet/ip_encap.h
===================================================================
--- trunk/sys/netinet/ip_encap.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_encap.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet/ip_encap.h 139823 2005-01-07 01:45:51Z imp $ */
+/* $FreeBSD: stable/10/sys/netinet/ip_encap.h 139823 2005-01-07 01:45:51Z imp $ */
/* $KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $ */
/*-
Modified: trunk/sys/netinet/ip_fastfwd.c
===================================================================
--- trunk/sys/netinet/ip_fastfwd.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_fastfwd.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -75,10 +75,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_fastfwd.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_fastfwd.c 272868 2014-10-09 23:45:26Z hrs $");
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -86,6 +87,7 @@
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
@@ -98,6 +100,7 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -165,7 +168,7 @@
struct sockaddr_in *dst = NULL;
struct ifnet *ifp;
struct in_addr odest, dest;
- u_short sum, ip_len;
+ uint16_t sum, ip_len, ip_off;
int error = 0;
int hlen, mtu;
struct m_tag *fwd_tag = NULL;
@@ -295,9 +298,9 @@
* Only IP packets without options
*/
if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
- if (ip_doopts == 1)
+ if (V_ip_doopts == 1)
return m;
- else if (ip_doopts == 2) {
+ else if (V_ip_doopts == 2) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
0, 0);
return NULL; /* mbuf already free'd */
@@ -339,12 +342,6 @@
* Step 3: incoming packet firewall processing
*/
- /*
- * Convert to host representation
- */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-
odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
/*
@@ -463,8 +460,6 @@
forwardlocal:
/*
* Return packet for processing by ip_input().
- * Keep host byte order as expected at ip_input's
- * "ours"-label.
*/
m->m_flags |= M_FASTFWD_OURS;
if (ro.ro_rt)
@@ -490,13 +485,14 @@
/*
* Step 6: send off the packet
*/
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
/*
* Check if route is dampned (when ARP is unable to resolve)
*/
if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
- (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
- time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
+ (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
goto consumed;
}
@@ -505,7 +501,7 @@
/*
* Check if there is enough space in the interface queue
*/
- if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >=
ifp->if_snd.ifq_maxlen) {
IPSTAT_INC(ips_odropped);
/* would send source quench here but that is depreciated */
@@ -524,21 +520,21 @@
/*
* Check if packet fits MTU or if hardware will fragment for us
*/
- if (ro.ro_rt->rt_rmx.rmx_mtu)
- mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+ if (ro.ro_rt->rt_mtu)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
else
mtu = ifp->if_mtu;
- if (ip->ip_len <= mtu ||
- (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
+ if (ip_len <= mtu ||
+ (ifp->if_hwassist & CSUM_FRAGMENT && (ip_off & IP_DF) == 0)) {
/*
- * Restore packet header fields to original values
+ * Avoid confusing lower layers.
*/
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
+ m_clrprotoflags(m);
/*
* Send off the packet via outgoing interface
*/
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, &ro);
} else {
@@ -545,7 +541,7 @@
/*
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
*/
- if (ip->ip_off & IP_DF) {
+ if (ip_off & IP_DF) {
IPSTAT_INC(ips_cantfrag);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
0, mtu);
@@ -555,14 +551,8 @@
* We have to fragment the packet
*/
m->m_pkthdr.csum_flags |= CSUM_IP;
- /*
- * ip_fragment expects ip_len and ip_off in host byte
- * order but returns all packets in network byte order
- */
- if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
- (~ifp->if_hwassist & CSUM_DELAY_IP))) {
+ if (ip_fragment(ip, &m, mtu, ifp->if_hwassist))
goto drop;
- }
KASSERT(m != NULL, ("null mbuf and no error"));
/*
* Send off the fragments via outgoing interface
@@ -571,7 +561,12 @@
do {
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
+ /*
+ * Avoid confusing lower layers.
+ */
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, &ro);
if (error)
@@ -591,7 +586,7 @@
if (error != 0)
IPSTAT_INC(ips_odropped);
else {
- ro.ro_rt->rt_rmx.rmx_pksent++;
+ counter_u64_add(ro.ro_rt->rt_pksent, 1);
IPSTAT_INC(ips_forward);
IPSTAT_INC(ips_fastforward);
}
Modified: trunk/sys/netinet/ip_fw.h
===================================================================
--- trunk/sys/netinet/ip_fw.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_fw.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/ip_fw.h 234597 2012-04-23 07:15:15Z melifaro $
+ * $FreeBSD: stable/10/sys/netinet/ip_fw.h 287963 2015-09-18 17:29:24Z melifaro $
*/
#ifndef _IPFW2_H
@@ -615,6 +615,7 @@
uint8_t type; /* entry type */
uint8_t masklen; /* mask length */
uint16_t tbl; /* table number */
+ uint16_t flags; /* record flags */
uint32_t value; /* value */
union {
/* Longest field needs to be aligned by 4-byte boundary */
@@ -622,6 +623,7 @@
char iface[IF_NAMESIZE]; /* interface name */
} k;
} ipfw_table_xentry;
+#define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */
typedef struct _ipfw_table {
u_int32_t size; /* size of entries in bytes */
Modified: trunk/sys/netinet/ip_gre.c
===================================================================
--- trunk/sys/netinet/ip_gre.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_gre.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,8 +1,7 @@
/* $MidnightBSD$ */
-/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
-
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -30,19 +29,14 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $
*/
-/*
- * deencapsulate tunneled packets and send them on
- * output half is in net/if_gre.[ch]
- * This currently handles IPPROTO_GRE, IPPROTO_MOBILE
- */
-
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_gre.c 204522 2010-03-01 17:05:46Z joel $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_gre.c 284072 2015-06-06 13:26:13Z ae $");
#include "opt_inet.h"
-#include "opt_atalk.h"
#include "opt_inet6.h"
#include <sys/param.h>
@@ -54,275 +48,136 @@
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/syslog.h>
-#include <net/bpf.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
-#include <net/netisr.h>
-#include <net/route.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
#include <net/raw_cb.h>
-#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_encap.h>
#include <netinet/ip_var.h>
-#include <netinet/ip_gre.h>
#include <machine/in_cksum.h>
-#else
-#error ip_gre input without IP?
-#endif
-#ifdef NETATALK
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#include <netatalk/at_extern.h>
+#ifdef INET6
+#include <netinet/ip6.h>
#endif
/* Needs IP headers. */
#include <net/if_gre.h>
-
#include <machine/stdarg.h>
-#if 1
-void gre_inet_ntoa(struct in_addr in); /* XXX */
-#endif
+extern struct domain inetdomain;
+static void gre_input10(struct mbuf *, int);
+static const struct protosw in_gre_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = gre_input10,
+ .pr_output = (pr_output_t *)rip_output,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
-static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
+#define GRE_TTL 30
+VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL;
+#define V_ip_gre_ttl VNET(ip_gre_ttl)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_RW,
+ &VNET_NAME(ip_gre_ttl), 0, "");
-static struct mbuf *gre_input2(struct mbuf *, int, u_char);
-
-/*
- * De-encapsulate a packet and feed it back through ip input (this
- * routine is called whenever IP gets a packet with proto type
- * IPPROTO_GRE and a local destination address).
- * This really is simple
- */
-void
-gre_input(struct mbuf *m, int off)
+static void
+gre_input10(struct mbuf *m, int off)
{
int proto;
proto = (mtod(m, struct ip *))->ip_p;
-
- m = gre_input2(m, off, proto);
-
- /*
- * If no matching tunnel that is up is found. We inject
- * the mbuf to raw ip socket to see if anyone picks it up.
- */
- if (m != NULL)
- rip_input(m, off);
+ gre_input(&m, &off, proto);
}
-/*
- * Decapsulate. Does the real work and is called from gre_input()
- * (above). Returns an mbuf back if packet is not yet processed,
- * and NULL if it needs no further processing. proto is the protocol
- * number of the "calling" foo_input() routine.
- */
-static struct mbuf *
-gre_input2(struct mbuf *m ,int hlen, u_char proto)
+static int
+in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct greip *gip;
- int isr;
+ GRE_RLOCK_TRACKER;
struct gre_softc *sc;
- u_int16_t flags;
- u_int32_t af;
+ struct ip *ip;
- if ((sc = gre_lookup(m, proto)) == NULL) {
- /* No matching tunnel or tunnel is down. */
- return (m);
- }
+ sc = (struct gre_softc *)arg;
+ if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0);
- if (m->m_len < sizeof(*gip)) {
- m = m_pullup(m, sizeof(*gip));
- if (m == NULL)
- return (NULL);
- }
- gip = mtod(m, struct greip *);
+ M_ASSERTPKTHDR(m);
+ /*
+ * We expect that payload contains at least IPv4
+ * or IPv6 packet.
+ */
+ if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip))
+ return (0);
- GRE2IFP(sc)->if_ipackets++;
- GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0)
+ goto bad;
- switch (proto) {
- case IPPROTO_GRE:
- hlen += sizeof(struct gre_h);
+ KASSERT(sc->gre_family == AF_INET,
+ ("wrong gre_family: %d", sc->gre_family));
- /* process GRE flags as packet can be of variable len */
- flags = ntohs(gip->gi_flags);
+ ip = mtod(m, struct ip *);
+ if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr ||
+ sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr)
+ goto bad;
- /* Checksum & Offset are present */
- if ((flags & GRE_CP) | (flags & GRE_RP))
- hlen += 4;
- /* We don't support routing fields (variable length) */
- if (flags & GRE_RP)
- return (m);
- if (flags & GRE_KP)
- hlen += 4;
- if (flags & GRE_SP)
- hlen += 4;
-
- switch (ntohs(gip->gi_ptype)) { /* ethertypes */
- case WCCP_PROTOCOL_TYPE:
- if (sc->wccp_ver == WCCP_V2)
- hlen += 4;
- /* FALLTHROUGH */
- case ETHERTYPE_IP: /* shouldn't need a schednetisr(), */
- isr = NETISR_IP;/* as we are in ip_input */
- af = AF_INET;
- break;
-#ifdef INET6
- case ETHERTYPE_IPV6:
- isr = NETISR_IPV6;
- af = AF_INET6;
- break;
-#endif
-#ifdef NETATALK
- case ETHERTYPE_ATALK:
- isr = NETISR_ATALK1;
- af = AF_APPLETALK;
- break;
-#endif
- default:
- /* Others not yet supported. */
- return (m);
- }
- break;
- default:
- /* Others not yet supported. */
- return (m);
- }
-
- if (hlen > m->m_pkthdr.len) {
- m_freem(m);
- return (NULL);
- }
- /* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
- m_adj(m, hlen);
-
- if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
- bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
- }
-
- m->m_pkthdr.rcvif = GRE2IFP(sc);
-
- netisr_queue(isr, m);
-
- /* Packet is done, no further processing needed. */
- return (NULL);
+ GRE_RUNLOCK(sc);
+ return (32 * 2);
+bad:
+ GRE_RUNLOCK(sc);
+ return (0);
}
-/*
- * input routine for IPPRPOTO_MOBILE
- * This is a little bit diffrent from the other modes, as the
- * encapsulating header was not prepended, but instead inserted
- * between IP header and payload
- */
-
-void
-gre_mobile_input(struct mbuf *m, int hlen)
+int
+in_gre_output(struct mbuf *m, int af, int hlen)
{
- struct ip *ip;
- struct mobip_h *mip;
- struct gre_softc *sc;
- int msiz;
+ struct greip *gi;
- if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) {
- /* No matching tunnel or tunnel is down. */
- m_freem(m);
- return;
+ gi = mtod(m, struct greip *);
+ switch (af) {
+ case AF_INET:
+ /*
+ * gre_transmit() has used M_PREPEND() that doesn't guarantee
+ * m_data is contiguous more than hlen bytes. Use m_copydata()
+ * here to avoid m_pullup().
+ */
+ m_copydata(m, hlen + offsetof(struct ip, ip_tos),
+ sizeof(u_char), &gi->gi_ip.ip_tos);
+ m_copydata(m, hlen + offsetof(struct ip, ip_id),
+ sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ gi->gi_ip.ip_tos = 0; /* XXX */
+ gi->gi_ip.ip_id = ip_newid();
+ break;
+#endif
}
-
- if (m->m_len < sizeof(*mip)) {
- m = m_pullup(m, sizeof(*mip));
- if (m == NULL)
- return;
- }
- ip = mtod(m, struct ip *);
- mip = mtod(m, struct mobip_h *);
-
- GRE2IFP(sc)->if_ipackets++;
- GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- if (ntohs(mip->mh.proto) & MOB_H_SBIT) {
- msiz = MOB_H_SIZ_L;
- mip->mi.ip_src.s_addr = mip->mh.osrc;
- } else
- msiz = MOB_H_SIZ_S;
-
- if (m->m_len < (ip->ip_hl << 2) + msiz) {
- m = m_pullup(m, (ip->ip_hl << 2) + msiz);
- if (m == NULL)
- return;
- ip = mtod(m, struct ip *);
- mip = mtod(m, struct mobip_h *);
- }
-
- mip->mi.ip_dst.s_addr = mip->mh.odst;
- mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8);
-
- if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) {
- m_freem(m);
- return;
- }
-
- bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) +
- (ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2));
- m->m_len -= msiz;
- m->m_pkthdr.len -= msiz;
-
- /*
- * On FreeBSD, rip_input() supplies us with ip->ip_len
- * already converted into host byteorder and also decreases
- * it by the lengh of IP header, however, ip_input() expects
- * that this field is in the original format (network byteorder
- * and full size of IP packet), so that adjust accordingly.
- */
- ip->ip_len = htons(ip->ip_len + sizeof(struct ip) - msiz);
-
- ip->ip_sum = 0;
- ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
-
- if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
- u_int32_t af = AF_INET;
- bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
- }
-
- m->m_pkthdr.rcvif = GRE2IFP(sc);
-
- netisr_queue(NETISR_IP, m);
+ gi->gi_ip.ip_ttl = V_ip_gre_ttl;
+ gi->gi_ip.ip_len = htons(m->m_pkthdr.len);
+ return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL));
}
-/*
- * Find the gre interface associated with our src/dst/proto set.
- *
- * XXXRW: Need some sort of drain/refcount mechanism so that the softc
- * reference remains valid after it's returned from gre_lookup(). Right
- * now, I'm thinking it should be reference-counted with a gre_dropref()
- * when the caller is done with the softc. This is complicated by how
- * to handle destroying the gre softc; probably using a gre_drain() in
- * in_gre.c during destroy.
- */
-static struct gre_softc *
-gre_lookup(struct mbuf *m, u_int8_t proto)
+int
+in_gre_attach(struct gre_softc *sc)
{
- struct ip *ip = mtod(m, struct ip *);
- struct gre_softc *sc;
- mtx_lock(&gre_mtx);
- for (sc = LIST_FIRST(&gre_softc_list); sc != NULL;
- sc = LIST_NEXT(sc, sc_list)) {
- if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
- (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
- (sc->g_proto == proto) &&
- ((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) {
- mtx_unlock(&gre_mtx);
- return (sc);
- }
- }
- mtx_unlock(&gre_mtx);
-
- return (NULL);
+ KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
+ sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE,
+ in_gre_encapcheck, &in_gre_protosw, sc);
+ if (sc->gre_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
Modified: trunk/sys/netinet/ip_icmp.c
===================================================================
--- trunk/sys/netinet/ip_icmp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_icmp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,10 +31,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_icmp.c 242640 2012-11-06 00:49:52Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_icmp.c 283901 2015-06-02 03:14:42Z ae $");
#include "opt_inet.h"
-#include "opt_ipsec.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -65,10 +64,6 @@
#include <netinet/icmp_var.h>
#ifdef INET
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
#include <machine/in_cksum.h>
@@ -90,13 +85,18 @@
#define V_icmplim_output VNET(icmplim_output)
SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
&VNET_NAME(icmplim_output), 0,
- "Enable rate limiting of ICMP responses");
+ "Enable logging of ICMP response rate limiting");
#ifdef INET
-VNET_DEFINE(struct icmpstat, icmpstat);
-SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(icmpstat), icmpstat, "");
+VNET_PCPUSTAT_DEFINE(struct icmpstat, icmpstat);
+VNET_PCPUSTAT_SYSINIT(icmpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat,
+ icmpstat, "ICMP statistics (struct icmpstat, netinet/icmp_var.h)");
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmpstat);
+#endif /* VIMAGE */
+
static VNET_DEFINE(int, icmpmaskrepl) = 0;
#define V_icmpmaskrepl VNET(icmpmaskrepl)
SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
@@ -144,6 +144,10 @@
&VNET_NAME(icmpbmcastecho), 0,
"");
+static VNET_DEFINE(int, icmptstamprepl) = 1;
+#define V_icmptstamprepl VNET(icmptstamprepl)
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW,
+ &VNET_NAME(icmptstamprepl), 0, "Respond to ICMP Timestamp packets");
#ifdef ICMPPRINTFS
int icmpprintfs = 0;
@@ -198,7 +202,7 @@
kmod_icmpstat_inc(int statnum)
{
- (*((u_long *)&V_icmpstat + statnum))++;
+ counter_u64_add(VNET(icmpstat)[statnum], 1);
}
/*
@@ -230,7 +234,7 @@
*/
if (n->m_flags & M_DECRYPTED)
goto freeit;
- if (oip->ip_off & ~(IP_MF|IP_DF))
+ if (oip->ip_off & htons(~(IP_MF|IP_DF)))
goto freeit;
if (n->m_flags & (M_BCAST|M_MCAST))
goto freeit;
@@ -264,7 +268,7 @@
tcphlen = th->th_off << 2;
if (tcphlen < sizeof(struct tcphdr))
goto freeit;
- if (oip->ip_len < oiphlen + tcphlen)
+ if (ntohs(oip->ip_len) < oiphlen + tcphlen)
goto freeit;
if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
goto stdreply;
@@ -271,9 +275,10 @@
if (n->m_len < oiphlen + tcphlen &&
((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
goto freeit;
- icmpelen = max(tcphlen, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+ icmpelen = max(tcphlen, min(V_icmp_quotelen,
+ ntohs(oip->ip_len) - oiphlen));
} else
-stdreply: icmpelen = max(8, min(V_icmp_quotelen, oip->ip_len - oiphlen));
+stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen));
icmplen = min(oiphlen + icmpelen, nlen);
if (icmplen < sizeof(struct ip))
@@ -280,9 +285,9 @@
goto freeit;
if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
else
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
goto freeit;
#ifdef MAC
@@ -323,8 +328,6 @@
*/
m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
nip = &icp->icmp_ip;
- nip->ip_len = htons(nip->ip_len);
- nip->ip_off = htons(nip->ip_off);
/*
* Set up ICMP message mbuf and copy old IP header (without options
@@ -339,11 +342,12 @@
m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
nip = mtod(m, struct ip *);
bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
- nip->ip_len = m->m_len;
+ nip->ip_len = htons(m->m_len);
nip->ip_v = IPVERSION;
nip->ip_hl = 5;
nip->ip_p = IPPROTO_ICMP;
nip->ip_tos = 0;
+ nip->ip_off = 0;
icmp_reflect(m);
freeit:
@@ -361,7 +365,7 @@
struct ip *ip = mtod(m, struct ip *);
struct sockaddr_in icmpsrc, icmpdst, icmpgw;
int hlen = off;
- int icmplen = ip->ip_len;
+ int icmplen = ntohs(ip->ip_len) - off;
int i, code;
void (*ctlfunc)(int, struct sockaddr *, void *);
int fibnum;
@@ -502,7 +506,6 @@
ICMPSTAT_INC(icps_badlen);
goto freeit;
}
- icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
/* Discard ICMP's in response to multicast packets */
if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
goto badcode;
@@ -538,6 +541,8 @@
goto reflect;
case ICMP_TSTAMP:
+ if (V_icmptstamprepl == 0)
+ break;
if (!V_icmpbmcastecho
&& (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
ICMPSTAT_INC(icps_bmcasttstamp);
@@ -595,7 +600,6 @@
}
ifa_free(&ia->ia_ifa);
reflect:
- ip->ip_len += hlen; /* since ip_input deducts this */
ICMPSTAT_INC(icps_reflect);
ICMPSTAT_INC(icps_outhist[icp->icmp_type]);
icmp_reflect(m);
@@ -656,9 +660,6 @@
(struct sockaddr *)&icmpgw, fibnum);
}
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
-#ifdef IPSEC
- key_sa_routechange((struct sockaddr *)&icmpsrc);
-#endif
break;
/*
@@ -705,8 +706,6 @@
goto done; /* Ip_output() will check for broadcast */
}
- m_addr_changed(m);
-
t = ip->ip_dst;
ip->ip_dst = ip->ip_src;
@@ -815,7 +814,7 @@
*/
cp = (u_char *) (ip + 1);
if ((opts = ip_srcroute(m)) == 0 &&
- (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
+ (opts = m_gethdr(M_NOWAIT, MT_DATA))) {
opts->m_len = sizeof(struct in_addr);
mtod(opts, struct in_addr *)->s_addr = 0;
}
@@ -863,19 +862,7 @@
printf("%d\n", opts->m_len);
#endif
}
- /*
- * Now strip out original options by copying rest of first
- * mbuf's data back, and adjust the IP length.
- */
- ip->ip_len -= optlen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = 5;
- m->m_len -= optlen;
- if (m->m_flags & M_PKTHDR)
- m->m_pkthdr.len -= optlen;
- optlen += sizeof(struct ip);
- bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
- (unsigned)(m->m_len - sizeof(struct ip)));
+ ip_stripoptions(m);
}
m_tag_delete_nonpersistent(m);
m->m_flags &= ~(M_BCAST|M_MCAST);
@@ -901,7 +888,7 @@
m->m_len -= hlen;
icp = mtod(m, struct icmp *);
icp->icmp_cksum = 0;
- icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
+ icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
m->m_data -= hlen;
m->m_len += hlen;
m->m_pkthdr.rcvif = (struct ifnet *)0;
Modified: trunk/sys/netinet/ip_icmp.h
===================================================================
--- trunk/sys/netinet/ip_icmp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_icmp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/ip_icmp.h 207369 2010-04-29 11:52:42Z bz $
+ * $FreeBSD: stable/10/sys/netinet/ip_icmp.h 207369 2010-04-29 11:52:42Z bz $
*/
#ifndef _NETINET_IP_ICMP_H_
Modified: trunk/sys/netinet/ip_id.c
===================================================================
--- trunk/sys/netinet/ip_id.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_id.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_id.c 185571 2008-12-02 21:37:28Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_id.c 250300 2013-05-06 16:42:18Z andre $");
/*
* IP ID generation is a fascinating topic.
Modified: trunk/sys/netinet/ip_input.c
===================================================================
--- trunk/sys/netinet/ip_input.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_input.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_input.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_input.c 300518 2016-05-23 16:20:50Z loos $");
#include "opt_bootp.h"
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include "opt_route.h"
#include <sys/param.h>
@@ -50,6 +51,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
+#include <sys/sdt.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
@@ -61,9 +63,9 @@
#include <net/route.h>
#include <net/netisr.h>
#include <net/vnet.h>
-#include <net/flowtable.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
@@ -154,11 +156,6 @@
VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */
VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */
-VNET_DEFINE(struct ipstat, ipstat);
-SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(ipstat), ipstat,
- "IP statistics (struct ipstat, netinet/ip_var.h)");
-
static VNET_DEFINE(uma_zone_t, ipq_zone);
static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
static struct mtx ipqlock;
@@ -201,30 +198,29 @@
"IP stealth mode, no TTL decrementation on forwarding");
#endif
-#ifdef FLOWTABLE
-static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip_ft);
-#define V_ip_output_flowtable_size VNET(ip_output_flowtable_size)
+static void ip_freef(struct ipqhead *, struct ipq *);
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
- &VNET_NAME(ip_output_flowtable_size), 2048,
- "number of entries in the per-cpu output flow caches");
-#endif
+/*
+ * IP statistics are stored in the "array" of counter(9)s.
+ */
+VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
+VNET_PCPUSTAT_SYSINIT(ipstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
+ "IP statistics (struct ipstat, netinet/ip_var.h)");
-static void ip_freef(struct ipqhead *, struct ipq *);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ipstat);
+#endif /* VIMAGE */
/*
* Kernel module interface for updating ipstat. The argument is an index
- * into ipstat treated as an array of u_long. While this encodes the general
- * layout of ipstat into the caller, it doesn't encode its location, so that
- * future changes to add, for example, per-CPU stats support won't cause
- * binary compatibility problems for kernel modules.
+ * into ipstat treated as an array.
*/
void
kmod_ipstat_inc(int statnum)
{
- (*((u_long *)&V_ipstat + statnum))++;
+ counter_u64_add(VNET(ipstat)[statnum], 1);
}
void
@@ -231,7 +227,7 @@
kmod_ipstat_dec(int statnum)
{
- (*((u_long *)&V_ipstat + statnum))--;
+ counter_u64_add(VNET(ipstat)[statnum], -1);
}
static int
@@ -303,24 +299,6 @@
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
-#ifdef FLOWTABLE
- if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
- &V_ip_output_flowtable_size)) {
- if (V_ip_output_flowtable_size < 256)
- V_ip_output_flowtable_size = 256;
- if (!powerof2(V_ip_output_flowtable_size)) {
- printf("flowtable must be power of 2 size\n");
- V_ip_output_flowtable_size = 2048;
- }
- } else {
- /*
- * round up to the next power of 2
- */
- V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
- }
- V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
-#endif
-
/* Skip initialization of globals for non-default instances. */
if (!IS_DEFAULT_VNET(curvnet))
return;
@@ -357,7 +335,12 @@
void
ip_destroy(void)
{
+ int i;
+ if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil hook, "
+ "error %d\n", __func__, i);
+
/* Cleanup in_ifaddr hash table; should be empty. */
hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
@@ -381,7 +364,7 @@
struct ifaddr *ifa;
struct ifnet *ifp;
int checkif, hlen = 0;
- u_short sum;
+ uint16_t sum, ip_len;
int dchg = 0; /* dest changed after fw */
struct in_addr odst; /* original dst address */
@@ -388,14 +371,11 @@
M_ASSERTPKTHDR(m);
if (m->m_flags & M_FASTFWD_OURS) {
- /*
- * Firewall or NAT changed destination to local.
- * We expect ip_len and ip_off to be in host byte order.
- */
m->m_flags &= ~M_FASTFWD_OURS;
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
+ ip_len = ntohs(ip->ip_len);
goto ours;
}
@@ -429,6 +409,8 @@
ip = mtod(m, struct ip *);
}
+ IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
+
/* 127/8 must not appear on wire - RFC1122 */
ifp = m->m_pkthdr.rcvif;
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
@@ -459,15 +441,11 @@
return;
#endif
- /*
- * Convert fields to host representation.
- */
- ip->ip_len = ntohs(ip->ip_len);
- if (ip->ip_len < hlen) {
+ ip_len = ntohs(ip->ip_len);
+ if (ip_len < hlen) {
IPSTAT_INC(ips_badlen);
goto bad;
}
- ip->ip_off = ntohs(ip->ip_off);
/*
* Check that the amount of data in the buffers
@@ -475,17 +453,17 @@
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
*/
- if (m->m_pkthdr.len < ip->ip_len) {
+ if (m->m_pkthdr.len < ip_len) {
tooshort:
IPSTAT_INC(ips_tooshort);
goto bad;
}
- if (m->m_pkthdr.len > ip->ip_len) {
+ if (m->m_pkthdr.len > ip_len) {
if (m->m_len == m->m_pkthdr.len) {
- m->m_len = ip->ip_len;
- m->m_pkthdr.len = ip->ip_len;
+ m->m_len = ip_len;
+ m->m_pkthdr.len = ip_len;
} else
- m_adj(m, ip->ip_len - m->m_pkthdr.len);
+ m_adj(m, ip_len - m->m_pkthdr.len);
}
#ifdef IPSEC
/*
@@ -522,8 +500,7 @@
goto ours;
}
if (m->m_flags & M_IP_NEXTHOP) {
- dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
- if (dchg != 0) {
+ if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
/*
* Directly ship the packet on. This allows
* forwarding packets originally destined to us
@@ -534,6 +511,7 @@
}
}
passin:
+
/*
* Process options and, if not destined for us,
* ship it on. ip_dooptions returns 1 when an
@@ -728,7 +706,8 @@
* Attempt reassembly; if it succeeds, proceed.
* ip_reass() will return a different mbuf.
*/
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
+ /* XXXGL: shouldn't we save & set m_flags? */
m = ip_reass(m);
if (m == NULL)
return;
@@ -737,12 +716,6 @@
hlen = ip->ip_hl << 2;
}
- /*
- * Further protocols expect the packet length to be w/o the
- * IP header.
- */
- ip->ip_len -= hlen;
-
#ifdef IPSEC
/*
* enforce IPsec policy checking if we are seeing last header.
@@ -827,6 +800,8 @@
NULL, 0, sysctl_maxnipq, "I",
"Maximum number of IPv4 fragment reassembly queue entries");
+#define M_IP_FRAG M_PROTO9
+
/*
* Take incoming datagram fragment and try to reassemble it into
* whole datagram. If the argument is the first fragment or one
@@ -910,28 +885,27 @@
* Adjust ip_len to not reflect header,
* convert offset of this to bytes.
*/
- ip->ip_len -= hlen;
- if (ip->ip_off & IP_MF) {
+ ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
+ if (ip->ip_off & htons(IP_MF)) {
/*
* Make sure that fragments have a data length
* that's a non-zero multiple of 8 bytes.
*/
- if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
+ if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
IPSTAT_INC(ips_toosmall); /* XXX */
goto dropfrag;
}
- m->m_flags |= M_FRAG;
+ m->m_flags |= M_IP_FRAG;
} else
- m->m_flags &= ~M_FRAG;
- ip->ip_off <<= 3;
+ m->m_flags &= ~M_IP_FRAG;
+ ip->ip_off = htons(ntohs(ip->ip_off) << 3);
-
/*
* Attempt reassembly; if it succeeds, proceed.
* ip_reass() will return a different mbuf.
*/
IPSTAT_INC(ips_fragments);
- m->m_pkthdr.header = ip;
+ m->m_pkthdr.PH_loc.ptr = ip;
/* Previous ip_reass() started here. */
/*
@@ -974,7 +948,7 @@
#endif
}
-#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
+#define GETIP(m) ((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
/*
* Handle ECN by comparing this segment with the first one;
@@ -996,7 +970,7 @@
* Find a segment which begins after this one does.
*/
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
- if (GETIP(q)->ip_off > ip->ip_off)
+ if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
break;
/*
@@ -1009,14 +983,15 @@
* segment, then it's checksum is invalidated.
*/
if (p) {
- i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
+ i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
+ ntohs(ip->ip_off);
if (i > 0) {
- if (i >= ip->ip_len)
+ if (i >= ntohs(ip->ip_len))
goto dropfrag;
m_adj(m, i);
m->m_pkthdr.csum_flags = 0;
- ip->ip_off += i;
- ip->ip_len -= i;
+ ip->ip_off = htons(ntohs(ip->ip_off) + i);
+ ip->ip_len = htons(ntohs(ip->ip_len) - i);
}
m->m_nextpkt = p->m_nextpkt;
p->m_nextpkt = m;
@@ -1029,12 +1004,13 @@
* While we overlap succeeding segments trim them or,
* if they are completely covered, dequeue them.
*/
- for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
- q = nq) {
- i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
- if (i < GETIP(q)->ip_len) {
- GETIP(q)->ip_len -= i;
- GETIP(q)->ip_off += i;
+ for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
+ ntohs(GETIP(q)->ip_off); q = nq) {
+ i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
+ ntohs(GETIP(q)->ip_off);
+ if (i < ntohs(GETIP(q)->ip_len)) {
+ GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
+ GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
m_adj(q, i);
q->m_pkthdr.csum_flags = 0;
break;
@@ -1058,7 +1034,7 @@
*/
next = 0;
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
- if (GETIP(q)->ip_off != next) {
+ if (ntohs(GETIP(q)->ip_off) != next) {
if (fp->ipq_nfrags > V_maxfragsperpacket) {
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
ip_freef(head, fp);
@@ -1065,10 +1041,10 @@
}
goto done;
}
- next += GETIP(q)->ip_len;
+ next += ntohs(GETIP(q)->ip_len);
}
/* Make sure the last packet didn't have the IP_MF flag */
- if (p->m_flags & M_FRAG) {
+ if (p->m_flags & M_IP_FRAG) {
if (fp->ipq_nfrags > V_maxfragsperpacket) {
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
ip_freef(head, fp);
@@ -1109,8 +1085,9 @@
* (and not in for{} loop), though it implies we are not going to
* reassemble more than 64k fragments.
*/
- m->m_pkthdr.csum_data =
- (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
+ while (m->m_pkthdr.csum_data & 0xffff0000)
+ m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+ (m->m_pkthdr.csum_data >> 16);
#ifdef MAC
mac_ipq_reassemble(fp, m);
mac_ipq_destroy(fp);
@@ -1121,7 +1098,7 @@
* packet; dequeue and discard fragment reassembly header.
* Make header visible.
*/
- ip->ip_len = (ip->ip_hl << 2) + next;
+ ip->ip_len = htons((ip->ip_hl << 2) + next);
ip->ip_src = fp->ipq_src;
ip->ip_dst = fp->ipq_dst;
TAILQ_REMOVE(head, fp, ipq_list);
@@ -1368,6 +1345,7 @@
struct ip *ip = mtod(m, struct ip *);
struct in_ifaddr *ia;
struct mbuf *mcopy;
+ struct sockaddr_in *sin;
struct in_addr dest;
struct route ro;
int error, type = 0, code = 0, mtu = 0;
@@ -1389,7 +1367,23 @@
}
#endif
- ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
+ bzero(&ro, sizeof(ro));
+ sin = (struct sockaddr_in *)&ro.ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+#ifdef RADIX_MPATH
+ rtalloc_mpath_fib(&ro,
+ ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+ M_GETFIB(m));
+#else
+ in_rtalloc_ign(&ro, 0, M_GETFIB(m));
+#endif
+ if (ro.ro_rt != NULL) {
+ ia = ifatoia(ro.ro_rt->rt_ifa);
+ ifa_ref(&ia->ia_ifa);
+ } else
+ ia = NULL;
#ifndef IPSEC
/*
* 'ia' may be NULL if there is no route for this destination.
@@ -1398,6 +1392,7 @@
*/
if (!srcrt && ia == NULL) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
+ RO_RTFREE(&ro);
return;
}
#endif
@@ -1418,8 +1413,8 @@
* assume exclusive access to the IP header in `m', so any
* data in a cluster may change before we reach icmp_error().
*/
- MGETHDR(mcopy, M_DONTWAIT, m->m_type);
- if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
+ mcopy = m_gethdr(M_NOWAIT, m->m_type);
+ if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
/*
* It's probably ok if the pkthdr dup fails (because
* the deep copy of the tag chain failed), but for now
@@ -1430,7 +1425,7 @@
mcopy = NULL;
}
if (mcopy != NULL) {
- mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
+ mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
mcopy->m_pkthdr.len = mcopy->m_len;
m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
}
@@ -1454,16 +1449,8 @@
dest.s_addr = 0;
if (!srcrt && V_ipsendredirects &&
ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
- struct sockaddr_in *sin;
struct rtentry *rt;
- bzero(&ro, sizeof(ro));
- sin = (struct sockaddr_in *)&ro.ro_dst;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_addr = ip->ip_dst;
- in_rtalloc_ign(&ro, 0, M_GETFIB(m));
-
rt = ro.ro_rt;
if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
@@ -1482,20 +1469,12 @@
code = ICMP_REDIRECT_HOST;
}
}
- if (rt)
- RTFREE(rt);
}
- /*
- * Try to cache the route MTU from ip_output so we can consider it for
- * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
- */
- bzero(&ro, sizeof(ro));
-
error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
if (error == EMSGSIZE && ro.ro_rt)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
if (error)
@@ -1558,7 +1537,7 @@
if (ia != NULL)
mtu = ia->ia_ifp->if_mtu;
else
- mtu = ip_next_mtu(ip->ip_len, 0);
+ mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
}
IPSTAT_INC(ips_cantfrag);
break;
@@ -1604,8 +1583,8 @@
bintime(&bt);
if (inp->inp_socket->so_options & SO_BINTIME) {
- *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
- SCM_BINTIME, SOL_SOCKET);
+ *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
+ SCM_BINTIME, SOL_SOCKET);
if (*mp)
mp = &(*mp)->m_next;
}
@@ -1613,20 +1592,20 @@
struct timeval tv;
bintime2timeval(&bt, &tv);
- *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
- SCM_TIMESTAMP, SOL_SOCKET);
+ *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
+ SCM_TIMESTAMP, SOL_SOCKET);
if (*mp)
mp = &(*mp)->m_next;
}
}
if (inp->inp_flags & INP_RECVDSTADDR) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
if (inp->inp_flags & INP_RECVTTL) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
@@ -1638,7 +1617,7 @@
*/
/* options were tossed already */
if (inp->inp_flags & INP_RECVOPTS) {
- *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+ *mp = sbcreatecontrol((caddr_t)opts_deleted_above,
sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
@@ -1645,7 +1624,7 @@
}
/* ip_srcroute doesn't do what we want here, need to fix */
if (inp->inp_flags & INP_RECVRETOPTS) {
- *mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
+ *mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
@@ -1660,32 +1639,32 @@
struct sockaddr_dl *sdp;
struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
- if (((ifp = m->m_pkthdr.rcvif))
- && ( ifp->if_index && (ifp->if_index <= V_if_index))) {
+ if ((ifp = m->m_pkthdr.rcvif) &&
+ ifp->if_index && ifp->if_index <= V_if_index) {
sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
/*
* Change our mind and don't try copy.
*/
- if ((sdp->sdl_family != AF_LINK)
- || (sdp->sdl_len > sizeof(sdlbuf))) {
+ if (sdp->sdl_family != AF_LINK ||
+ sdp->sdl_len > sizeof(sdlbuf)) {
goto makedummy;
}
bcopy(sdp, sdl2, sdp->sdl_len);
} else {
makedummy:
- sdl2->sdl_len
- = offsetof(struct sockaddr_dl, sdl_data[0]);
+ sdl2->sdl_len =
+ offsetof(struct sockaddr_dl, sdl_data[0]);
sdl2->sdl_family = AF_LINK;
sdl2->sdl_index = 0;
sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
}
- *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
- IP_RECVIF, IPPROTO_IP);
+ *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
+ IP_RECVIF, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
}
if (inp->inp_flags & INP_RECVTOS) {
- *mp = sbcreatecontrol((caddr_t) &ip->ip_tos,
+ *mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
if (*mp)
mp = &(*mp)->m_next;
Modified: trunk/sys/netinet/ip_ipsec.c
===================================================================
--- trunk/sys/netinet/ip_ipsec.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ipsec.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_ipsec.c 222845 2011-06-08 03:02:11Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_ipsec.c 291355 2015-11-26 02:24:45Z gnn $");
#include "opt_ipsec.h"
#include "opt_sctp.h"
@@ -46,7 +46,6 @@
#include <sys/sysctl.h>
#include <net/if.h>
-#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -93,7 +92,7 @@
int
ip_ipsec_filtertunnel(struct mbuf *m)
{
-#if defined(IPSEC)
+#ifdef IPSEC
/*
* Bypass packet filtering for packets previously handled by IPsec.
@@ -118,10 +117,9 @@
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
- int s, error;
+ int error;
mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -130,7 +128,6 @@
IP_FORWARDING, &error);
}
if (sp == NULL) { /* NB: can happen if error */
- splx(s);
/*XXX error stat???*/
DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
return 1;
@@ -141,7 +138,6 @@
*/
error = ipsec_in_reject(sp, m);
KEY_FREESP(&sp);
- splx(s);
if (error) {
IPSTAT_INC(ips_cantforward);
return 1;
@@ -165,7 +161,7 @@
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
- int s, error;
+ int error;
/*
* enforce IPsec policy checking if we are seeing last header.
* note that we do not visit this with protocols with pcb layer
@@ -179,7 +175,6 @@
* packet is returned to the ip input queue for delivery.
*/
mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -199,7 +194,6 @@
DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
return 1;
}
- splx(s);
if (error)
return 1;
}
@@ -221,37 +215,7 @@
* tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
* XXX quickhack!!!
*/
- struct secpolicy *sp = NULL;
- int ipsecerror;
- int ipsechdr;
- struct route *ro;
- sp = ipsec_getpolicybyaddr(m,
- IPSEC_DIR_OUTBOUND,
- IP_FORWARDING,
- &ipsecerror);
- if (sp != NULL) {
- /* count IPsec header size */
- ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL);
-
- /*
- * find the correct route for outer IPv4
- * header, compute tunnel MTU.
- */
- if (sp->req != NULL &&
- sp->req->sav != NULL &&
- sp->req->sav->sah != NULL) {
- ro = &sp->req->sav->sah->route_cache.sa_route;
- if (ro->ro_rt && ro->ro_rt->rt_ifp) {
- mtu =
- ro->ro_rt->rt_rmx.rmx_mtu ?
- ro->ro_rt->rt_rmx.rmx_mtu :
- ro->ro_rt->rt_ifp->if_mtu;
- mtu -= ipsechdr;
- }
- }
- KEY_FREESP(&sp);
- }
- return mtu;
+ return (mtu - ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL));
}
/*
@@ -265,10 +229,12 @@
{
#ifdef IPSEC
struct secpolicy *sp = NULL;
- struct ip *ip = mtod(*m, struct ip *);
struct tdb_ident *tdbi;
struct m_tag *mtag;
- int s;
+
+ if (!key_havesp(IPSEC_DIR_OUTBOUND))
+ return 0;
+
/*
* Check the security policy (SP) for the packet and, if
* required, do IPsec-related processing. There are two
@@ -279,7 +245,6 @@
* the lookup and related policy checking.
*/
mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
- s = splnet();
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
@@ -328,7 +293,6 @@
* done: below.
*/
KEY_FREESP(&sp), sp = NULL;
- splx(s);
goto done;
}
}
@@ -343,12 +307,12 @@
}
#ifdef SCTP
if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) {
+ struct ip *ip = mtod(*m, struct ip *);
+
sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2));
(*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
#endif
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
/* NB: callee frees mbuf */
*error = ipsec4_process_packet(*m, sp->req, *flags, 0);
@@ -359,8 +323,6 @@
* IPsec processing and return without error.
*/
*error = 0;
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
goto done;
}
/*
@@ -372,10 +334,8 @@
*/
if (*error == ENOENT)
*error = 0;
- splx(s);
goto reinjected;
} else { /* sp == NULL */
- splx(s);
if (*error != 0) {
/*
Modified: trunk/sys/netinet/ip_ipsec.h
===================================================================
--- trunk/sys/netinet/ip_ipsec.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_ipsec.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/ip_ipsec.h 199102 2009-11-09 19:53:34Z trasz $
+ * $FreeBSD: stable/10/sys/netinet/ip_ipsec.h 199102 2009-11-09 19:53:34Z trasz $
*/
#ifndef _NETINET_IP_IPSEC_H_
Modified: trunk/sys/netinet/ip_mroute.c
===================================================================
--- trunk/sys/netinet/ip_mroute.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_mroute.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -68,7 +68,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_mroute.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_mroute.c 314667 2017-03-04 13:03:31Z avg $");
#include "opt_inet.h"
#include "opt_mrouting.h"
@@ -94,6 +94,7 @@
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
+#include <sys/counter.h>
#include <net/if.h>
#include <net/netisr.h>
@@ -146,11 +147,11 @@
static int ip_mrouter_cnt; /* # of vnets with active mrouters */
static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
-static VNET_DEFINE(struct mrtstat, mrtstat);
-#define V_mrtstat VNET(mrtstat)
-SYSCTL_VNET_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
- &VNET_NAME(mrtstat), mrtstat,
- "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
+static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
+VNET_PCPUSTAT_SYSINIT(mrtstat);
+VNET_PCPUSTAT_SYSUNINIT(mrtstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
+ mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
"netinet/ip_mroute.h)");
static VNET_DEFINE(u_long, mfchash);
@@ -226,13 +227,13 @@
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-static VNET_DEFINE(struct pimstat, pimstat);
-#define V_pimstat VNET(pimstat)
+static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
+VNET_PCPUSTAT_SYSINIT(pimstat);
+VNET_PCPUSTAT_SYSUNINIT(pimstat);
SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
-SYSCTL_VNET_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
- &VNET_NAME(pimstat), pimstat,
- "PIM Statistics (struct pimstat, netinet/pim_var.h)");
+SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
+ pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
static u_long pim_squelch_wholepkt = 0;
SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
@@ -609,7 +610,7 @@
if_detached_event(void *arg __unused, struct ifnet *ifp)
{
vifi_t vifi;
- int i;
+ u_long i;
MROUTER_LOCK();
@@ -634,8 +635,8 @@
continue;
for (i = 0; i < mfchashsize; i++) {
struct mfc *rt, *nrt;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
+
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
if (rt->mfc_parent == vifi) {
expire_mfc(rt);
}
@@ -704,10 +705,9 @@
static int
X_ip_mrouter_done(void)
{
+ struct ifnet *ifp;
+ u_long i;
vifi_t vifi;
- int i;
- struct ifnet *ifp;
- struct ifreq ifr;
MROUTER_LOCK();
@@ -732,11 +732,6 @@
for (vifi = 0; vifi < V_numvifs; vifi++) {
if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
- struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
-
- so->sin_len = sizeof(struct sockaddr_in);
- so->sin_family = AF_INET;
- so->sin_addr.s_addr = INADDR_ANY;
ifp = V_viftable[vifi].v_ifp;
if_allmulti(ifp, 0);
}
@@ -759,8 +754,8 @@
*/
for (i = 0; i < mfchashsize; i++) {
struct mfc *rt, *nrt;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
+
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
expire_mfc(rt);
}
}
@@ -803,7 +798,7 @@
int
set_api_config(uint32_t *apival)
{
- int i;
+ u_long i;
/*
* We can set the API capabilities only if it is the first operation
@@ -825,6 +820,7 @@
for (i = 0; i < mfchashsize; i++) {
if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
+ MFC_UNLOCK();
*apival = 0;
return EPERM;
}
@@ -1307,7 +1303,7 @@
return ENOBUFS;
}
- mb0 = m_copypacket(m, M_DONTWAIT);
+ mb0 = m_copypacket(m, M_NOWAIT);
if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
mb0 = m_pullup(mb0, hlen);
if (mb0 == NULL) {
@@ -1438,7 +1434,7 @@
static void
expire_upcalls(void *arg)
{
- int i;
+ u_long i;
CURVNET_SET((struct vnet *) arg);
@@ -1450,9 +1446,7 @@
if (V_nexpire[i] == 0)
continue;
- for (rt = LIST_FIRST(&V_mfchashtbl[i]); rt; rt = nrt) {
- nrt = LIST_NEXT(rt, mfc_hash);
-
+ LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
if (TAILQ_EMPTY(&rt->mfc_stall))
continue;
@@ -1494,7 +1488,7 @@
{
struct ip *ip = mtod(m, struct ip *);
vifi_t vifi;
- int plen = ip->ip_len;
+ int plen = ntohs(ip->ip_len);
VIF_LOCK_ASSERT();
@@ -1670,7 +1664,7 @@
* the IP header is actually copied, not just referenced,
* so that ip_output() only scribbles on the copy.
*/
- mb_copy = m_copypacket(m, M_DONTWAIT);
+ mb_copy = m_copypacket(m, M_NOWAIT);
if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
mb_copy = m_pullup(mb_copy, hlen);
if (mb_copy == NULL)
@@ -2084,13 +2078,12 @@
* Allocate a new mbuf, initialize it with the header and
* the payload for the pending calls.
*/
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
return;
}
- m->m_len = m->m_pkthdr.len = 0;
m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
@@ -2385,7 +2378,7 @@
* Copy the old packet & pullup its IP header into the
* new mbuf so we can modify it.
*/
- mb_copy = m_copypacket(m, M_DONTWAIT);
+ mb_copy = m_copypacket(m, M_NOWAIT);
if (mb_copy == NULL)
return NULL;
mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
@@ -2399,15 +2392,14 @@
/* Compute the MTU after the PIM Register encapsulation */
mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
- if (ip->ip_len <= mtu) {
+ if (ntohs(ip->ip_len) <= mtu) {
/* Turn the IP header into a valid one */
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
} else {
/* Fragment the packet */
- if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) {
+ mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
+ if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
m_freem(mb_copy);
return NULL;
}
@@ -2432,7 +2424,7 @@
/*
* Add a new mbuf with an upcall header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ mb_first = m_gethdr(M_NOWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -2490,7 +2482,7 @@
/*
* Add a new mbuf with the encapsulating header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
+ mb_first = m_gethdr(M_NOWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -2507,7 +2499,8 @@
ip_outer = mtod(mb_first, struct ip *);
*ip_outer = pim_encap_iphdr;
ip_outer->ip_id = ip_newid();
- ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
+ ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
+ sizeof(pim_encap_pimhdr));
ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
ip_outer->ip_dst = rt->mfc_rp;
/*
@@ -2515,8 +2508,8 @@
* IP_DF bit.
*/
ip_outer->ip_tos = ip->ip_tos;
- if (ntohs(ip->ip_off) & IP_DF)
- ip_outer->ip_off |= IP_DF;
+ if (ip->ip_off & htons(IP_DF))
+ ip_outer->ip_off |= htons(IP_DF);
pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
+ sizeof(pim_encap_iphdr));
*pimhdr = pim_encap_pimhdr;
@@ -2564,14 +2557,13 @@
* is passed to if_simloop().
*/
void
-pim_input(struct mbuf *m, int off)
+pim_input(struct mbuf *m, int iphlen)
{
struct ip *ip = mtod(m, struct ip *);
struct pim *pim;
int minlen;
- int datalen = ip->ip_len;
+ int datalen = ntohs(ip->ip_len) - iphlen;
int ip_tos;
- int iphlen = off;
/* Keep statistics */
PIMSTAT_INC(pims_rcv_total_msgs);
@@ -2601,8 +2593,7 @@
* Get the IP and PIM headers in contiguous memory, and
* possibly the PIM REGISTER header.
*/
- if ((m->m_flags & M_EXT || m->m_len < minlen) &&
- (m = m_pullup(m, minlen)) == 0) {
+ if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) {
CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
return;
}
@@ -2817,9 +2808,9 @@
MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
- callout_init(&V_expire_upcalls_ch, CALLOUT_MPSAFE);
- callout_init(&V_bw_upcalls_ch, CALLOUT_MPSAFE);
- callout_init(&V_bw_meter_ch, CALLOUT_MPSAFE);
+ callout_init(&V_expire_upcalls_ch, 1);
+ callout_init(&V_bw_upcalls_ch, 1);
+ callout_init(&V_bw_meter_ch, 1);
}
VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
@@ -2847,7 +2838,8 @@
if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
if (if_detach_event_tag == NULL) {
- printf("ip_mroute: unable to ifnet_deperture_even handler\n");
+ printf("ip_mroute: unable to register "
+ "ifnet_departure_event handler\n");
MROUTER_LOCK_DESTROY();
return (EINVAL);
}
Modified: trunk/sys/netinet/ip_mroute.h
===================================================================
--- trunk/sys/netinet/ip_mroute.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_mroute.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/ip_mroute.h 240305 2012-09-10 11:38:02Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/ip_mroute.h 253084 2013-07-09 09:50:15Z ae $
*/
#ifndef _NETINET_IP_MROUTE_H_
@@ -207,23 +207,24 @@
* The kernel's multicast routing statistics.
*/
struct mrtstat {
- u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
- u_long mrts_mfc_misses; /* # forw. cache hash table misses */
- u_long mrts_upcalls; /* # calls to multicast routing daemon */
- u_long mrts_no_route; /* no route for packet's origin */
- u_long mrts_bad_tunnel; /* malformed tunnel options */
- u_long mrts_cant_tunnel; /* no room for tunnel options */
- u_long mrts_wrong_if; /* arrived on wrong interface */
- u_long mrts_upq_ovflw; /* upcall Q overflow */
- u_long mrts_cache_cleanups; /* # entries with no upcalls */
- u_long mrts_drop_sel; /* pkts dropped selectively */
- u_long mrts_q_overflow; /* pkts dropped - Q overflow */
- u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
- u_long mrts_upq_sockfull; /* upcalls dropped - socket full */
+ uint64_t mrts_mfc_lookups; /* # forw. cache hash table hits */
+ uint64_t mrts_mfc_misses; /* # forw. cache hash table misses */
+ uint64_t mrts_upcalls; /* # calls to multicast routing daemon */
+ uint64_t mrts_no_route; /* no route for packet's origin */
+ uint64_t mrts_bad_tunnel; /* malformed tunnel options */
+ uint64_t mrts_cant_tunnel; /* no room for tunnel options */
+ uint64_t mrts_wrong_if; /* arrived on wrong interface */
+ uint64_t mrts_upq_ovflw; /* upcall Q overflow */
+ uint64_t mrts_cache_cleanups; /* # entries with no upcalls */
+ uint64_t mrts_drop_sel; /* pkts dropped selectively */
+ uint64_t mrts_q_overflow; /* pkts dropped - Q overflow */
+ uint64_t mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
+ uint64_t mrts_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef _KERNEL
-#define MRTSTAT_ADD(name, val) V_mrtstat.name += (val)
+#define MRTSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct mrtstat, mrtstat, name, (val))
#define MRTSTAT_INC(name) MRTSTAT_ADD(name, 1)
#endif
Modified: trunk/sys/netinet/ip_options.c
===================================================================
--- trunk/sys/netinet/ip_options.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_options.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_options.c 213832 2010-10-14 12:32:49Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_options.c 272869 2014-10-09 23:46:17Z hrs $");
#include "opt_ipstealth.h"
@@ -66,18 +66,21 @@
#include <sys/socketvar.h>
-static int ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
- &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+static VNET_DEFINE(int, ip_dosourceroute);
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_dosourceroute), 0,
+ "Enable forwarding source routed IP packets");
+#define V_ip_dosourceroute VNET(ip_dosourceroute)
-static int ip_acceptsourceroute = 0;
+static VNET_DEFINE(int, ip_acceptsourceroute);
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
- CTLFLAG_RW, &ip_acceptsourceroute, 0,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_acceptsourceroute), 0,
"Enable accepting source routed IP packets");
+#define V_ip_acceptsourceroute VNET(ip_acceptsourceroute)
-int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
- &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+VNET_DEFINE(int, ip_doopts) = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip_doopts), 0, "Enable IP options processing ([LS]SRR, RR, TS)");
static void save_rte(struct mbuf *m, u_char *, struct in_addr);
@@ -105,9 +108,9 @@
struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* Ignore or reject packets with IP options. */
- if (ip_doopts == 0)
+ if (V_ip_doopts == 0)
return 0;
- else if (ip_doopts == 2) {
+ else if (V_ip_doopts == 2) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_FILTER_PROHIB;
goto bad;
@@ -168,7 +171,7 @@
code = ICMP_UNREACH_SRCFAIL;
goto bad;
}
- if (!ip_dosourceroute)
+ if (!V_ip_dosourceroute)
goto nosourcerouting;
/*
* Loose routing, and not at next destination
@@ -181,7 +184,7 @@
/*
* End of source route. Should be for us.
*/
- if (!ip_acceptsourceroute)
+ if (!V_ip_acceptsourceroute)
goto nosourcerouting;
save_rte(m, cp, ip->ip_src);
break;
@@ -190,7 +193,7 @@
if (V_ipstealth)
goto dropit;
#endif
- if (!ip_dosourceroute) {
+ if (!V_ip_dosourceroute) {
if (V_ipforwarding) {
char buf[16]; /* aaa.bbb.ccc.ddd\0 */
/*
@@ -412,7 +415,7 @@
if (opts->ip_nhops == 0)
return (NULL);
- m = m_get(M_DONTWAIT, MT_DATA);
+ m = m_get(M_NOWAIT, MT_DATA);
if (m == NULL)
return (NULL);
@@ -454,29 +457,23 @@
}
/*
- * Strip out IP options, at higher level protocol in the kernel. Second
- * argument is buffer to which options will be moved, and return value is
- * their length.
- *
- * XXX should be deleted; last arg currently ignored.
+ * Strip out IP options, at higher level protocol in the kernel.
*/
void
-ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+ip_stripoptions(struct mbuf *m)
{
- int i;
struct ip *ip = mtod(m, struct ip *);
- caddr_t opts;
int olen;
- olen = (ip->ip_hl << 2) - sizeof (struct ip);
- opts = (caddr_t)(ip + 1);
- i = m->m_len - (sizeof (struct ip) + olen);
- bcopy(opts + olen, opts, (unsigned)i);
+ olen = (ip->ip_hl << 2) - sizeof(struct ip);
m->m_len -= olen;
if (m->m_flags & M_PKTHDR)
m->m_pkthdr.len -= olen;
- ip->ip_v = IPVERSION;
+ ip->ip_len = htons(ntohs(ip->ip_len) - olen);
ip->ip_hl = sizeof(struct ip) >> 2;
+
+ bcopy((char *)ip + sizeof(struct ip) + olen, (ip + 1),
+ (size_t )(m->m_len - sizeof(struct ip)));
}
/*
@@ -495,7 +492,7 @@
unsigned optlen;
optlen = opt->m_len - sizeof(p->ipopt_dst);
- if (optlen + ip->ip_len > IP_MAXPACKET) {
+ if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) {
*phlen = 0;
return (m); /* XXX should fail */
}
@@ -502,12 +499,12 @@
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
- MGETHDR(n, M_DONTWAIT, MT_DATA);
+ n = m_gethdr(M_NOWAIT, MT_DATA);
if (n == NULL) {
*phlen = 0;
return (m);
}
- M_MOVE_PKTHDR(n, m);
+ m_move_pkthdr(n, m);
n->m_pkthdr.rcvif = NULL;
n->m_pkthdr.len += optlen;
m->m_len -= sizeof(struct ip);
@@ -528,7 +525,7 @@
*phlen = sizeof(struct ip) + optlen;
ip->ip_v = IPVERSION;
ip->ip_hl = *phlen >> 2;
- ip->ip_len += optlen;
+ ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
return (m);
}
Modified: trunk/sys/netinet/ip_options.h
===================================================================
--- trunk/sys/netinet/ip_options.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_options.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/ip_options.h 189343 2009-03-04 02:51:22Z bms $
+ * $FreeBSD: stable/10/sys/netinet/ip_options.h 272868 2014-10-09 23:45:26Z hrs $
*/
#ifndef _NETINET_IP_OPTIONS_H_
@@ -48,7 +48,8 @@
struct ipoptrt ip_srcrt;
};
-extern int ip_doopts; /* process or ignore IP options */
+VNET_DECLARE(int, ip_doopts); /* process or ignore IP options */
+#define V_ip_doopts VNET(ip_doopts)
int ip_checkrouteralert(struct mbuf *);
int ip_dooptions(struct mbuf *, int);
@@ -55,7 +56,7 @@
struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
int ip_optcopy(struct ip *, struct ip *);
int ip_pcbopts(struct inpcb *, int, struct mbuf *);
-void ip_stripoptions(struct mbuf *, struct mbuf *);
+void ip_stripoptions(struct mbuf *);
struct mbuf *ip_srcroute(struct mbuf *);
#endif /* !_NETINET_IP_OPTIONS_H_ */
Modified: trunk/sys/netinet/ip_output.c
===================================================================
--- trunk/sys/netinet/ip_output.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_output.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,13 +31,15 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/ip_output.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_output.c 284496 2015-06-17 07:21:43Z hselasky $");
+#include "opt_inet.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
-#include "opt_route.h"
+#include "opt_kdtrace.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mpath.h"
+#include "opt_route.h"
#include "opt_sctp.h"
#include <sys/param.h>
@@ -48,6 +50,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -65,6 +68,7 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
@@ -103,7 +107,6 @@
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
- * ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
* If route ro is present and has ro_rt initialized, route lookup would be
@@ -125,12 +128,15 @@
int n; /* scratchpad */
int error = 0;
struct sockaddr_in *dst;
+ const struct sockaddr_in *gw;
struct in_ifaddr *ia;
- int isbroadcast, sw_csum;
+ int isbroadcast;
+ uint16_t ip_len, ip_off;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
struct in_addr odst;
struct m_tag *fwd_tag = NULL;
+ int have_ia_ref;
#ifdef IPSEC
int no_route_but_check_spd = 0;
#endif
@@ -139,9 +145,9 @@
if (inp != NULL) {
INP_LOCK_ASSERT(inp);
M_SETFIB(m, inp->inp_inc.inc_fibnum);
- if (inp->inp_flags & (INP_HW_FLOWID|INP_SW_FLOWID)) {
+ if (inp->inp_flowtype != M_HASHTYPE_NONE) {
m->m_pkthdr.flowid = inp->inp_flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
}
}
@@ -151,19 +157,8 @@
}
#ifdef FLOWTABLE
- if (ro->ro_rt == NULL) {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
- if (fle != NULL)
- flow_to_route(fle, ro);
- }
+ if (ro->ro_rt == NULL)
+ (void )flowtable_lookup(AF_INET, m, ro);
#endif
if (opt) {
@@ -173,6 +168,8 @@
hlen = len; /* ip->ip_hl is updated above */
}
ip = mtod(m, struct ip *);
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
/*
* Fill in IP header. If we are not allowing fragmentation,
@@ -195,9 +192,17 @@
hlen = ip->ip_hl << 2;
}
+ /*
+ * dst/gw handling:
+ *
+ * dst can be rewritten but always points to &ro->ro_dst.
+ * gw is read-only but can point either to dst OR to rt_gateway;
+ * therefore we need to restore gw if we're redoing the lookup.
+ */
+ gw = dst = (struct sockaddr_in *)&ro->ro_dst;
again:
- dst = (struct sockaddr_in *)&ro->ro_dst;
ia = NULL;
+ have_ia_ref = 0;
/*
* If there is a cached route,
* check that it is to the same destination
@@ -214,6 +219,7 @@
RO_RTFREE(ro);
ro->ro_lle = NULL;
rte = NULL;
+ gw = dst;
}
if (rte == NULL && fwd_tag == NULL) {
bzero(dst, sizeof(*dst));
@@ -234,6 +240,7 @@
error = ENETUNREACH;
goto bad;
}
+ have_ia_ref = 1;
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
@@ -246,6 +253,7 @@
error = ENETUNREACH;
goto bad;
}
+ have_ia_ref = 1;
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
isbroadcast = in_broadcast(dst->sin_addr, ifp);
@@ -257,6 +265,8 @@
*/
ifp = imo->imo_multicast_ifp;
IFP_TO_IA(ifp, ia);
+ if (ia)
+ have_ia_ref = 1;
isbroadcast = 0; /* fool gcc */
} else {
/*
@@ -292,15 +302,14 @@
goto bad;
}
ia = ifatoia(rte->rt_ifa);
- ifa_ref(&ia->ia_ifa);
ifp = rte->rt_ifp;
- rte->rt_rmx.rmx_pksent++;
+ counter_u64_add(rte->rt_pksent, 1);
if (rte->rt_flags & RTF_GATEWAY)
- dst = (struct sockaddr_in *)rte->rt_gateway;
+ gw = (struct sockaddr_in *)rte->rt_gateway;
if (rte->rt_flags & RTF_HOST)
isbroadcast = (rte->rt_flags & RTF_BROADCAST);
else
- isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ isbroadcast = in_broadcast(gw->sin_addr, ifp);
}
/*
* Calculate MTU. If we have a route that is up, use that,
@@ -314,9 +323,9 @@
* them, there is no way for one to update all its
* routes when the MTU is changed.
*/
- if (rte->rt_rmx.rmx_mtu > ifp->if_mtu)
- rte->rt_rmx.rmx_mtu = ifp->if_mtu;
- mtu = rte->rt_rmx.rmx_mtu;
+ if (rte->rt_mtu > ifp->if_mtu)
+ rte->rt_mtu = ifp->if_mtu;
+ mtu = rte->rt_mtu;
} else {
mtu = ifp->if_mtu;
}
@@ -326,11 +335,11 @@
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
/*
- * IP destination address is multicast. Make sure "dst"
+ * IP destination address is multicast. Make sure "gw"
* still points to the address in "ro". (It may have been
* changed to point to a gateway address, above.)
*/
- dst = (struct sockaddr_in *)&ro->ro_dst;
+ gw = dst;
/*
* See if the caller provided any multicast options
*/
@@ -436,7 +445,7 @@
* packet or packet fragments, unless ALTQ is enabled on the given
* interface in which case packetdrop should be done by queueing.
*/
- n = ip->ip_len / mtu + 1; /* how many fragments ? */
+ n = ip_len / mtu + 1; /* how many fragments ? */
if (
#ifdef ALTQ
(!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
@@ -463,7 +472,7 @@
goto bad;
}
/* don't allow broadcast messages to be fragmented */
- if (ip->ip_len > mtu) {
+ if (ip_len > mtu) {
error = EMSGSIZE;
goto bad;
}
@@ -530,7 +539,7 @@
error = netisr_queue(NETISR_IP, m);
goto done;
} else {
- if (ia != NULL)
+ if (have_ia_ref)
ifa_free(&ia->ia_ifa);
goto again; /* Redo the routing table lookup. */
}
@@ -558,12 +567,11 @@
/* Or forward to some other address? */
if ((m->m_flags & M_IP_NEXTHOP) &&
(fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
- dst = (struct sockaddr_in *)&ro->ro_dst;
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
m->m_flags |= M_SKIP_FIREWALL;
m->m_flags &= ~M_IP_NEXTHOP;
m_tag_delete(m, fwd_tag);
- if (ia != NULL)
+ if (have_ia_ref)
ifa_free(&ia->ia_ifa);
goto again;
}
@@ -580,31 +588,29 @@
}
m->m_pkthdr.csum_flags |= CSUM_IP;
- sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
- if (sw_csum & CSUM_DELAY_DATA) {
+ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
in_delayed_cksum(m);
- sw_csum &= ~CSUM_DELAY_DATA;
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#ifdef SCTP
- if (sw_csum & CSUM_SCTP) {
+ if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
- sw_csum &= ~CSUM_SCTP;
+ m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
#endif
- m->m_pkthdr.csum_flags &= ifp->if_hwassist;
/*
* If small enough for interface, or the interface will take
* care of the fragmentation for us, we can just send directly.
*/
- if (ip->ip_len <= mtu ||
+ if (ip_len <= mtu ||
(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
- ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
+ ((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
ip->ip_sum = in_cksum(m, hlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
/*
* Record statistics for this interface address.
@@ -622,20 +628,21 @@
}
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
- m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
+ m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
#endif
/*
* Reset layer specific mbuf flags
* to avoid confusing lower layers.
*/
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (const struct sockaddr *)gw, ro);
goto done;
}
/* Balk when DF bit is set or the interface didn't support TSO. */
- if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
+ if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
IPSTAT_INC(ips_cantfrag);
goto bad;
@@ -645,7 +652,7 @@
* Too large for interface; fragment if possible. If successful,
* on return, m will point to a list of packets to be sent.
*/
- error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
+ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
if (error)
goto bad;
for (; m; m = m0) {
@@ -661,10 +668,11 @@
* Reset layer specific mbuf flags
* to avoid confusing upper layers.
*/
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
+ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro);
+ (const struct sockaddr *)gw, ro);
} else
m_freem(m);
}
@@ -675,7 +683,7 @@
done:
if (ro == &iproute)
RO_RTFREE(ro);
- if (ia != NULL)
+ if (have_ia_ref)
ifa_free(&ia->ia_ifa);
return (error);
bad:
@@ -690,11 +698,10 @@
* chain of fragments that should be freed by the caller.
*
* if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
- * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum)
+ u_long if_hwassist_flags)
{
int error = 0;
int hlen = ip->ip_hl << 2;
@@ -704,8 +711,12 @@
int firstlen;
struct mbuf **mnext;
int nfrags;
+ uint16_t ip_len, ip_off;
- if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
+ ip_len = ntohs(ip->ip_len);
+ ip_off = ntohs(ip->ip_off);
+
+ if (ip_off & IP_DF) { /* Fragmentation not allowed */
IPSTAT_INC(ips_cantfrag);
return EMSGSIZE;
}
@@ -720,14 +731,12 @@
* If the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
- if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
- (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#ifdef SCTP
- if (m0->m_pkthdr.csum_flags & CSUM_SCTP &&
- (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
sctp_delayed_cksum(m0, hlen);
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
@@ -744,10 +753,8 @@
* be less than the receiver's page size ?
*/
int newlen;
- struct mbuf *m;
- for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
- off += m->m_len;
+ off = MIN(mtu, m0->m_pkthdr.len);
/*
* firstlen (off - hlen) must be aligned on an
@@ -779,19 +786,31 @@
* The fragments are linked off the m_nextpkt of the original
* packet, which after processing serves as the first fragment.
*/
- for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
+ for (nfrags = 1; off < ip_len; off += len, nfrags++) {
struct ip *mhip; /* ip header on the fragment */
struct mbuf *m;
int mhlen = sizeof (struct ip);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
IPSTAT_INC(ips_odropped);
goto done;
}
- m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
/*
+ * Make sure the complete packet header gets copied
+ * from the originating mbuf to the newly created
+ * mbuf. This also ensures that existing firewall
+ * classification(s), VLAN tags and so on get copied
+ * to the resulting fragmented packet(s):
+ */
+ if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
+ m_free(m);
+ error = ENOBUFS;
+ IPSTAT_INC(ips_odropped);
+ goto done;
+ }
+ /*
* In the first mbuf, leave room for the link header, then
* copy the original IP header including options. The payload
* goes into an additional mbuf chain returned by m_copym().
@@ -805,15 +824,14 @@
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
- /* XXX do we need to add ip->ip_off below ? */
- mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
- if (off + len >= ip->ip_len) { /* last fragment */
- len = ip->ip_len - off;
- m->m_flags |= M_LASTFRAG;
- } else
+ /* XXX do we need to add ip_off below ? */
+ mhip->ip_off = ((off - hlen) >> 3) + ip_off;
+ if (off + len >= ip_len)
+ len = ip_len - off;
+ else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
- m->m_next = m_copym(m0, off, len, M_DONTWAIT);
+ m->m_next = m_copym(m0, off, len, M_NOWAIT);
if (m->m_next == NULL) { /* copy failed */
m_free(m);
error = ENOBUFS; /* ??? */
@@ -821,36 +839,33 @@
goto done;
}
m->m_pkthdr.len = mhlen + len;
- m->m_pkthdr.rcvif = NULL;
#ifdef MAC
mac_netinet_fragment(m0, m);
#endif
- m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
mhip->ip_off = htons(mhip->ip_off);
mhip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
mhip->ip_sum = in_cksum(m, mhlen);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
*mnext = m;
mnext = &m->m_nextpkt;
}
IPSTAT_ADD(ips_ofragments, nfrags);
- /* set first marker for fragment chain */
- m0->m_flags |= M_FIRSTFRAG | M_FRAG;
- m0->m_pkthdr.csum_data = nfrags;
-
/*
* Update first fragment by trimming what's been copied out
* and updating header.
*/
- m_adj(m0, hlen + firstlen - ip->ip_len);
+ m_adj(m0, hlen + firstlen - ip_len);
m0->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m0->m_pkthdr.len);
- ip->ip_off |= IP_MF;
- ip->ip_off = htons(ip->ip_off);
+ ip->ip_off = htons(ip_off | IP_MF);
ip->ip_sum = 0;
- if (sw_csum & CSUM_DELAY_IP)
+ if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
ip->ip_sum = in_cksum(m0, hlen);
+ m0->m_pkthdr.csum_flags &= ~CSUM_IP;
+ }
done:
*m_frag = m0;
@@ -861,26 +876,23 @@
in_delayed_cksum(struct mbuf *m)
{
struct ip *ip;
- u_short csum, offset;
+ uint16_t csum, offset, ip_len;
ip = mtod(m, struct ip *);
offset = ip->ip_hl << 2 ;
- csum = in_cksum_skip(m, ip->ip_len, offset);
+ ip_len = ntohs(ip->ip_len);
+ csum = in_cksum_skip(m, ip_len, offset);
if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
csum = 0xffff;
offset += m->m_pkthdr.csum_data; /* checksum offset */
- if (offset + sizeof(u_short) > m->m_len) {
- printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
- m->m_len, offset, ip->ip_p);
- /*
- * XXX
- * this shouldn't happen, but if it does, the
- * correct behavior may be to insert the checksum
- * in the appropriate next mbuf in the chain.
- */
- return;
+ /* find the mbuf in the chain where the checksum starts*/
+ while ((m != NULL) && (offset >= m->m_len)) {
+ offset -= m->m_len;
+ m = m->m_next;
}
+ KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
+ KASSERT(offset + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
*(u_short *)(m->m_data + offset) = csum;
}
@@ -902,13 +914,10 @@
switch (sopt->sopt_name) {
case SO_REUSEADDR:
INP_WLOCK(inp);
- if (IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) {
- if ((so->so_options &
- (SO_REUSEADDR | SO_REUSEPORT)) != 0)
- inp->inp_flags2 |= INP_REUSEPORT;
- else
- inp->inp_flags2 &= ~INP_REUSEPORT;
- }
+ if ((so->so_options & SO_REUSEADDR) != 0)
+ inp->inp_flags2 |= INP_REUSEADDR;
+ else
+ inp->inp_flags2 &= ~INP_REUSEADDR;
INP_WUNLOCK(inp);
error = 0;
break;
@@ -947,7 +956,7 @@
error = EMSGSIZE;
break;
}
- MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
+ m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
break;
@@ -1285,7 +1294,7 @@
* Make a deep copy of the packet because we're going to
* modify the pack in order to generate checksums.
*/
- copym = m_dup(m, M_DONTWAIT);
+ copym = m_dup(m, M_NOWAIT);
if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
copym = m_pullup(copym, hlen);
if (copym != NULL) {
@@ -1302,8 +1311,6 @@
* than the interface's MTU. Can this possibly matter?
*/
ip = mtod(copym, struct ip *);
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
ip->ip_sum = in_cksum(copym, hlen);
#if 1 /* XXX */
Modified: trunk/sys/netinet/ip_var.h
===================================================================
--- trunk/sys/netinet/ip_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/ip_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: stable/9/sys/netinet/ip_var.h 243586 2012-11-27 01:59:51Z ae $
+ * $FreeBSD: stable/10/sys/netinet/ip_var.h 263307 2014-03-18 16:56:05Z glebius $
*/
#ifndef _NETINET_IP_VAR_H_
@@ -94,50 +94,54 @@
u_short imo_max_memberships; /* max memberships this socket */
struct in_multi **imo_membership; /* group memberships */
struct in_mfilter *imo_mfilters; /* source filters */
+ STAILQ_ENTRY(ip_moptions) imo_link;
};
struct ipstat {
- u_long ips_total; /* total packets received */
- u_long ips_badsum; /* checksum bad */
- u_long ips_tooshort; /* packet too short */
- u_long ips_toosmall; /* not enough data */
- u_long ips_badhlen; /* ip header length < data size */
- u_long ips_badlen; /* ip length < ip header length */
- u_long ips_fragments; /* fragments received */
- u_long ips_fragdropped; /* frags dropped (dups, out of space) */
- u_long ips_fragtimeout; /* fragments timed out */
- u_long ips_forward; /* packets forwarded */
- u_long ips_fastforward; /* packets fast forwarded */
- u_long ips_cantforward; /* packets rcvd for unreachable dest */
- u_long ips_redirectsent; /* packets forwarded on same net */
- u_long ips_noproto; /* unknown or unsupported protocol */
- u_long ips_delivered; /* datagrams delivered to upper level*/
- u_long ips_localout; /* total ip packets generated here */
- u_long ips_odropped; /* lost packets due to nobufs, etc. */
- u_long ips_reassembled; /* total packets reassembled ok */
- u_long ips_fragmented; /* datagrams successfully fragmented */
- u_long ips_ofragments; /* output fragments created */
- u_long ips_cantfrag; /* don't fragment flag was set, etc. */
- u_long ips_badoptions; /* error in option processing */
- u_long ips_noroute; /* packets discarded due to no route */
- u_long ips_badvers; /* ip version != 4 */
- u_long ips_rawout; /* total raw ip packets generated */
- u_long ips_toolong; /* ip length > max ip packet size */
- u_long ips_notmember; /* multicasts for unregistered grps */
- u_long ips_nogif; /* no match gif found */
- u_long ips_badaddr; /* invalid address on header */
+ uint64_t ips_total; /* total packets received */
+ uint64_t ips_badsum; /* checksum bad */
+ uint64_t ips_tooshort; /* packet too short */
+ uint64_t ips_toosmall; /* not enough data */
+ uint64_t ips_badhlen; /* ip header length < data size */
+ uint64_t ips_badlen; /* ip length < ip header length */
+ uint64_t ips_fragments; /* fragments received */
+ uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */
+ uint64_t ips_fragtimeout; /* fragments timed out */
+ uint64_t ips_forward; /* packets forwarded */
+ uint64_t ips_fastforward; /* packets fast forwarded */
+ uint64_t ips_cantforward; /* packets rcvd for unreachable dest */
+ uint64_t ips_redirectsent; /* packets forwarded on same net */
+ uint64_t ips_noproto; /* unknown or unsupported protocol */
+ uint64_t ips_delivered; /* datagrams delivered to upper level*/
+ uint64_t ips_localout; /* total ip packets generated here */
+ uint64_t ips_odropped; /* lost packets due to nobufs, etc. */
+ uint64_t ips_reassembled; /* total packets reassembled ok */
+ uint64_t ips_fragmented; /* datagrams successfully fragmented */
+ uint64_t ips_ofragments; /* output fragments created */
+ uint64_t ips_cantfrag; /* don't fragment flag was set, etc. */
+ uint64_t ips_badoptions; /* error in option processing */
+ uint64_t ips_noroute; /* packets discarded due to no route */
+ uint64_t ips_badvers; /* ip version != 4 */
+ uint64_t ips_rawout; /* total raw ip packets generated */
+ uint64_t ips_toolong; /* ip length > max ip packet size */
+ uint64_t ips_notmember; /* multicasts for unregistered grps */
+ uint64_t ips_nogif; /* no match gif found */
+ uint64_t ips_badaddr; /* invalid address on header */
};
#ifdef _KERNEL
+#include <sys/counter.h>
#include <net/vnet.h>
+VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define IPSTAT_ADD(name, val) V_ipstat.name += (val)
-#define IPSTAT_SUB(name, val) V_ipstat.name -= (val)
+#define IPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
+#define IPSTAT_SUB(name, val) IPSTAT_ADD(name, -(val))
#define IPSTAT_INC(name) IPSTAT_ADD(name, 1)
#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1)
@@ -145,11 +149,11 @@
* Kernel module consumers must use this accessor macro.
*/
void kmod_ipstat_inc(int statnum);
-#define KMOD_IPSTAT_INC(name) \
- kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(u_long))
+#define KMOD_IPSTAT_INC(name) \
+ kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
void kmod_ipstat_dec(int statnum);
-#define KMOD_IPSTAT_DEC(name) \
- kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(u_long))
+#define KMOD_IPSTAT_DEC(name) \
+ kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
/* flags passed to ip_output as last parameter */
#define IP_FORWARDING 0x1 /* most of ip header exists */
@@ -159,12 +163,6 @@
#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
-/*
- * mbuf flag used by ip_fastfwd
- */
-#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
-#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
-
#ifdef __NO_STRICT_ALIGNMENT
#define IP_HDR_ALIGNED_P(ip) 1
#else
@@ -176,7 +174,6 @@
struct route;
struct sockopt;
-VNET_DECLARE(struct ipstat, ipstat);
VNET_DECLARE(u_short, ip_id); /* ip packet ctr, for ids */
VNET_DECLARE(int, ip_defttl); /* default IP ttl */
VNET_DECLARE(int, ipforwarding); /* ip forwarding */
@@ -192,7 +189,6 @@
VNET_DECLARE(int, drop_redirect);
extern struct pr_usrreqs rip_usrreqs;
-#define V_ipstat VNET(ipstat)
#define V_ip_id VNET(ip_id)
#define V_ip_defttl VNET(ip_defttl)
#define V_ipforwarding VNET(ipforwarding)
@@ -211,7 +207,7 @@
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum);
+ u_long if_hwassist_flags);
void ip_forward(struct mbuf *m, int srcrt);
void ip_init(void);
#ifdef VIMAGE
@@ -295,9 +291,7 @@
struct ip_fw_args;
typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *);
-VNET_DECLARE(ip_fw_chk_ptr_t, ip_fw_chk_ptr);
VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
-#define V_ip_fw_chk_ptr VNET(ip_fw_chk_ptr)
#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr)
/* Divert hooks. */
Modified: trunk/sys/netinet/khelp/h_ertt.c
===================================================================
--- trunk/sys/netinet/khelp/h_ertt.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/khelp/h_ertt.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010
* Swinburne University of Technology, Melbourne, Australia
@@ -37,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/khelp/h_ertt.c 239474 2012-08-21 02:17:06Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/khelp/h_ertt.c 239346 2012-08-17 01:49:51Z lstewart $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/netinet/khelp/h_ertt.h
===================================================================
--- trunk/sys/netinet/khelp/h_ertt.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/khelp/h_ertt.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2009-2010
* Swinburne University of Technology, Melbourne, Australia
@@ -30,7 +31,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/khelp/h_ertt.h 220560 2011-04-12 08:13:18Z lstewart $
+ * $FreeBSD: stable/10/sys/netinet/khelp/h_ertt.h 220560 2011-04-12 08:13:18Z lstewart $
*/
/*
Modified: trunk/sys/netinet/libalias/HISTORY
===================================================================
--- trunk/sys/netinet/libalias/HISTORY 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/HISTORY 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,4 +1,5 @@
-$FreeBSD: stable/9/sys/netinet/libalias/HISTORY 63899 2000-07-26 23:15:46Z archie $
+$MidnightBSD$
+$FreeBSD: stable/10/sys/netinet/libalias/HISTORY 63899 2000-07-26 23:15:46Z archie $
Version 1.0: August 11, 1996 (cjm)
Property changes on: trunk/sys/netinet/libalias/HISTORY
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/netinet/libalias/alias.c
===================================================================
--- trunk/sys/netinet/libalias/alias.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias.c 214754 2010-11-03 21:10:12Z n_hibma $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias.c 318519 2017-05-19 07:31:48Z eugen $");
/*
Alias.c provides supervisory control for the functions of the
@@ -699,12 +700,14 @@
struct alias_link *lnk;
LIBALIAS_LOCK_ASSERT(la);
- (void)create;
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
+ if (!create)
+ return (PKT_ALIAS_IGNORED);
+
lnk = FindProtoOut(la, *ip_src, ip_dst, ip_p);
if (lnk != NULL) {
struct in_addr alias_address;
@@ -1749,40 +1752,22 @@
struct mbuf *
m_megapullup(struct mbuf *m, int len) {
struct mbuf *mcl;
-
+
if (len > m->m_pkthdr.len)
goto bad;
-
- /* Do not reallocate packet if it is sequentional,
- * writable and has some extra space for expansion.
- * XXX: Constant 100bytes is completely empirical. */
-#define RESERVE 100
- if (m->m_next == NULL && M_WRITABLE(m) && M_TRAILINGSPACE(m) >= RESERVE)
+
+ if (m->m_next == NULL && M_WRITABLE(m))
return (m);
- if (len <= MCLBYTES - RESERVE) {
- mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
- } else if (len < MJUM16BYTES) {
- int size;
- if (len <= MJUMPAGESIZE - RESERVE) {
- size = MJUMPAGESIZE;
- } else if (len <= MJUM9BYTES - RESERVE) {
- size = MJUM9BYTES;
- } else {
- size = MJUM16BYTES;
- };
- mcl = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
- } else {
- goto bad;
- }
+ mcl = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
if (mcl == NULL)
goto bad;
-
+ m_align(mcl, len);
m_move_pkthdr(mcl, m);
m_copydata(m, 0, len, mtod(mcl, caddr_t));
mcl->m_len = mcl->m_pkthdr.len = len;
m_freem(m);
-
+
return (mcl);
bad:
m_freem(m);
Modified: trunk/sys/netinet/libalias/alias.h
===================================================================
--- trunk/sys/netinet/libalias/alias.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/* lint -save -library Flexelint comment for external headers */
/*-
@@ -25,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/libalias/alias.h 223080 2011-06-14 13:35:24Z ae $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias.h 223080 2011-06-14 13:35:24Z ae $
*/
/*
Modified: trunk/sys/netinet/libalias/alias_cuseeme.c
===================================================================
--- trunk/sys/netinet/libalias/alias_cuseeme.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_cuseeme.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Brian Somers <brian at Awfulhak.org>
* with the aid of code written by
@@ -27,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_cuseeme.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_cuseeme.c 190841 2009-04-08 11:56:49Z piso $");
#ifdef _KERNEL
#include <sys/param.h>
Modified: trunk/sys/netinet/libalias/alias_db.c
===================================================================
--- trunk/sys/netinet/libalias/alias_db.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_db.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_db.c 223437 2011-06-22 20:00:27Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_db.c 248158 2013-03-11 12:22:44Z glebius $");
/*
Alias_db.c encapsulates all data structures used for storing
@@ -2729,7 +2730,6 @@
InitPunchFW(struct libalias *la)
{
- LIBALIAS_LOCK_ASSERT(la);
la->fireWallField = malloc(la->fireWallNumNums);
if (la->fireWallField) {
memset(la->fireWallField, 0, la->fireWallNumNums);
@@ -2745,7 +2745,6 @@
UninitPunchFW(struct libalias *la)
{
- LIBALIAS_LOCK_ASSERT(la);
ClearAllFWHoles(la);
if (la->fireWallFD >= 0)
close(la->fireWallFD);
@@ -2765,7 +2764,6 @@
struct ip_fw rule; /* On-the-fly built rule */
int fwhole; /* Where to punch hole */
- LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
/* Don't do anything unless we are asked to */
@@ -2839,7 +2837,6 @@
{
struct libalias *la;
- LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
if (lnk->link_type == LINK_TCP) {
int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall
@@ -2864,7 +2861,6 @@
struct ip_fw rule; /* On-the-fly built rule */
int i;
- LIBALIAS_LOCK_ASSERT(la);
if (la->fireWallFD < 0)
return;
@@ -2878,7 +2874,7 @@
memset(la->fireWallField, 0, la->fireWallNumNums);
}
-#endif
+#endif /* !NO_FW_PUNCH */
void
LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)
Modified: trunk/sys/netinet/libalias/alias_dummy.c
===================================================================
--- trunk/sys/netinet/libalias/alias_dummy.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_dummy.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_dummy.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_dummy.c 190841 2009-04-08 11:56:49Z piso $");
/*
* Alias_dummy is just an empty skeleton used to demostrate how to write
Modified: trunk/sys/netinet/libalias/alias_ftp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_ftp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_ftp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_ftp.c 223437 2011-06-22 20:00:27Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_ftp.c 223437 2011-06-22 20:00:27Z ae $");
/*
Alias_ftp.c performs special processing for FTP sessions under
Modified: trunk/sys/netinet/libalias/alias_irc.c
===================================================================
--- trunk/sys/netinet/libalias/alias_irc.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_irc.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_irc.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_irc.c 190841 2009-04-08 11:56:49Z piso $");
/* Alias_irc.c intercepts packages contain IRC CTCP commands, and
changes DCC commands to export a port on the aliasing host instead
Modified: trunk/sys/netinet/libalias/alias_local.h
===================================================================
--- trunk/sys/netinet/libalias/alias_local.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_local.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_local.h 223437 2011-06-22 20:00:27Z ae $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_local.h 223437 2011-06-22 20:00:27Z ae $
*/
/*
Modified: trunk/sys/netinet/libalias/alias_mod.c
===================================================================
--- trunk/sys/netinet/libalias/alias_mod.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_mod.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_mod.c 201758 2010-01-07 21:01:37Z mbr $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_mod.c 201758 2010-01-07 21:01:37Z mbr $");
#ifdef _KERNEL
#include <sys/libkern.h>
Modified: trunk/sys/netinet/libalias/alias_mod.h
===================================================================
--- trunk/sys/netinet/libalias/alias_mod.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_mod.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_mod.h 190841 2009-04-08 11:56:49Z piso $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_mod.h 190841 2009-04-08 11:56:49Z piso $
*/
/*
Modified: trunk/sys/netinet/libalias/alias_nbt.c
===================================================================
--- trunk/sys/netinet/libalias/alias_nbt.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_nbt.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Written by Atsushi Murai <amurai at spec.co.jp>
* Copyright (c) 1998, System Planning and Engineering Co.
@@ -29,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_nbt.c 190938 2009-04-11 15:19:09Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_nbt.c 190938 2009-04-11 15:19:09Z piso $");
/*
alias_nbt.c performs special processing for NetBios over TCP/IP
Modified: trunk/sys/netinet/libalias/alias_old.c
===================================================================
--- trunk/sys/netinet/libalias/alias_old.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_old.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Poul-Henning Kamp <phk at FreeBSD.org>
* All rights reserved.
Modified: trunk/sys/netinet/libalias/alias_pptp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_pptp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_pptp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* alias_pptp.c
*
@@ -37,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_pptp.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_pptp.c 190841 2009-04-08 11:56:49Z piso $");
/* Includes */
#ifdef _KERNEL
Modified: trunk/sys/netinet/libalias/alias_proxy.c
===================================================================
--- trunk/sys/netinet/libalias/alias_proxy.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_proxy.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_proxy.c 243282 2012-11-19 15:26:03Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_proxy.c 241648 2012-10-17 20:23:07Z emaste $");
/* file: alias_proxy.c
Modified: trunk/sys/netinet/libalias/alias_sctp.c
===================================================================
--- trunk/sys/netinet/libalias/alias_sctp.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_sctp.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008
* Swinburne University of Technology, Melbourne, Australia.
@@ -68,7 +69,7 @@
* - Dynamic control of hash-table size
*/
-/* $FreeBSD: stable/9/sys/netinet/libalias/alias_sctp.c 249132 2013-04-05 08:22:11Z mav $ */
+/* $FreeBSD: stable/10/sys/netinet/libalias/alias_sctp.c 332284 2018-04-08 16:29:24Z tuexen $ */
#ifdef _KERNEL
#include <machine/stdarg.h>
@@ -185,7 +186,7 @@
/* Use kernel allocator. */
#ifdef _SYS_MALLOC_H_
#define sn_malloc(x) malloc(x, M_SCTPNAT, M_NOWAIT|M_ZERO)
-#define sn_calloc(n,x) sn_malloc(x * n)
+#define sn_calloc(n,x) sn_malloc((x) * (n))
#define sn_free(x) free(x, M_SCTPNAT)
#endif// #ifdef _SYS_MALLOC_H_
@@ -420,9 +421,9 @@
error = sysctl_handle_int(oidp, &level, 0, req);
if (error) return (error);
- sysctl_log_level = (level > SN_LOG_DEBUG_MAX)?(SN_LOG_DEBUG_MAX):(level);
- sysctl_log_level = (level < SN_LOG_LOW)?(SN_LOG_LOW):(level);
-
+ level = (level > SN_LOG_DEBUG_MAX)?(SN_LOG_DEBUG_MAX):(level);
+ level = (level < SN_LOG_LOW)?(SN_LOG_LOW):(level);
+ sysctl_log_level = level;
return (0);
}
@@ -2114,13 +2115,13 @@
* @return pointer to association or NULL
*/
static struct sctp_nat_assoc*
-FindSctpLocalT(struct libalias *la, struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port)
+FindSctpLocalT(struct libalias *la, struct in_addr g_addr, uint32_t l_vtag, uint16_t g_port, uint16_t l_port)
{
u_int i;
struct sctp_nat_assoc *assoc = NULL, *lastmatch = NULL;
struct sctp_GlobalAddress *G_Addr = NULL;
int cnt = 0;
-
+
if (l_vtag != 0) { /* an init packet, vtag==0 */
i = SN_TABLE_HASH(l_vtag, g_port, la->sctpNatTableSize);
LIST_FOREACH(assoc, &la->sctpTableGlobal[i], list_G) {
@@ -2127,11 +2128,11 @@
if ((assoc->g_vtag == l_vtag) && (assoc->g_port == g_port) && (assoc->l_port == l_port)) {
if (assoc->num_Gaddr) {
LIST_FOREACH(G_Addr, &(assoc->Gaddr), list_Gaddr) {
- if(G_Addr->g_addr.s_addr == G_Addr->g_addr.s_addr)
- return(assoc); /* full match */
+ if (G_Addr->g_addr.s_addr == g_addr.s_addr)
+ return (assoc); /* full match */
}
} else {
- if (++cnt > 1) return(NULL);
+ if (++cnt > 1) return (NULL);
lastmatch = assoc;
}
}
@@ -2138,7 +2139,7 @@
}
}
/* If there is more than one match we do not know which local address to send to */
- return( cnt ? lastmatch : NULL );
+ return (cnt ? lastmatch : NULL);
}
/** @ingroup Hash
Modified: trunk/sys/netinet/libalias/alias_sctp.h
===================================================================
--- trunk/sys/netinet/libalias/alias_sctp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_sctp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008
* Swinburne University of Technology, Melbourne, Australia.
@@ -45,7 +46,7 @@
*
*/
-/* $FreeBSD: stable/9/sys/netinet/libalias/alias_sctp.h 222809 2011-06-07 06:57:22Z ae $ */
+/* $FreeBSD: stable/10/sys/netinet/libalias/alias_sctp.h 235644 2012-05-19 05:14:24Z marcel $ */
#ifndef _ALIAS_SCTP_H_
#define _ALIAS_SCTP_H_
@@ -92,7 +93,6 @@
#ifndef _KERNEL
#include <stdlib.h>
#include <stdio.h>
-#include <curses.h>
#endif //#ifdef _KERNEL
Modified: trunk/sys/netinet/libalias/alias_skinny.c
===================================================================
--- trunk/sys/netinet/libalias/alias_skinny.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_skinny.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* alias_skinny.c
*
@@ -27,7 +28,7 @@
*
* Author: Joe Marcus Clarke <marcus at FreeBSD.org>
*
- * $FreeBSD: stable/9/sys/netinet/libalias/alias_skinny.c 190841 2009-04-08 11:56:49Z piso $
+ * $FreeBSD: stable/10/sys/netinet/libalias/alias_skinny.c 240725 2012-09-20 03:29:43Z kevlo $
*/
#ifdef _KERNEL
@@ -340,7 +341,7 @@
* through the packet using len to determine message boundaries.
* This comes into play big time with port messages being in the
* same packet as register messages. Also, open receive channel
- * acks are usually buried in a pakcet some 400 bytes long.
+ * acks are usually buried in a packet some 400 bytes long.
*/
while (dlen >= skinny_hdr_len) {
len = (sd->len);
Modified: trunk/sys/netinet/libalias/alias_smedia.c
===================================================================
--- trunk/sys/netinet/libalias/alias_smedia.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_smedia.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* alias_smedia.c
*
@@ -64,7 +65,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_smedia.c 190841 2009-04-08 11:56:49Z piso $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_smedia.c 190841 2009-04-08 11:56:49Z piso $");
/*
Alias_smedia.c is meant to contain the aliasing code for streaming media
Modified: trunk/sys/netinet/libalias/alias_util.c
===================================================================
--- trunk/sys/netinet/libalias/alias_util.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/alias_util.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Charles Mott <cm at linktel.net>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/libalias/alias_util.c 174348 2007-12-06 09:31:13Z des $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/libalias/alias_util.c 174348 2007-12-06 09:31:13Z des $");
/*
Modified: trunk/sys/netinet/libalias/libalias.3
===================================================================
--- trunk/sys/netinet/libalias/libalias.3 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/libalias/libalias.3 2018-05-25 13:10:11 UTC (rev 9927)
@@ -23,7 +23,8 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $FreeBSD: stable/9/sys/netinet/libalias/libalias.3 240059 2012-09-02 18:54:51Z eadler $
+.\" $FreeBSD: stable/10/sys/netinet/libalias/libalias.3 246144 2013-01-31 10:29:22Z glebius $
+.\" $MidnightBSD$
.\"
.Dd July 4, 2011
.Dt LIBALIAS 3
@@ -201,11 +202,10 @@
If this mode bit is set, traffic on the local network which does not
originate from unregistered address spaces will be ignored.
Standard Class A, B and C unregistered addresses are:
-.Bd -literal -offset indent
+.Pp
10.0.0.0 -> 10.255.255.255 (Class A subnet)
172.16.0.0 -> 172.31.255.255 (Class B subnets)
192.168.0.0 -> 192.168.255.255 (Class C subnets)
-.Ed
.Pp
This option is useful in the case that the packet aliasing host has both
registered and unregistered subnets on different interfaces.
@@ -499,7 +499,7 @@
New traffic generated by any of the local machines, designated in the
several function calls, will be aliased to the same address.
Consider the following example:
-.Bd -literal -offset indent
+.Pp
LibAliasRedirectAddr(la, inet_aton("192.168.0.2"),
inet_aton("141.221.254.101"));
LibAliasRedirectAddr(la, inet_aton("192.168.0.3"),
@@ -506,7 +506,6 @@
inet_aton("141.221.254.101"));
LibAliasRedirectAddr(la, inet_aton("192.168.0.4"),
inet_aton("141.221.254.101"));
-.Ed
.Pp
Any outgoing connections such as
.Xr telnet 1
@@ -903,39 +902,6 @@
This function can be used if an already-aliased packet needs to have its
original IP header restored for further processing (e.g.\& logging).
.Ed
-.Sh AUTHORS
-.An Charles Mott Aq cm at linktel.net ,
-versions 1.0 - 1.8, 2.0 - 2.4.
-.An Eivind Eklund Aq eivind at FreeBSD.org ,
-versions 1.8b, 1.9 and 2.5.
-Added IRC DCC support as well as contributing a number of architectural
-improvements; added the firewall bypass for FTP/IRC DCC.
-.An Erik Salander Aq erik at whistle.com
-added support for PPTP and RTSP.
-.An Junichi Satoh Aq junichi at junichi.org
-added support for RTSP/PNA.
-.An Ruslan Ermilov Aq ru at FreeBSD.org
-added support for PPTP and LSNAT as well as general hacking.
-.An Paolo Pisati Aq piso at FreeBSD.org
-made the library modular, moving support for all
-protocols (except for IP, TCP and UDP) to external modules.
-.Sh ACKNOWLEDGEMENTS
-Listed below, in approximate chronological order, are individuals who
-have provided valuable comments and/or debugging assistance.
-.Bd -ragged -offset indent
-.An -split
-.An Gary Roberts
-.An Tom Torrance
-.An Reto Burkhalter
-.An Martin Renters
-.An Brian Somers
-.An Paul Traina
-.An Ari Suutari
-.An Dave Remien
-.An J. Fortes
-.An Andrzej Bialecki
-.An Gordon Burditt
-.Ed
.Sh CONCEPTUAL BACKGROUND
This section is intended for those who are planning to modify the source
code or want to create somewhat esoteric applications using the packet
@@ -1477,3 +1443,38 @@
facility and the
.Dv LOG_INFO
level.
+.Sh AUTHORS
+.An Charles Mott Aq cm at linktel.net ,
+versions 1.0 - 1.8, 2.0 - 2.4.
+.An Eivind Eklund Aq eivind at FreeBSD.org ,
+versions 1.8b, 1.9 and 2.5.
+Added IRC DCC support as well as contributing a number of architectural
+improvements; added the firewall bypass for FTP/IRC DCC.
+.An Erik Salander Aq erik at whistle.com
+added support for PPTP and RTSP.
+.An Junichi Satoh Aq junichi at junichi.org
+added support for RTSP/PNA.
+.An Ruslan Ermilov Aq ru at FreeBSD.org
+added support for PPTP and LSNAT as well as general hacking.
+.An Gleb Smirnoff Aq glebius at FreeBSD.org
+ported the library to kernel space.
+.An Paolo Pisati Aq piso at FreeBSD.org
+made the library modular, moving support for all
+protocols (except for IP, TCP and UDP) to external modules.
+.Sh ACKNOWLEDGEMENTS
+Listed below, in approximate chronological order, are individuals who
+have provided valuable comments and/or debugging assistance.
+.Bd -ragged -offset indent
+.An -split
+.An Gary Roberts
+.An Tom Torrance
+.An Reto Burkhalter
+.An Martin Renters
+.An Brian Somers
+.An Paul Traina
+.An Ari Suutari
+.An Dave Remien
+.An J. Fortes
+.An Andrzej Bialecki
+.An Gordon Burditt
+.Ed
Property changes on: trunk/sys/netinet/libalias/libalias.3
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Modified: trunk/sys/netinet/pim.h
===================================================================
--- trunk/sys/netinet/pim.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/pim.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/pim.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/netinet/pim.h 139823 2005-01-07 01:45:51Z imp $
*/
#ifndef _NETINET_PIM_H_
Modified: trunk/sys/netinet/pim_var.h
===================================================================
--- trunk/sys/netinet/pim_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/pim_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/pim_var.h 208744 2010-06-02 15:44:43Z zec $
+ * $FreeBSD: stable/10/sys/netinet/pim_var.h 254925 2013-08-26 18:16:05Z jhb $
*/
#ifndef _NETINET_PIM_VAR_H_
@@ -47,21 +47,22 @@
* PIM statistics kept in the kernel
*/
struct pimstat {
- u_quad_t pims_rcv_total_msgs; /* total PIM messages received */
- u_quad_t pims_rcv_total_bytes; /* total PIM bytes received */
- u_quad_t pims_rcv_tooshort; /* rcvd with too few bytes */
- u_quad_t pims_rcv_badsum; /* rcvd with bad checksum */
- u_quad_t pims_rcv_badversion; /* rcvd bad PIM version */
- u_quad_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
- u_quad_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
- u_quad_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
- u_quad_t pims_rcv_badregisters; /* rcvd invalid registers */
- u_quad_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
- u_quad_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
+ uint64_t pims_rcv_total_msgs; /* total PIM messages received */
+ uint64_t pims_rcv_total_bytes; /* total PIM bytes received */
+ uint64_t pims_rcv_tooshort; /* rcvd with too few bytes */
+ uint64_t pims_rcv_badsum; /* rcvd with bad checksum */
+ uint64_t pims_rcv_badversion; /* rcvd bad PIM version */
+ uint64_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
+ uint64_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
+ uint64_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
+ uint64_t pims_rcv_badregisters; /* rcvd invalid registers */
+ uint64_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
+ uint64_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
};
#ifdef _KERNEL
-#define PIMSTAT_ADD(name, val) V_pimstat.name += (val)
+#define PIMSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct pimstat, pimstat, name, (val))
#define PIMSTAT_INC(name) PIMSTAT_ADD(name, 1)
#endif
@@ -71,11 +72,6 @@
#define PIMCTL_STATS 1 /* statistics (read-only) */
#define PIMCTL_MAXID 2
-#define PIMCTL_NAMES { \
- { 0, 0 }, \
- { "stats", CTLTYPE_STRUCT }, \
-}
-
#ifdef _KERNEL
void pim_input(struct mbuf *, int);
Modified: trunk/sys/netinet/raw_ip.c
===================================================================
--- trunk/sys/netinet/raw_ip.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/raw_ip.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/raw_ip.c 227423 2011-11-10 19:10:53Z andre $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/raw_ip.c 266718 2014-05-26 22:54:15Z smh $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -101,9 +101,6 @@
int (*ng_ipfw_input_p)(struct mbuf **, int,
struct ip_fw_args *, int);
-/* Hook for telling pf that the destination address changed */
-void (*m_addr_chg_pf_p)(struct mbuf *m);
-
#ifdef INET
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
@@ -290,6 +287,11 @@
last = NULL;
ifp = m->m_pkthdr.rcvif;
+ /*
+ * Applications on raw sockets expect host byte order.
+ */
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
@@ -438,7 +440,7 @@
m_freem(m);
return(EMSGSIZE);
}
- M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
if (m == NULL)
return(ENOBUFS);
@@ -446,25 +448,26 @@
ip = mtod(m, struct ip *);
ip->ip_tos = inp->inp_ip_tos;
if (inp->inp_flags & INP_DONTFRAG)
- ip->ip_off = IP_DF;
+ ip->ip_off = htons(IP_DF);
else
- ip->ip_off = 0;
+ ip->ip_off = htons(0);
ip->ip_p = inp->inp_ip_p;
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_src = inp->inp_laddr;
+ ip->ip_dst.s_addr = dst;
if (jailed(inp->inp_cred)) {
/*
* prison_local_ip4() would be good enough but would
* let a source of INADDR_ANY pass, which we do not
- * want to see from jails. We do not go through the
- * pain of in_pcbladdr() for raw sockets.
+ * want to see from jails.
*/
- if (ip->ip_src.s_addr == INADDR_ANY)
- error = prison_get_ip4(inp->inp_cred,
- &ip->ip_src);
- else
+ if (ip->ip_src.s_addr == INADDR_ANY) {
+ error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
+ inp->inp_cred);
+ } else {
error = prison_local_ip4(inp->inp_cred,
&ip->ip_src);
+ }
if (error != 0) {
INP_RUNLOCK(inp);
m_freem(m);
@@ -471,7 +474,6 @@
return (error);
}
}
- ip->ip_dst.s_addr = dst;
ip->ip_ttl = inp->inp_ip_ttl;
} else {
if (m->m_pkthdr.len > IP_MAXPACKET) {
@@ -502,6 +504,13 @@
ip->ip_id = ip_newid();
/*
+ * Applications on raw sockets pass us packets
+ * in host byte order.
+ */
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+
+ /*
* XXX prevent ip_output from overwriting header fields.
*/
flags |= IP_RAWOUTPUT;
@@ -538,6 +547,8 @@
*
* When adding new socket options here, make sure to add access control
* checks here as necessary.
+ *
+ * XXX-BZ inp locking?
*/
int
rip_ctloutput(struct socket *so, struct sockopt *sopt)
Modified: trunk/sys/netinet/sctp.h
===================================================================
--- trunk/sys/netinet/sctp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp.h 237898 2012-07-01 08:04:10Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp.h 294158 2016-01-16 16:46:00Z tuexen $");
#ifndef _NETINET_SCTP_H_
#define _NETINET_SCTP_H_
@@ -44,13 +44,13 @@
#define SCTP_PACKED __attribute__((packed))
/*
- * SCTP protocol - RFC2960.
+ * SCTP protocol - RFC4960.
*/
struct sctphdr {
uint16_t src_port; /* source port */
uint16_t dest_port; /* destination port */
uint32_t v_tag; /* verification tag of packet */
- uint32_t checksum; /* Adler32 C-Sum */
+ uint32_t checksum; /* CRC32C checksum */
/* chunks follow... */
} SCTP_PACKED;
@@ -122,6 +122,14 @@
#define SCTP_DEFAULT_PRINFO 0x00000022
#define SCTP_PEER_ADDR_THLDS 0x00000023
#define SCTP_REMOTE_UDP_ENCAPS_PORT 0x00000024
+#define SCTP_ECN_SUPPORTED 0x00000025
+#define SCTP_PR_SUPPORTED 0x00000026
+#define SCTP_AUTH_SUPPORTED 0x00000027
+#define SCTP_ASCONF_SUPPORTED 0x00000028
+#define SCTP_RECONFIG_SUPPORTED 0x00000029
+#define SCTP_NRSACK_SUPPORTED 0x00000030
+#define SCTP_PKTDROP_SUPPORTED 0x00000031
+#define SCTP_MAX_CWND 0x00000032
/*
* read-only options
@@ -134,6 +142,8 @@
#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
#define SCTP_TIMEOUTS 0x00000106
+#define SCTP_PR_STREAM_STATUS 0x00000107
+#define SCTP_PR_ASSOC_STATUS 0x00000108
/*
* user socket options: BSD implementation specific
@@ -366,6 +376,12 @@
/*
* error cause parameters (user visible)
*/
+struct sctp_gen_error_cause {
+ uint16_t code;
+ uint16_t length;
+ uint8_t info[];
+} SCTP_PACKED;
+
struct sctp_error_cause {
uint16_t code;
uint16_t length;
@@ -373,36 +389,45 @@
} SCTP_PACKED;
struct sctp_error_invalid_stream {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_INVALID_STREAM */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_INVALID_STREAM */
uint16_t stream_id; /* stream id of the DATA in error */
uint16_t reserved;
} SCTP_PACKED;
struct sctp_error_missing_param {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_MISSING_PARAM */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_MISSING_PARAM */
uint32_t num_missing_params; /* number of missing parameters */
- /* uint16_t param_type's follow */
+ uint16_t type[];
} SCTP_PACKED;
struct sctp_error_stale_cookie {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_STALE_COOKIE */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_STALE_COOKIE */
uint32_t stale_time; /* time in usec of staleness */
} SCTP_PACKED;
struct sctp_error_out_of_resource {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_OUT_OF_RESOURCES */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_OUT_OF_RESOURCES */
} SCTP_PACKED;
struct sctp_error_unresolv_addr {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRESOLVABLE_ADDR */
-
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRESOLVABLE_ADDR */
} SCTP_PACKED;
struct sctp_error_unrecognized_chunk {
- struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRECOG_CHUNK */
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRECOG_CHUNK */
struct sctp_chunkhdr ch;/* header from chunk in error */
} SCTP_PACKED;
+struct sctp_error_no_user_data {
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_NO_USER_DATA */
+ uint32_t tsn; /* TSN of the empty data chunk */
+} SCTP_PACKED;
+
+struct sctp_error_auth_invalid_hmac {
+ struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNSUPPORTED_HMACID */
+ uint16_t hmac_id;
+} SCTP_PACKED;
+
/*
* Main SCTP chunk types we place these here so natd and f/w's in user land
* can find them.
@@ -426,7 +451,7 @@
/* RFC4895 */
#define SCTP_AUTHENTICATION 0x0f
/* EY nr_sack chunk id*/
-#define SCTP_NR_SELECTIVE_ACK 0x10
+#define SCTP_NR_SELECTIVE_ACK 0x10
/************0x40 series ***********/
/************0x80 series ***********/
/* RFC5061 */
@@ -501,47 +526,47 @@
#define SCTP_PCB_FLAGS_SOCKET_GONE 0x10000000
#define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x20000000
#define SCTP_PCB_FLAGS_SOCKET_CANT_READ 0x40000000
+
/* flags to copy to new PCB */
#define SCTP_PCB_COPY_FLAGS (SCTP_PCB_FLAGS_BOUNDALL|\
SCTP_PCB_FLAGS_WAKEINPUT|\
SCTP_PCB_FLAGS_BOUND_V6)
-
/*
* PCB Features (in sctp_features bitmask)
*/
-#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x00000001
-#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */
-#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
-#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
-#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
-#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
-#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
-#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
+#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x0000000000000001
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x0000000000000002 /* deprecated */
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x0000000000000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x0000000000000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x0000000000000010
+#define SCTP_PCB_FLAGS_DO_ASCONF 0x0000000000000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x0000000000000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x0000000000000080
/* socket options */
-#define SCTP_PCB_FLAGS_NODELAY 0x00000100
-#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
-#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */
-#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
-#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
-#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
-#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000 /* deprecated */
-#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
-#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
-#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
-#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
-#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
-#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
-#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
-#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x00800000
-#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000
-#define SCTP_PCB_FLAGS_PORTREUSE 0x02000000
-#define SCTP_PCB_FLAGS_DRYEVNT 0x04000000
-#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000
-#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000
-#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT 0x20000000
-#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x40000000
-#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x80000000
+#define SCTP_PCB_FLAGS_NODELAY 0x0000000000000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE 0x0000000000000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x0000000000000400 /* deprecated */
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x0000000000000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x0000000000001000
+#define SCTP_PCB_FLAGS_RECVPEERERR 0x0000000000002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x0000000000004000 /* deprecated */
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x0000000000008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x0000000000010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT 0x0000000000020000
+#define SCTP_PCB_FLAGS_AUTHEVNT 0x0000000000040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x0000000000080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x0000000000100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x0000000000400000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x0000000000800000
+#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x0000000001000000
+#define SCTP_PCB_FLAGS_PORTREUSE 0x0000000002000000
+#define SCTP_PCB_FLAGS_DRYEVNT 0x0000000004000000
+#define SCTP_PCB_FLAGS_RECVRCVINFO 0x0000000008000000
+#define SCTP_PCB_FLAGS_RECVNXTINFO 0x0000000010000000
+#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT 0x0000000020000000
+#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x0000000040000000
+#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x0000000080000000
/*-
* mobility_features parameters (by micchie).Note
Modified: trunk/sys/netinet/sctp_asconf.c
===================================================================
--- trunk/sys/netinet/sctp_asconf.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_asconf.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_asconf.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_asconf.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -51,59 +51,8 @@
*/
-static void
-sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
-{
- struct ip *iph;
-
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
-
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* IPv4 source */
- sin = (struct sockaddr_in *)sa;
- bzero(sin, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_port = 0;
- sin->sin_addr.s_addr = iph->ip_src.s_addr;
- break;
- }
-#endif
-#ifdef INET6
- case (IPV6_VERSION >> 4):
- {
- /* IPv6 source */
- struct ip6_hdr *ip6;
-
- sin6 = (struct sockaddr_in6 *)sa;
- bzero(sin6, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_port = 0;
- ip6 = mtod(m, struct ip6_hdr *);
- sin6->sin6_addr = ip6->ip6_src;
- break;
- }
-#endif /* INET6 */
- default:
- break;
- }
- return;
-}
-
/*
- * draft-ietf-tsvwg-addip-sctp
+ * RFC 5061
*
* An ASCONF parameter queue exists per asoc which holds the pending address
* operations. Lists are updated upon receipt of ASCONF-ACK.
@@ -132,7 +81,7 @@
struct sctp_asconf_paramhdr *aph;
m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
+ 0, M_NOWAIT, 1, MT_DATA);
if (m_reply == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_success_response: couldn't get mbuf!\n");
@@ -160,7 +109,7 @@
m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
tlv_length +
sizeof(struct sctp_error_cause)),
- 0, M_DONTWAIT, 1, MT_DATA);
+ 0, M_NOWAIT, 1, MT_DATA);
if (m_reply == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_error_response: couldn't get mbuf!\n");
@@ -195,14 +144,19 @@
}
static struct mbuf *
-sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int send_hb, int response_required)
{
struct sctp_nets *net;
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
- uint16_t param_type, param_length, aparam_length;
+ uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+ uint16_t param_length;
+
+#endif
struct sockaddr *sa;
int zero_address = 0;
int bad_address = 0;
@@ -221,9 +175,10 @@
aparam_length = ntohs(aph->ph.param_length);
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
-
- sa = (struct sockaddr *)&sa_store;
+#endif
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -232,7 +187,7 @@
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -255,7 +210,7 @@
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -280,11 +235,10 @@
/* if 0.0.0.0/::0, add the source address instead */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- sa = (struct sockaddr *)&sa_source;
- sctp_asconf_get_source_ip(m, sa);
+ sa = src;
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_asconf_add_ip: using source addr ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
}
/* add the address */
if (bad_address) {
@@ -344,13 +298,19 @@
}
static struct mbuf *
-sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+sctp_process_asconf_delete_ip(struct sockaddr *src,
+ struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
- uint16_t param_type, param_length, aparam_length;
+ uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+ uint16_t param_length;
+
+#endif
struct sockaddr *sa;
int zero_address = 0;
int result;
@@ -366,15 +326,13 @@
#endif
- /* get the source IP address for src and 0.0.0.0/::0 delete checks */
- sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
-
aparam_length = ntohs(aph->ph.param_length);
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
-
- sa = (struct sockaddr *)&sa_store;
+#endif
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -383,7 +341,7 @@
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -403,7 +361,7 @@
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -425,7 +383,7 @@
}
/* make sure the source address is not being deleted */
- if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+ if (sctp_cmpaddr(sa, src)) {
/* trying to delete the source address! */
SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
m_reply = sctp_asconf_error_response(aph->correlation_id,
@@ -435,8 +393,7 @@
}
/* if deleting 0.0.0.0/::0, delete all addresses except src addr */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- result = sctp_asconf_del_remote_addrs_except(stcb,
- (struct sockaddr *)&sa_source);
+ result = sctp_asconf_del_remote_addrs_except(stcb, src);
if (result) {
/* src address did not exist? */
@@ -476,14 +433,19 @@
}
static struct mbuf *
-sctp_process_asconf_set_primary(struct mbuf *m,
+sctp_process_asconf_set_primary(struct sockaddr *src,
struct sctp_asconf_paramhdr *aph,
struct sctp_tcb *stcb, int response_required)
{
struct mbuf *m_reply = NULL;
- struct sockaddr_storage sa_source, sa_store;
+ union sctp_sockstore store;
struct sctp_paramhdr *ph;
- uint16_t param_type, param_length, aparam_length;
+ uint16_t param_type, aparam_length;
+
+#if defined(INET) || defined(INET6)
+ uint16_t param_length;
+
+#endif
struct sockaddr *sa;
int zero_address = 0;
@@ -501,9 +463,10 @@
aparam_length = ntohs(aph->ph.param_length);
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
+#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
-
- sa = (struct sockaddr *)&sa_store;
+#endif
+ sa = &store.sa;
switch (param_type) {
#ifdef INET
case SCTP_IPV4_ADDRESS:
@@ -512,7 +475,7 @@
return (NULL);
}
v4addr = (struct sctp_ipv4addr_param *)ph;
- sin = (struct sockaddr_in *)&sa_store;
+ sin = &store.sin;
bzero(sin, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(struct sockaddr_in);
@@ -530,7 +493,7 @@
return (NULL);
}
v6addr = (struct sctp_ipv6addr_param *)ph;
- sin6 = (struct sockaddr_in6 *)&sa_store;
+ sin6 = &store.sin6;
bzero(sin6, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
@@ -551,11 +514,10 @@
/* if 0.0.0.0/::0, use the source address instead */
if (zero_address && SCTP_BASE_SYSCTL(sctp_nat_friendly)) {
- sa = (struct sockaddr *)&sa_source;
- sctp_asconf_get_source_ip(m, sa);
+ sa = src;
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_asconf_set_primary: using source addr ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
}
/* set the primary address */
if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
@@ -594,7 +556,9 @@
(stcb->asoc.primary_destination->dest_state &
SCTP_ADDR_UNCONFIRMED) == 0) {
- sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
if (sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_FASTHANDOFF)) {
sctp_assoc_immediate_retrans(stcb,
@@ -627,6 +591,7 @@
*/
void
sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+ struct sockaddr *src,
struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
int first)
{
@@ -634,7 +599,7 @@
uint32_t serial_num;
struct mbuf *n, *m_ack, *m_result, *m_tail;
struct sctp_asconf_ack_chunk *ack_cp;
- struct sctp_asconf_paramhdr *aph, *ack_aph;
+ struct sctp_asconf_paramhdr *aph;
struct sctp_ipv6addr_param *p_addr;
unsigned int asconf_limit, cnt;
int error = 0; /* did an error occur? */
@@ -689,7 +654,7 @@
}
}
m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
- M_DONTWAIT, 1, MT_DATA);
+ M_NOWAIT, 1, MT_DATA);
if (m_ack == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"handle_asconf: couldn't get mbuf!\n");
@@ -717,13 +682,6 @@
}
/* param_length is already validated in process_control... */
offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
-
- /* get pointer to first asconf param in ASCONF-ACK */
- ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
- if (ack_aph == NULL) {
- SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
- return;
- }
/* get pointer to first asconf param in ASCONF */
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
if (aph == NULL) {
@@ -762,14 +720,12 @@
}
switch (param_type) {
case SCTP_ADD_IP_ADDRESS:
- asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+ m_result = sctp_process_asconf_add_ip(src, aph, stcb,
(cnt < SCTP_BASE_SYSCTL(sctp_hb_maxburst)), error);
cnt++;
break;
case SCTP_DEL_IP_ADDRESS:
- asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+ m_result = sctp_process_asconf_delete_ip(src, aph, stcb,
error);
break;
case SCTP_ERROR_CAUSE_IND:
@@ -776,8 +732,7 @@
/* not valid in an ASCONF chunk */
break;
case SCTP_SET_PRIM_ADDR:
- asoc->peer_supports_asconf = 1;
- m_result = sctp_process_asconf_set_primary(m, aph,
+ m_result = sctp_process_asconf_set_primary(src, aph,
stcb, error);
break;
case SCTP_NAT_VTAGS:
@@ -857,11 +812,7 @@
* this could happen if the source address was just newly
* added
*/
- struct sockaddr_storage addr;
- struct sockaddr *src = (struct sockaddr *)&addr;
-
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
- sctp_asconf_get_source_ip(m, src);
SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
/* look up the from address */
@@ -920,10 +871,12 @@
static uint32_t
sctp_addr_match(struct sctp_paramhdr *ph, struct sockaddr *sa)
{
+#if defined(INET) || defined(INET6)
uint16_t param_type, param_length;
param_type = ntohs(ph->param_type);
param_length = ntohs(ph->param_length);
+#endif
switch (sa->sa_family) {
#ifdef INET6
case AF_INET6:
@@ -934,7 +887,7 @@
v6addr = (struct sctp_ipv6addr_param *)ph;
if ((param_type == SCTP_IPV6_ADDRESS) &&
- param_length == sizeof(struct sctp_ipv6addr_param) &&
+ (param_length == sizeof(struct sctp_ipv6addr_param)) &&
(memcmp(&v6addr->addr, &sin6->sin6_addr,
sizeof(struct in6_addr)) == 0)) {
return (1);
@@ -950,7 +903,7 @@
v4addr = (struct sctp_ipv4addr_param *)ph;
if ((param_type == SCTP_IPV4_ADDRESS) &&
- param_length == sizeof(struct sctp_ipv4addr_param) &&
+ (param_length == sizeof(struct sctp_ipv4addr_param)) &&
(memcmp(&v4addr->addr, &sin->sin_addr,
sizeof(struct in_addr)) == 0)) {
return (1);
@@ -970,8 +923,6 @@
void
sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
{
- /* mark peer as ASCONF incapable */
- stcb->asoc.peer_supports_asconf = 0;
/*
* clear out any existing asconfs going out
*/
@@ -1043,7 +994,7 @@
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
stcb->asoc.deleted_primary,
- SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
stcb->asoc.num_send_timers_up--;
if (stcb->asoc.num_send_timers_up < 0) {
stcb->asoc.num_send_timers_up = 0;
@@ -1082,7 +1033,7 @@
SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
- SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_4);
stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
net->error_count = 0;
TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
@@ -1159,7 +1110,8 @@
* not be changed.
*/
SCTP_RTALLOC((sctp_route_t *) & net->ro,
- stcb->sctp_ep->def_vrf_id);
+ stcb->sctp_ep->def_vrf_id,
+ stcb->sctp_ep->fibnum);
if (net->ro.ro_rt == NULL)
continue;
@@ -1251,7 +1203,6 @@
uint16_t type)
{
struct sctp_asconf_addr *aa, *aa_next;
- struct sockaddr *sa;
/* make sure the request isn't already in the queue */
TAILQ_FOREACH_SAFE(aa, &stcb->asoc.asconf_queue, next, aa_next) {
@@ -1314,8 +1265,7 @@
{
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
- sa = (struct sockaddr *)sin6;
+ sin6 = &ifa->address.sin6;
aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1330,8 +1280,7 @@
{
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sa;
- sa = (struct sockaddr *)sin;
+ sin = &ifa->address.sin;
aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
@@ -1354,13 +1303,13 @@
if (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_ASCONF2) {
if (type == SCTP_ADD_IP_ADDRESS) {
SCTP_PRINTF("asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
} else if (type == SCTP_DEL_IP_ADDRESS) {
SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
} else {
SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: ");
- SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa);
}
}
#endif
@@ -1381,9 +1330,10 @@
{
uint32_t status;
int pending_delete_queued = 0;
+ int last;
/* see if peer supports ASCONF */
- if (stcb->asoc.peer_supports_asconf == 0) {
+ if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
/*
@@ -1390,15 +1340,21 @@
* if this is deleting the last address from the assoc, mark it as
* pending.
*/
- if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
- (sctp_local_addr_count(stcb) < 2)) {
- /* set the pending delete info only */
- stcb->asoc.asconf_del_pending = 1;
- stcb->asoc.asconf_addr_del_pending = ifa;
- atomic_add_int(&ifa->refcount, 1);
- SCTPDBG(SCTP_DEBUG_ASCONF2,
- "asconf_queue_add: mark delete last address pending\n");
- return (-1);
+ if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending) {
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ last = (sctp_local_addr_count(stcb) == 0);
+ } else {
+ last = (sctp_local_addr_count(stcb) == 1);
+ }
+ if (last) {
+ /* set the pending delete info only */
+ stcb->asoc.asconf_del_pending = 1;
+ stcb->asoc.asconf_addr_del_pending = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_add: mark delete last address pending\n");
+ return (-1);
+ }
}
/* queue an asconf parameter */
status = sctp_asconf_queue_mgmt(stcb, ifa, type);
@@ -1467,13 +1423,12 @@
{
struct sctp_ifa *ifa;
struct sctp_asconf_addr *aa, *aa_next;
- uint32_t vrf_id;
if (stcb == NULL) {
return (-1);
}
/* see if peer supports ASCONF */
- if (stcb->asoc.peer_supports_asconf == 0) {
+ if (stcb->asoc.asconf_supported == 0) {
return (-1);
}
/* make sure the request isn't already in the queue */
@@ -1499,12 +1454,7 @@
} /* for each aa */
/* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
- if (stcb) {
- vrf_id = stcb->asoc.vrf_id;
- } else {
- vrf_id = SCTP_DEFAULT_VRFID;
- }
- ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ ifa = sctp_find_ifa_by_addr(sa, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
/* adding new request to the queue */
SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
@@ -1593,7 +1543,7 @@
* notifications based on the error response
*/
static void
-sctp_asconf_process_error(struct sctp_tcb *stcb,
+sctp_asconf_process_error(struct sctp_tcb *stcb SCTP_UNUSED,
struct sctp_asconf_paramhdr *aph)
{
struct sctp_error_cause *eh;
@@ -1631,10 +1581,7 @@
switch (param_type) {
case SCTP_ADD_IP_ADDRESS:
case SCTP_DEL_IP_ADDRESS:
- stcb->asoc.peer_supports_asconf = 0;
- break;
case SCTP_SET_PRIM_ADDR:
- stcb->asoc.peer_supports_asconf = 0;
break;
default:
break;
@@ -1670,8 +1617,6 @@
SCTPDBG(SCTP_DEBUG_ASCONF1,
"process_param_ack: set primary IP address\n");
/* nothing to do... peer may start using this addr */
- if (flag == 0)
- stcb->asoc.peer_supports_asconf = 0;
break;
default:
/* should NEVER happen */
@@ -1689,11 +1634,11 @@
* cleanup from a bad asconf ack parameter
*/
static void
-sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+sctp_asconf_ack_clear(struct sctp_tcb *stcb SCTP_UNUSED)
{
/* assume peer doesn't really know how to do asconfs */
- stcb->asoc.peer_supports_asconf = 0;
/* XXX we could free the pending queue here */
+
}
void
@@ -1736,8 +1681,14 @@
* abort the asoc, since someone probably just hijacked us...
*/
if (serial_num == (asoc->asconf_seq_out + 1)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
- sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg), "Never sent serial number %8.8x",
+ serial_num);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_no_unlock = 1;
return;
}
@@ -1750,7 +1701,7 @@
if (serial_num == asoc->asconf_seq_out - 1) {
/* stop our timer */
sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
- SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_5);
}
/* process the ASCONF-ACK contents */
ack_length = ntohs(cp->ch.chunk_length) -
@@ -1922,7 +1873,6 @@
{
int status;
-
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 ||
sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
/* subset bound, no ASCONF allowed case, so ignore */
@@ -1933,14 +1883,22 @@
* this is boundall or subset bound w/ASCONF allowed
*/
- /* first, make sure it's a good address family */
+ /* first, make sure that the address is IPv4 or IPv6 and not jailed */
switch (ifa->address.sa.sa_family) {
#ifdef INET6
case AF_INET6:
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin6.sin6_addr) != 0) {
+ return;
+ }
break;
#endif
#ifdef INET
case AF_INET:
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin.sin_addr) != 0) {
+ return;
+ }
break;
#endif
default:
@@ -1971,13 +1929,13 @@
{
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* we skip unspecifed addresses */
return;
}
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
- if (stcb->asoc.local_scope == 0) {
+ if (stcb->asoc.scope.local_scope == 0) {
return;
}
/* is it the right link local scope? */
@@ -1985,7 +1943,7 @@
return;
}
}
- if (stcb->asoc.site_scope == 0 &&
+ if (stcb->asoc.scope.site_scope == 0 &&
IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
return;
}
@@ -2004,12 +1962,12 @@
SCTP_IPV6_V6ONLY(inp6))
return;
- sin = (struct sockaddr_in *)&ifa->address.sa;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* we skip unspecifed addresses */
return;
}
- if (stcb->asoc.ipv4_local_scope == 0 &&
+ if (stcb->asoc.scope.ipv4_local_scope == 0 &&
IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
return;
}
@@ -2024,7 +1982,7 @@
/* queue an asconf for this address add/delete */
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
/* does the peer do asconf? */
- if (stcb->asoc.peer_supports_asconf) {
+ if (stcb->asoc.asconf_supported) {
/* queue an asconf for this addr */
status = sctp_asconf_queue_add(stcb, ifa, type);
@@ -2034,7 +1992,8 @@
* sent when the state goes open.
*/
if (status == 0 &&
- SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
stcb, stcb->asoc.primary_destination);
@@ -2161,13 +2120,17 @@
else
continue;
}
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* we skip unspecifed addresses */
continue;
}
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
- if (stcb->asoc.local_scope == 0) {
+ if (stcb->asoc.scope.local_scope == 0) {
continue;
}
/* is it the right link local scope? */
@@ -2191,12 +2154,16 @@
SCTP_IPV6_V6ONLY(inp6))
continue;
- sin = (struct sockaddr_in *)&ifa->address.sa;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* we skip unspecifed addresses */
continue;
}
- if (stcb->asoc.ipv4_local_scope == 0 &&
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
+ if (stcb->asoc.scope.ipv4_local_scope == 0 &&
IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
continue;
}
@@ -2259,13 +2226,7 @@
}
} else {
/* Need to check scopes for this guy */
- if (sctp_is_address_in_scope(ifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
continue;
}
}
@@ -2272,7 +2233,7 @@
}
/* queue an asconf for this address add/delete */
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
- stcb->asoc.peer_supports_asconf) {
+ stcb->asoc.asconf_supported == 1) {
/* queue an asconf for this addr */
status = sctp_asconf_queue_add(stcb, ifa, type);
/*
@@ -2280,7 +2241,8 @@
* count of queued params. If in the non-open
* state, these get sent when the assoc goes open.
*/
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
if (status >= 0) {
num_queued++;
}
@@ -2338,9 +2300,10 @@
/* set primary queuing succeeded */
SCTPDBG(SCTP_DEBUG_ASCONF1,
"set_primary_ip_address_sa: queued on tcb=%p, ",
- stcb);
+ (void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2351,7 +2314,7 @@
}
} else {
SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ",
- stcb);
+ (void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
return (-1);
}
@@ -2374,9 +2337,10 @@
SCTP_SET_PRIM_ADDR)) {
/* set primary queuing succeeded */
SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
- stcb);
+ (void *)stcb);
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
- if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
stcb->sctp_ep, stcb,
@@ -2498,7 +2462,7 @@
return (NULL);
}
LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
- if (stcb->asoc.loopback_scope == 0 &&
+ if (stcb->asoc.scope.loopback_scope == 0 &&
SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
/* Skip if loopback_scope not set */
continue;
@@ -2507,15 +2471,19 @@
switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
- if (stcb->asoc.ipv4_addr_legal) {
+ if (stcb->asoc.scope.ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* skip unspecifed addresses */
continue;
}
- if (stcb->asoc.ipv4_local_scope == 0 &&
+ if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
+ if (stcb->asoc.scope.ipv4_local_scope == 0 &&
IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))
continue;
@@ -2534,13 +2502,13 @@
#endif
#ifdef INET6
case AF_INET6:
- if (stcb->asoc.ipv6_addr_legal) {
+ if (stcb->asoc.scope.ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
continue;
}
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
* we skip unspecifed
@@ -2548,10 +2516,14 @@
*/
continue;
}
- if (stcb->asoc.local_scope == 0 &&
+ if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
+ if (stcb->asoc.scope.local_scope == 0 &&
IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
continue;
- if (stcb->asoc.site_scope == 0 &&
+ if (stcb->asoc.scope.site_scope == 0 &&
IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
continue;
@@ -2630,7 +2602,7 @@
* it's simpler to fill in the asconf chunk header lookup address on
* the fly
*/
- m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+ m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_NOWAIT, 1, MT_DATA);
if (m_asconf_chk == NULL) {
/* no mbuf's */
SCTPDBG(SCTP_DEBUG_ASCONF1,
@@ -2637,7 +2609,7 @@
"compose_asconf: couldn't get chunk mbuf!\n");
return (NULL);
}
- m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_asconf == NULL) {
/* no mbuf's */
SCTPDBG(SCTP_DEBUG_ASCONF1,
@@ -2666,7 +2638,8 @@
/* get the parameter length */
p_length = SCTP_SIZE32(aa->ap.aph.ph.param_length);
/* will it fit in current chunk? */
- if (SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) {
+ if ((SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) ||
+ (SCTP_BUF_LEN(m_asconf) + p_length > MCLBYTES)) {
/* won't fit, so we're done with this chunk */
break;
}
@@ -2787,7 +2760,7 @@
/* chain it all together */
SCTP_BUF_NEXT(m_asconf_chk) = m_asconf;
*retlen = SCTP_BUF_LEN(m_asconf_chk) + SCTP_BUF_LEN(m_asconf);
- acp->ch.chunk_length = ntohs(*retlen);
+ acp->ch.chunk_length = htons(*retlen);
return (m_asconf_chk);
}
@@ -2807,19 +2780,16 @@
struct sctp_paramhdr tmp_param, *ph;
uint16_t plen, ptype;
struct sctp_ifa *sctp_ifa;
+ union sctp_sockstore store;
#ifdef INET6
struct sctp_ipv6addr_param addr6_store;
- struct sockaddr_in6 sin6;
#endif
#ifdef INET
struct sctp_ipv4addr_param addr4_store;
- struct sockaddr_in sin;
#endif
- struct sockaddr *sa;
- uint32_t vrf_id;
SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
if (stcb == NULL) /* Un-needed check for SA */
@@ -2831,21 +2801,6 @@
if ((offset + sizeof(struct sctp_paramhdr)) > length) {
return;
}
- /* init the addresses */
-#ifdef INET6
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_port = stcb->rport;
-#endif
-
-#ifdef INET
- bzero(&sin, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(sin);
- sin.sin_port = stcb->rport;
-#endif
-
/* go through the addresses in the init-ack */
ph = (struct sctp_paramhdr *)
sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
@@ -2868,9 +2823,11 @@
a6p == NULL) {
return;
}
- memcpy(&sin6.sin6_addr, a6p->addr,
- sizeof(struct in6_addr));
- sa = (struct sockaddr *)&sin6;
+ memset(&store, 0, sizeof(union sctp_sockstore));
+ store.sin6.sin6_family = AF_INET6;
+ store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ store.sin6.sin6_port = stcb->rport;
+ memcpy(&store.sin6.sin6_addr, a6p->addr, sizeof(struct in6_addr));
break;
}
#endif
@@ -2887,8 +2844,11 @@
a4p == NULL) {
return;
}
- sin.sin_addr.s_addr = a4p->addr;
- sa = (struct sockaddr *)&sin;
+ memset(&store, 0, sizeof(union sctp_sockstore));
+ store.sin.sin_family = AF_INET;
+ store.sin.sin_len = sizeof(struct sockaddr_in);
+ store.sin.sin_port = stcb->rport;
+ store.sin.sin_addr.s_addr = a4p->addr;
break;
}
#endif
@@ -2897,12 +2857,7 @@
}
/* see if this address really (still) exists */
- if (stcb) {
- vrf_id = stcb->asoc.vrf_id;
- } else {
- vrf_id = SCTP_DEFAULT_VRFID;
- }
- sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+ sctp_ifa = sctp_find_ifa_by_addr(&store.sa, stcb->asoc.vrf_id,
SCTP_ADDR_NOT_LOCKED);
if (sctp_ifa == NULL) {
/* address doesn't exist anymore */
@@ -2911,9 +2866,9 @@
/* are ASCONFs allowed ? */
if ((sctp_is_feature_on(stcb->sctp_ep,
SCTP_PCB_FLAGS_DO_ASCONF)) &&
- stcb->asoc.peer_supports_asconf) {
+ stcb->asoc.asconf_supported) {
/* queue an ASCONF DEL_IP_ADDRESS */
- status = sctp_asconf_queue_sa_delete(stcb, sa);
+ status = sctp_asconf_queue_sa_delete(stcb, &store.sa);
/*
* if queued ok, and in correct state, send
* out the ASCONF.
@@ -3160,7 +3115,11 @@
switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
- sin = (struct sockaddr_in *)&sctp_ifa->address.sin;
+ sin = &sctp_ifa->address.sin;
+ if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
if ((ipv4_scope == 0) &&
(IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
/* private address not in scope */
@@ -3170,7 +3129,11 @@
#endif
#ifdef INET6
case AF_INET6:
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sin6;
+ sin6 = &sctp_ifa->address.sin6;
+ if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
if ((local_scope == 0) &&
(IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
continue;
@@ -3286,6 +3249,7 @@
} else {
struct sctp_asconf_iterator *asc;
struct sctp_laddr *wi;
+ int ret;
SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
sizeof(struct sctp_asconf_iterator),
@@ -3307,7 +3271,7 @@
wi->action = type;
atomic_add_int(&ifa->refcount, 1);
LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
- (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
sctp_asconf_iterator_stcb,
sctp_asconf_iterator_ep_end,
SCTP_PCB_ANY_FLAGS,
@@ -3315,6 +3279,12 @@
SCTP_ASOC_ANY_STATE,
(void *)asc, 0,
sctp_asconf_iterator_end, inp, 0);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for addr_mgmt_ep_sa\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EFAULT);
+ sctp_asconf_iterator_end(asc, 0);
+ return (EFAULT);
+ }
}
return (0);
} else {
@@ -3404,6 +3374,11 @@
TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
break;
#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: unknown address family\n");
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return;
}
SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
SCTP_M_ASC_ADDR);
@@ -3437,6 +3412,11 @@
TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
break;
#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_send_nat_state_update: unknown address family\n");
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return;
}
/* Now we must hunt the addresses and add all global addresses */
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
@@ -3456,6 +3436,10 @@
#ifdef INET
case AF_INET:
to = &sctp_ifap->address.sin;
+ if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &to->sin_addr) != 0) {
+ continue;
+ }
if (IN4_ISPRIVATE_ADDRESS(&to->sin_addr)) {
continue;
}
@@ -3467,6 +3451,10 @@
#ifdef INET6
case AF_INET6:
to6 = &sctp_ifap->address.sin6;
+ if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &to6->sin6_addr) != 0) {
+ continue;
+ }
if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
continue;
}
Modified: trunk/sys/netinet/sctp_asconf.h
===================================================================
--- trunk/sys/netinet/sctp_asconf.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_asconf.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_asconf.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_asconf.h 237715 2012-06-28 16:01:08Z tuexen $");
#ifndef _NETINET_SCTP_ASCONF_H_
#define _NETINET_SCTP_ASCONF_H_
@@ -47,8 +47,8 @@
extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
extern void
-sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
- struct sctp_tcb *, int i);
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sockaddr *,
+ struct sctp_asconf_chunk *, struct sctp_tcb *, int);
extern void
sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,
Modified: trunk/sys/netinet/sctp_auth.c
===================================================================
--- trunk/sys/netinet/sctp_auth.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_auth.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_auth.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_auth.c 294174 2016-01-16 17:56:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -134,11 +134,6 @@
if (list == NULL)
return (-1);
- /* is chunk restricted? */
- if ((chunk == SCTP_ASCONF) ||
- (chunk == SCTP_ASCONF_ACK)) {
- return (-1);
- }
if (list->chunks[chunk] == 1) {
list->chunks[chunk] = 0;
list->num_chunks--;
@@ -159,16 +154,6 @@
}
/*
- * set the default list of chunks requiring AUTH
- */
-void
-sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
-{
- (void)sctp_auth_add_chunk(SCTP_ASCONF, list);
- (void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
-}
-
-/*
* return the current number and list of required chunks caller must
* guarantee ptr has space for up to 256 bytes
*/
@@ -334,10 +319,6 @@
{
sctp_key_t *new_key;
- /* validate keylen */
- if (keylen > SCTP_AUTH_RANDOM_SIZE_MAX)
- keylen = SCTP_AUTH_RANDOM_SIZE_MAX;
-
new_key = sctp_alloc_key(keylen);
if (new_key == NULL) {
/* out of memory */
@@ -375,7 +356,7 @@
uint32_t i;
uint32_t key1len, key2len;
uint8_t *key_1, *key_2;
- uint8_t temp[SCTP_AUTH_RANDOM_SIZE_MAX];
+ uint8_t val1, val2;
/* sanity/length check */
key1len = sctp_get_keylen(key1);
@@ -387,38 +368,24 @@
else if (key2len == 0)
return (1);
- if (key1len != key2len) {
- if (key1len >= key2len)
- maxlen = key1len;
- else
- maxlen = key2len;
- bzero(temp, maxlen);
- if (key1len < maxlen) {
- /* prepend zeroes to key1 */
- bcopy(key1->key, temp + (maxlen - key1len), key1len);
- key_1 = temp;
- key_2 = key2->key;
- } else {
- /* prepend zeroes to key2 */
- bcopy(key2->key, temp + (maxlen - key2len), key2len);
- key_1 = key1->key;
- key_2 = temp;
- }
+ if (key1len < key2len) {
+ maxlen = key2len;
} else {
maxlen = key1len;
- key_1 = key1->key;
- key_2 = key2->key;
}
-
+ key_1 = key1->key;
+ key_2 = key2->key;
+ /* check for numeric equality */
for (i = 0; i < maxlen; i++) {
- if (*key_1 > *key_2)
+ /* left-pad with zeros */
+ val1 = (i < (maxlen - key1len)) ? 0 : *(key_1++);
+ val2 = (i < (maxlen - key2len)) ? 0 : *(key_2++);
+ if (val1 > val2) {
return (1);
- else if (*key_1 < *key_2)
+ } else if (val1 < val2) {
return (-1);
- key_1++;
- key_2++;
+ }
}
-
/* keys are equal value, so check lengths */
if (key1len == key2len)
return (0);
@@ -592,7 +559,7 @@
atomic_add_int(&skey->refcount, 1);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u refcount acquire to %d\n",
- __FUNCTION__, stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
}
}
@@ -610,20 +577,20 @@
/* decrement the ref count */
if (skey) {
- sctp_free_sharedkey(skey);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u refcount release to %d\n",
- __FUNCTION__, stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
/* see if a notification should be generated */
- if ((skey->refcount <= 1) && (skey->deactivated)) {
+ if ((skey->refcount <= 2) && (skey->deactivated)) {
/* notify ULP that key is no longer used */
sctp_ulp_notify(SCTP_NOTIFY_AUTH_FREE_KEY, stcb,
key_id, 0, so_locked);
SCTPDBG(SCTP_DEBUG_AUTH2,
"%s: stcb %p key %u no longer used, %d\n",
- __FUNCTION__, stcb, key_id, skey->refcount);
+ __func__, (void *)stcb, key_id, skey->refcount);
}
+ sctp_free_sharedkey(skey);
}
}
@@ -665,7 +632,7 @@
sctp_hmaclist_t *
-sctp_alloc_hmaclist(uint8_t num_hmacs)
+sctp_alloc_hmaclist(uint16_t num_hmacs)
{
sctp_hmaclist_t *new_list;
int alloc_size;
@@ -704,15 +671,7 @@
return (-1);
}
if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) &&
-#ifdef HAVE_SHA224
- (hmac_id != SCTP_AUTH_HMAC_ID_SHA224) &&
-#endif
-#ifdef HAVE_SHA2
- (hmac_id != SCTP_AUTH_HMAC_ID_SHA256) &&
- (hmac_id != SCTP_AUTH_HMAC_ID_SHA384) &&
- (hmac_id != SCTP_AUTH_HMAC_ID_SHA512) &&
-#endif
- 1) {
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA256)) {
return (-1);
}
/* Now is it already in the list */
@@ -755,8 +714,9 @@
new_list = sctp_alloc_hmaclist(2);
if (new_list == NULL)
return (NULL);
+ /* We prefer SHA256, so list it first */
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
(void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1);
- (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
return (new_list);
}
@@ -812,19 +772,13 @@
sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs)
{
uint32_t i;
- uint16_t hmac_id;
- uint32_t sha1_supported = 0;
for (i = 0; i < num_hmacs; i++) {
- hmac_id = ntohs(hmacs->hmac_ids[i]);
- if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1)
- sha1_supported = 1;
+ if (ntohs(hmacs->hmac_ids[i]) == SCTP_AUTH_HMAC_ID_SHA1) {
+ return (0);
+ }
}
- /* all HMAC id's are supported */
- if (sha1_supported == 0)
- return (-1);
- else
- return (0);
+ return (-1);
}
sctp_authinfo_t *
@@ -878,18 +832,8 @@
switch (hmac_algo) {
case SCTP_AUTH_HMAC_ID_SHA1:
return (SCTP_AUTH_DIGEST_LEN_SHA1);
-#ifdef HAVE_SHA224
- case SCTP_AUTH_HMAC_ID_SHA224:
- return (SCTP_AUTH_DIGEST_LEN_SHA224);
-#endif
-#ifdef HAVE_SHA2
case SCTP_AUTH_HMAC_ID_SHA256:
return (SCTP_AUTH_DIGEST_LEN_SHA256);
- case SCTP_AUTH_HMAC_ID_SHA384:
- return (SCTP_AUTH_DIGEST_LEN_SHA384);
- case SCTP_AUTH_HMAC_ID_SHA512:
- return (SCTP_AUTH_DIGEST_LEN_SHA512);
-#endif
default:
/* unknown HMAC algorithm: can't do anything */
return (0);
@@ -901,17 +845,9 @@
{
switch (hmac_algo) {
case SCTP_AUTH_HMAC_ID_SHA1:
-#ifdef HAVE_SHA224
- case SCTP_AUTH_HMAC_ID_SHA224:
-#endif
return (64);
-#ifdef HAVE_SHA2
case SCTP_AUTH_HMAC_ID_SHA256:
return (64);
- case SCTP_AUTH_HMAC_ID_SHA384:
- case SCTP_AUTH_HMAC_ID_SHA512:
- return (128);
-#endif
case SCTP_AUTH_HMAC_ID_RSVD:
default:
/* unknown HMAC algorithm: can't do anything */
@@ -924,23 +860,11 @@
{
switch (hmac_algo) {
case SCTP_AUTH_HMAC_ID_SHA1:
- SHA1_Init(&ctx->sha1);
+ SCTP_SHA1_INIT(&ctx->sha1);
break;
-#ifdef HAVE_SHA224
- case SCTP_AUTH_HMAC_ID_SHA224:
- break;
-#endif
-#ifdef HAVE_SHA2
case SCTP_AUTH_HMAC_ID_SHA256:
- SHA256_Init(&ctx->sha256);
+ SCTP_SHA256_INIT(&ctx->sha256);
break;
- case SCTP_AUTH_HMAC_ID_SHA384:
- SHA384_Init(&ctx->sha384);
- break;
- case SCTP_AUTH_HMAC_ID_SHA512:
- SHA512_Init(&ctx->sha512);
- break;
-#endif
case SCTP_AUTH_HMAC_ID_RSVD:
default:
/* unknown HMAC algorithm: can't do anything */
@@ -954,23 +878,11 @@
{
switch (hmac_algo) {
case SCTP_AUTH_HMAC_ID_SHA1:
- SHA1_Update(&ctx->sha1, text, textlen);
+ SCTP_SHA1_UPDATE(&ctx->sha1, text, textlen);
break;
-#ifdef HAVE_SHA224
- case SCTP_AUTH_HMAC_ID_SHA224:
- break;
-#endif
-#ifdef HAVE_SHA2
case SCTP_AUTH_HMAC_ID_SHA256:
- SHA256_Update(&ctx->sha256, text, textlen);
+ SCTP_SHA256_UPDATE(&ctx->sha256, text, textlen);
break;
- case SCTP_AUTH_HMAC_ID_SHA384:
- SHA384_Update(&ctx->sha384, text, textlen);
- break;
- case SCTP_AUTH_HMAC_ID_SHA512:
- SHA512_Update(&ctx->sha512, text, textlen);
- break;
-#endif
case SCTP_AUTH_HMAC_ID_RSVD:
default:
/* unknown HMAC algorithm: can't do anything */
@@ -984,24 +896,11 @@
{
switch (hmac_algo) {
case SCTP_AUTH_HMAC_ID_SHA1:
- SHA1_Final(digest, &ctx->sha1);
+ SCTP_SHA1_FINAL(digest, &ctx->sha1);
break;
-#ifdef HAVE_SHA224
- case SCTP_AUTH_HMAC_ID_SHA224:
- break;
-#endif
-#ifdef HAVE_SHA2
case SCTP_AUTH_HMAC_ID_SHA256:
- SHA256_Final(digest, &ctx->sha256);
+ SCTP_SHA256_FINAL(digest, &ctx->sha256);
break;
- case SCTP_AUTH_HMAC_ID_SHA384:
- /* SHA384 is truncated SHA512 */
- SHA384_Final(digest, &ctx->sha384);
- break;
- case SCTP_AUTH_HMAC_ID_SHA512:
- SHA512_Final(digest, &ctx->sha512);
- break;
-#endif
case SCTP_AUTH_HMAC_ID_RSVD:
default:
/* unknown HMAC algorithm: can't do anything */
@@ -1540,8 +1439,8 @@
p_random = (struct sctp_auth_random *)phdr;
random_len = plen - sizeof(*p_random);
} else if (ptype == SCTP_HMAC_LIST) {
- int num_hmacs;
- int i;
+ uint16_t num_hmacs;
+ uint16_t i;
if (plen > sizeof(hmacs_store))
break;
@@ -1753,8 +1652,8 @@
/* is the indicated HMAC supported? */
if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
- struct mbuf *m_err;
- struct sctp_auth_invalid_hmac *err;
+ struct mbuf *op_err;
+ struct sctp_error_auth_invalid_hmac *cause;
SCTP_STAT_INCR(sctps_recvivalhmacid);
SCTPDBG(SCTP_DEBUG_AUTH1,
@@ -1764,20 +1663,19 @@
* report this in an Error Chunk: Unsupported HMAC
* Identifier
*/
- m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
- 1, MT_HEADER);
- if (m_err != NULL) {
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_auth_invalid_hmac),
+ 0, M_NOWAIT, 1, MT_HEADER);
+ if (op_err != NULL) {
/* pre-reserve some space */
- SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
/* fill in the error */
- err = mtod(m_err, struct sctp_auth_invalid_hmac *);
- bzero(err, sizeof(*err));
- err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
- err->ph.param_length = htons(sizeof(*err));
- err->hmac_id = ntohs(hmac_id);
- SCTP_BUF_LEN(m_err) = sizeof(*err);
+ cause = mtod(op_err, struct sctp_error_auth_invalid_hmac *);
+ cause->cause.code = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+ cause->cause.length = htons(sizeof(struct sctp_error_auth_invalid_hmac));
+ cause->hmac_id = ntohs(hmac_id);
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_auth_invalid_hmac);
/* queue it */
- sctp_queue_op_err(stcb, m_err);
+ sctp_queue_op_err(stcb, op_err);
}
return (-1);
}
@@ -1870,7 +1768,7 @@
return;
m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
- 0, M_DONTWAIT, 1, MT_HEADER);
+ 0, M_NOWAIT, 1, MT_HEADER);
if (m_notify == NULL)
/* no space left */
return;
@@ -1877,6 +1775,7 @@
SCTP_BUF_LEN(m_notify) = 0;
auth = mtod(m_notify, struct sctp_authkey_event *);
+ memset(auth, 0, sizeof(struct sctp_authkey_event));
auth->auth_type = SCTP_AUTHENTICATION_EVENT;
auth->auth_flags = 0;
auth->auth_length = sizeof(*auth);
@@ -2035,8 +1934,7 @@
"SCTP: peer sent chunk list w/o AUTH\n");
return (-1);
}
- if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && peer_supports_asconf &&
- !peer_supports_auth) {
+ if (peer_supports_asconf && !peer_supports_auth) {
SCTPDBG(SCTP_DEBUG_AUTH1,
"SCTP: peer supports ASCONF but not AUTH\n");
return (-1);
Modified: trunk/sys/netinet/sctp_auth.h
===================================================================
--- trunk/sys/netinet/sctp_auth.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_auth.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,33 +32,26 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_auth.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_auth.h 271750 2014-09-18 09:49:49Z tuexen $");
#ifndef _NETINET_SCTP_AUTH_H_
#define _NETINET_SCTP_AUTH_H_
+#include <netinet/sctp_os.h>
/* digest lengths */
#define SCTP_AUTH_DIGEST_LEN_SHA1 20
-#define SCTP_AUTH_DIGEST_LEN_SHA224 28
#define SCTP_AUTH_DIGEST_LEN_SHA256 32
-#define SCTP_AUTH_DIGEST_LEN_SHA384 48
-#define SCTP_AUTH_DIGEST_LEN_SHA512 64
-#define SCTP_AUTH_DIGEST_LEN_MAX 64
+#define SCTP_AUTH_DIGEST_LEN_MAX SCTP_AUTH_DIGEST_LEN_SHA256
/* random sizes */
#define SCTP_AUTH_RANDOM_SIZE_DEFAULT 32
#define SCTP_AUTH_RANDOM_SIZE_REQUIRED 32
-#define SCTP_AUTH_RANDOM_SIZE_MAX 256
/* union of all supported HMAC algorithm contexts */
typedef union sctp_hash_context {
- SHA1_CTX sha1;
-#ifdef HAVE_SHA2
- SHA256_CTX sha256;
- SHA384_CTX sha384;
- SHA512_CTX sha512;
-#endif
+ SCTP_SHA1_CTX sha1;
+ SCTP_SHA256_CTX sha256;
} sctp_hash_context_t;
typedef struct sctp_key {
@@ -120,7 +113,6 @@
extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
-extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
extern int
sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list,
uint8_t * ptr);
@@ -163,7 +155,7 @@
/* hmac list handling */
-extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint16_t num_hmacs);
extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);
Modified: trunk/sys/netinet/sctp_bsd_addr.c
===================================================================
--- trunk/sys/netinet/sctp_bsd_addr.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_bsd_addr.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_bsd_addr.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_bsd_addr.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -97,22 +97,15 @@
void
sctp_startup_iterator(void)
{
- static int called = 0;
- int ret;
-
- if (called) {
+ if (sctp_it_ctl.thread_proc) {
/* You only get one */
return;
}
- /* init the iterator head */
- called = 1;
- sctp_it_ctl.iterator_running = 0;
- sctp_it_ctl.iterator_flags = 0;
- sctp_it_ctl.cur_it = NULL;
+ /* Initialize global locks here, thus only once. */
SCTP_ITERATOR_LOCK_INIT();
SCTP_IPI_ITERATOR_WQ_INIT();
TAILQ_INIT(&sctp_it_ctl.iteratorhead);
- ret = kproc_create(sctp_iterator_thread,
+ kproc_create(sctp_iterator_thread,
(void *)NULL,
&sctp_it_ctl.thread_proc,
RFPROC,
@@ -153,12 +146,12 @@
static uint32_t
-sctp_is_desired_interface_type(struct ifaddr *ifa)
+sctp_is_desired_interface_type(struct ifnet *ifn)
{
int result;
/* check the interface type to see if it's one we care about */
- switch (ifa->ifa_ifp->if_type) {
+ switch (ifn->if_type) {
case IFT_ETHER:
case IFT_ISO88023:
case IFT_ISO88024:
@@ -217,6 +210,10 @@
IFNET_RLOCK();
TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) {
+ if (sctp_is_desired_interface_type(ifn) == 0) {
+ /* non desired type */
+ continue;
+ }
IF_ADDR_RLOCK(ifn);
TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
if (ifa->ifa_addr == NULL) {
@@ -241,10 +238,6 @@
default:
continue;
}
- if (sctp_is_desired_interface_type(ifa) == 0) {
- /* non desired type */
- continue;
- }
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
@@ -301,6 +294,9 @@
{
uint32_t ifa_flags = 0;
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ return;
+ }
/*
* BSD only has one VRF, if this changes we will need to hook in the
* right things here to get the id to pass to the address managment
@@ -318,6 +314,10 @@
if (ifa->ifa_addr == NULL) {
return;
}
+ if (sctp_is_desired_interface_type(ifa->ifa_ifp) == 0) {
+ /* non desired type */
+ return;
+ }
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
@@ -339,22 +339,16 @@
/* non inet/inet6 skip */
return;
}
-
- if (sctp_is_desired_interface_type(ifa) == 0) {
- /* non desired type */
- return;
- }
if (cmd == RTM_ADD) {
(void)sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp,
- ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type,
- ifa->ifa_ifp->if_xname,
+ ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type, ifa->ifa_ifp->if_xname,
(void *)ifa, ifa->ifa_addr, ifa_flags, 1);
} else {
sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr,
ifa->ifa_ifp->if_index,
- ifa->ifa_ifp->if_xname
- );
+ ifa->ifa_ifp->if_xname);
+
/*
* We don't bump refcount here so when it completes the
* final delete will happen.
@@ -412,9 +406,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- if (SCTP_BUF_IS_EXTENDED(m)) {
- sctp_log_mb(m, SCTP_MBUF_IALLOC);
- }
+ sctp_log_mb(m, SCTP_MBUF_IALLOC);
}
#endif
return (m);
@@ -423,11 +415,12 @@
#ifdef SCTP_PACKET_LOGGING
void
-sctp_packet_log(struct mbuf *m, int length)
+sctp_packet_log(struct mbuf *m)
{
int *lenat, thisone;
void *copyto;
uint32_t *tick_tock;
+ int length;
int total_len;
int grabbed_lock = 0;
int value, newval, thisend, thisbegin;
@@ -437,6 +430,7 @@
* (value) -ticks of log (ticks) o -ip packet o -as logged -
* where this started (thisbegin) x <--end points here
*/
+ length = SCTP_HEADER_LEN(m);
total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
/* Log a packet to the buffer. */
if (total_len > SCTP_PACKET_LOG_SIZE) {
Modified: trunk/sys/netinet/sctp_bsd_addr.h
===================================================================
--- trunk/sys/netinet/sctp_bsd_addr.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_bsd_addr.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_bsd_addr.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_bsd_addr.h 237540 2012-06-24 21:25:54Z tuexen $");
#ifndef _NETINET_SCTP_BSD_ADDR_H_
#define _NETINET_SCTP_BSD_ADDR_H_
@@ -54,7 +54,7 @@
#ifdef SCTP_PACKET_LOGGING
-void sctp_packet_log(struct mbuf *m, int length);
+void sctp_packet_log(struct mbuf *m);
int sctp_copy_out_packet_log(uint8_t * target, int length);
#endif
Modified: trunk/sys/netinet/sctp_cc_functions.c
===================================================================
--- trunk/sys/netinet/sctp_cc_functions.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_cc_functions.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_cc_functions.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_cc_functions.c 302237 2016-06-27 22:10:07Z bdrewery $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -54,6 +54,19 @@
#define SHIFT_MPTCP_MULTI 8
+/*
+ * Apply the association's optional congestion-window ceiling to one path.
+ *
+ * assoc: association whose max_cwnd is consulted (only read here); a
+ *        max_cwnd of 0 disables the limit.
+ * net:   destination whose cwnd may be lowered (only net->cwnd is written).
+ *
+ * The window is touched only when it exceeds both max_cwnd and
+ * (net->mtu - sizeof(struct sctphdr)).  In that case it is set to max_cwnd
+ * and then raised back to (net->mtu - sizeof(struct sctphdr)) if max_cwnd
+ * is smaller, so the ceiling never pushes cwnd below that value.
+ *
+ * NOTE(review): sizeof yields size_t, so the subtraction is presumably done
+ * in unsigned arithmetic and would wrap if net->mtu were ever smaller than
+ * the SCTP common header - confirm callers guarantee a sane mtu.
+ */
static void
+sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
+{
+ /* Act only if a limit is configured and cwnd is above it and above the floor. */
+ if ((assoc->max_cwnd > 0) &&
+ (net->cwnd > assoc->max_cwnd) &&
+ (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+ net->cwnd = assoc->max_cwnd;
+ /* A ceiling below the floor is overridden by the floor. */
+ if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+ net->cwnd = net->mtu - sizeof(struct sctphdr);
+ }
+ }
+}
+
+static void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
struct sctp_association *assoc;
@@ -81,8 +94,9 @@
net->cwnd = net->mtu - sizeof(struct sctphdr);
}
}
+ sctp_enforce_cwnd_limit(assoc, net);
net->ssthresh = assoc->peers_rwnd;
- SDT_PROBE(sctp, cwnd, net, init,
+ SDT_PROBE5(sctp, cwnd, net, init,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
0, net->cwnd);
if (SCTP_BASE_SYSCTL(sctp_logging_level) &
@@ -179,7 +193,8 @@
}
}
net->cwnd = net->ssthresh;
- SDT_PROBE(sctp, cwnd, net, fr,
+ sctp_enforce_cwnd_limit(asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, fr,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
old_cwnd, net->cwnd);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
@@ -212,7 +227,8 @@
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -246,7 +262,7 @@
*/
/* Probe point 5 */
probepoint |= ((5 << 16) | 1);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -267,7 +283,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -291,7 +307,7 @@
*/
/* Probe point 6 */
probepoint |= ((6 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -303,7 +319,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -334,7 +350,7 @@
*/
/* Probe point 7 */
probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -383,7 +399,7 @@
/* We caused it maybe.. back off? */
/* PROBE POINT 1 */
probepoint |= ((1 << 16) | 1);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -401,7 +417,7 @@
}
/* Probe point 2 */
probepoint |= ((2 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -414,7 +430,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -427,6 +443,7 @@
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 2;
@@ -437,7 +454,7 @@
/* bw & rtt decreased */
/* Probe point 3 */
probepoint |= ((3 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -449,7 +466,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -458,6 +475,7 @@
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 3;
@@ -468,7 +486,7 @@
/* The bw decreased but rtt stayed the same */
/* Probe point 4 */
probepoint |= ((4 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -480,7 +498,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -489,6 +507,7 @@
if ((net->cc_mod.rtcc.vol_reduce) &&
(inst_ind != SCTP_INST_GAINING)) {
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
net->cc_mod.rtcc.vol_reduce--;
}
net->cc_mod.rtcc.last_step_state = 4;
@@ -517,7 +536,7 @@
*/
/* PROBE POINT 0 */
probepoint = (((uint64_t) net->cwnd) << 32);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -529,7 +548,7 @@
oth |= net->cc_mod.rtcc.step_cnt;
oth <<= 16;
oth |= net->cc_mod.rtcc.last_step_state;
- SDT_PROBE(sctp, cwnd, net, rttstep,
+ SDT_PROBE5(sctp, cwnd, net, rttstep,
vtag,
((net->cc_mod.rtcc.lbw << 32) | nbw),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -629,7 +648,7 @@
/* Can't determine do not change */
probepoint |= ((0xd << 16) | inst_ind);
}
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((nbw << 32) | inst_bw),
((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
@@ -789,7 +808,7 @@
(((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
(stcb->rport);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
nbw,
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -883,11 +902,12 @@
break;
}
net->cwnd += incr;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, incr,
SCTP_CWND_LOG_FROM_SS);
}
- SDT_PROBE(sctp, cwnd, net, ack,
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -949,7 +969,8 @@
break;
}
net->cwnd += incr;
- SDT_PROBE(sctp, cwnd, net, ack,
+ sctp_enforce_cwnd_limit(asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -981,11 +1002,11 @@
old_cwnd = net->cwnd;
net->cwnd = net->mtu;
- SDT_PROBE(sctp, cwnd, net, ack,
+ SDT_PROBE5(sctp, cwnd, net, ack,
stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
old_cwnd, net->cwnd);
SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
- net, net->cwnd);
+ (void *)net, net->cwnd);
}
@@ -1052,7 +1073,7 @@
}
net->cwnd = net->mtu;
net->partial_bytes_acked = 0;
- SDT_PROBE(sctp, cwnd, net, to,
+ SDT_PROBE5(sctp, cwnd, net, to,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1112,7 +1133,7 @@
net->RTO <<= 1;
}
net->cwnd = net->ssthresh;
- SDT_PROBE(sctp, cwnd, net, ecn,
+ SDT_PROBE5(sctp, cwnd, net, ecn,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1131,12 +1152,9 @@
uint32_t * bottle_bw, uint32_t * on_queue)
{
uint32_t bw_avail;
- int rtt;
unsigned int incr;
int old_cwnd = net->cwnd;
- /* need real RTT in msd for this calc */
- rtt = net->rtt / 1000;
/* get bottle neck bw */
*bottle_bw = ntohl(cp->bottle_bw);
/* and whats on queue */
@@ -1145,10 +1163,11 @@
* adjust the on-queue if our flight is more it could be that the
* router has not yet gotten data "in-flight" to it
*/
- if (*on_queue < net->flight_size)
+ if (*on_queue < net->flight_size) {
*on_queue = net->flight_size;
- /* calculate the available space */
- bw_avail = (*bottle_bw * rtt) / 1000;
+ }
+ /* rtt is measured in micro seconds, bottle_bw in bytes per second */
+ bw_avail = (uint32_t) (((uint64_t) (*bottle_bw) * net->rtt) / (uint64_t) 1000000);
if (bw_avail > *bottle_bw) {
/*
* Cap the growth to no more than the bottle neck. This can
@@ -1168,7 +1187,6 @@
int seg_inflight, seg_onqueue, my_portion;
net->partial_bytes_acked = 0;
-
/* how much are we over queue size? */
incr = *on_queue - bw_avail;
if (stcb->asoc.seen_a_sack_this_pkt) {
@@ -1231,9 +1249,10 @@
/* We always have 1 MTU */
net->cwnd = net->mtu;
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (net->cwnd - old_cwnd != 0) {
/* log only changes */
- SDT_PROBE(sctp, cwnd, net, pd,
+ SDT_PROBE5(sctp, cwnd, net, pd,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1255,7 +1274,8 @@
net->ssthresh = net->cwnd;
if (burst_limit) {
net->cwnd = (net->flight_size + (burst_limit * net->mtu));
- SDT_PROBE(sctp, cwnd, net, bl,
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
+ SDT_PROBE5(sctp, cwnd, net, bl,
stcb->asoc.my_vtag,
((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
net,
@@ -1331,7 +1351,7 @@
probepoint = (((uint64_t) net->cwnd) << 32);
/* Probe point 8 */
probepoint |= ((8 << 16) | 0);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
((net->cc_mod.rtcc.lbw << 32) | 0),
((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
@@ -1394,7 +1414,7 @@
vtag = (net->rtt << 32) |
(((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
(stcb->rport);
- SDT_PROBE(sctp, cwnd, net, rttvar,
+ SDT_PROBE5(sctp, cwnd, net, rttvar,
vtag,
0,
0,
@@ -1593,6 +1613,7 @@
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
int cur_val, i, indx, incr;
+ int old_cwnd = net->cwnd;
cur_val = net->cwnd >> 10;
indx = SCTP_HS_TABLE_SIZE - 1;
@@ -1601,14 +1622,8 @@
/* normal mode */
if (net->net_ack > net->mtu) {
net->cwnd += net->mtu;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
- }
} else {
net->cwnd += net->net_ack;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
- }
}
} else {
for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
@@ -1620,10 +1635,11 @@
net->last_hs_used = indx;
incr = ((sctp_cwnd_adjust[indx].increase) << 10);
net->cwnd += incr;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
- sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
- }
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SS);
+ }
}
static void
@@ -1661,6 +1677,7 @@
net->last_hs_used = indx;
}
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
}
@@ -1717,7 +1734,8 @@
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -1792,9 +1810,7 @@
if (net->cwnd <= net->ssthresh) {
/* We are in slow start */
if (net->flight_size + net->net_ack >= net->cwnd) {
-
sctp_hs_cwnd_increase(stcb, net);
-
} else {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
@@ -1808,6 +1824,7 @@
(net->partial_bytes_acked >= net->cwnd)) {
net->partial_bytes_acked -= net->cwnd;
net->cwnd += net->mtu;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->mtu,
SCTP_CWND_LOG_FROM_CA);
@@ -1918,10 +1935,9 @@
return;
}
net->cc_mod.htcp_ca.bytecount += net->net_ack;
-
- if (net->cc_mod.htcp_ca.bytecount >= net->cwnd - ((net->cc_mod.htcp_ca.alpha >> 7 ? : 1) * net->mtu)
- && now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT
- && net->cc_mod.htcp_ca.minRTT > 0) {
+ if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
+ (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
+ (net->cc_mod.htcp_ca.minRTT > 0)) {
uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);
if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
@@ -2047,6 +2063,7 @@
SCTP_CWND_LOG_FROM_SS);
}
}
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
} else {
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
sctp_log_cwnd(stcb, net, net->net_ack,
@@ -2068,6 +2085,7 @@
*/
net->cwnd += net->mtu;
net->partial_bytes_acked = 0;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
htcp_alpha_update(&net->cc_mod.htcp_ca);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, net->mtu,
@@ -2114,6 +2132,7 @@
*/
net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
net->ssthresh = stcb->asoc.peers_rwnd;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
htcp_init(net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
@@ -2217,6 +2236,7 @@
htcp_reset(&net->cc_mod.htcp_ca);
net->ssthresh = htcp_recalc_ssthresh(net);
net->cwnd = net->ssthresh;
+ sctp_enforce_cwnd_limit(asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
SCTP_CWND_LOG_FROM_FR);
@@ -2247,7 +2267,8 @@
}
sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
- stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
sctp_timer_start(SCTP_TIMER_TYPE_SEND,
stcb->sctp_ep, stcb, net);
}
@@ -2296,6 +2317,7 @@
net->RTO <<= 1;
}
net->cwnd = net->ssthresh;
+ sctp_enforce_cwnd_limit(&stcb->asoc, net);
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
}
Modified: trunk/sys/netinet/sctp_constants.h
===================================================================
--- trunk/sys/netinet/sctp_constants.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_constants.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,21 +32,15 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_constants.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_constants.h 296052 2016-02-25 18:46:06Z tuexen $");
#ifndef _NETINET_SCTP_CONSTANTS_H_
#define _NETINET_SCTP_CONSTANTS_H_
+
/* IANA assigned port number for SCTP over UDP encapsulation */
-/* For freebsd we cannot bind the port at
- * startup. Otherwise what will happen is
- * we really won't be bound. The user must
- * put it into the sysctl... or we need
- * to build a special timer for this to allow
- * us to wait 1 second or so after the system
- * comes up.
- */
-#define SCTP_OVER_UDP_TUNNELING_PORT 0
+#define SCTP_OVER_UDP_TUNNELING_PORT 9899
+
/* Number of packets to get before sack sent by default */
#define SCTP_DEFAULT_SACK_FREQ 2
@@ -73,6 +67,8 @@
*/
#define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
+/* Largest length of a chunk */
+#define SCTP_MAX_CHUNK_LENGTH 0xffff
/* Number of addresses where we just skip the counting */
#define SCTP_COUNT_LIMIT 40
@@ -274,18 +270,9 @@
/* how many addresses per assoc remote and local */
#define SCTP_SCALE_FOR_ADDR 2
-/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_AUTO_ASCONF 1
-
/* default MULTIPLE_ASCONF mode enable(1)/disable(0) value (sysctl) */
#define SCTP_DEFAULT_MULTIPLE_ASCONFS 0
-/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_BASE 0
-
-/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
-#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
-
/*
* Theshold for rwnd updates, we have to read (sb_hiwat >>
* SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
@@ -371,6 +358,7 @@
#define SCTP_DATAGRAM_ACKED 10010
#define SCTP_DATAGRAM_MARKED 20010
#define SCTP_FORWARD_TSN_SKIP 30010
+#define SCTP_DATAGRAM_NR_ACKED 40010
/* chunk output send from locations */
#define SCTP_OUTPUT_FROM_USR_SEND 0
@@ -473,7 +461,7 @@
/*
- * SCTP states for internal state machine XXX (should match "user" values)
+ * SCTP states for internal state machine
*/
#define SCTP_STATE_EMPTY 0x0000
#define SCTP_STATE_INUSE 0x0001
@@ -521,9 +509,6 @@
/* How long a cookie lives in milli-seconds */
#define SCTP_DEFAULT_COOKIE_LIFE 60000
-/* resource limit of streams */
-#define MAX_SCTP_STREAMS 2048
-
/* Maximum the mapping array will grow to (TSN mapping array) */
#define SCTP_MAPPING_ARRAY 512
@@ -630,10 +615,6 @@
/* 30 seconds + RTO (in ms) */
#define SCTP_HB_DEFAULT_MSEC 30000
-/* Max time I will wait for Shutdown to complete */
-#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
-
-
/*
* This is how long a secret lives, NOT how long a cookie lives how many
* ticks the current secret will live.
@@ -658,6 +639,7 @@
/* How many streams I request initally by default */
#define SCTP_OSTREAM_INITIAL 10
+#define SCTP_ISTREAM_INITIAL 2048
/*
* How many smallest_mtu's need to increase before a window update sack is
@@ -728,7 +710,6 @@
/* small chunk store for looking at chunk_list in auth */
#define SCTP_SMALL_CHUNK_STORE 260
-#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
#define SCTP_HOW_MANY_SECRETS 2 /* how many secrets I keep */
#define SCTP_NUMBER_OF_SECRETS 8 /* or 8 * 4 = 32 octets */
@@ -774,6 +755,9 @@
*/
#define SCTP_DEFAULT_SPLIT_POINT_MIN 2904
+/* Maximum length of diagnostic information in error causes */
+#define SCTP_DIAG_INFO_LEN 64
+
/* ABORT CODES and other tell-tale location
* codes are generated by adding the below
* to the instance id.
@@ -780,18 +764,19 @@
*/
/* File defines */
-#define SCTP_FROM_SCTP_INPUT 0x10000000
-#define SCTP_FROM_SCTP_PCB 0x20000000
-#define SCTP_FROM_SCTP_INDATA 0x30000000
-#define SCTP_FROM_SCTP_TIMER 0x40000000
-#define SCTP_FROM_SCTP_USRREQ 0x50000000
-#define SCTP_FROM_SCTPUTIL 0x60000000
-#define SCTP_FROM_SCTP6_USRREQ 0x70000000
-#define SCTP_FROM_SCTP_ASCONF 0x80000000
-#define SCTP_FROM_SCTP_OUTPUT 0x90000000
-#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
-#define SCTP_FROM_SCTP_PANDA 0xb0000000
-#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+#define SCTP_FROM_SCTP_INPUT 0x10000000
+#define SCTP_FROM_SCTP_PCB 0x20000000
+#define SCTP_FROM_SCTP_INDATA 0x30000000
+#define SCTP_FROM_SCTP_TIMER 0x40000000
+#define SCTP_FROM_SCTP_USRREQ 0x50000000
+#define SCTP_FROM_SCTPUTIL 0x60000000
+#define SCTP_FROM_SCTP6_USRREQ 0x70000000
+#define SCTP_FROM_SCTP_ASCONF 0x80000000
+#define SCTP_FROM_SCTP_OUTPUT 0x90000000
+#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
+#define SCTP_FROM_SCTP_PANDA 0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+#define SCTP_FROM_SCTP_CC_FUNCTIONS 0xd0000000
/* Location ID's */
#define SCTP_LOC_1 0x00000001
@@ -827,6 +812,8 @@
#define SCTP_LOC_31 0x0000001f
#define SCTP_LOC_32 0x00000020
#define SCTP_LOC_33 0x00000021
+#define SCTP_LOC_34 0x00000022
+#define SCTP_LOC_35 0x00000023
/* Free assoc codes */
@@ -992,12 +979,13 @@
(((uint8_t *)&(a)->s_addr)[1] == 168)))
+/*
+ * True when the first byte of (a)->s_addr is 127.  After this change the
+ * remaining three bytes are no longer compared, so every 127.x.x.x address
+ * matches rather than only 127.0.0.1 (dotted form assumes s_addr is kept in
+ * network byte order - confirm).
+ */
#define IN4_ISLOOPBACK_ADDRESS(a) \
- ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
- (((uint8_t *)&(a)->s_addr)[1] == 0) && \
- (((uint8_t *)&(a)->s_addr)[2] == 0) && \
- (((uint8_t *)&(a)->s_addr)[3] == 1))
+ (((uint8_t *)&(a)->s_addr)[0] == 127)
+/*
+ * True when the first two bytes of (a)->s_addr are 169 and 254, i.e. a
+ * 169.254.x.x address (same byte-order assumption as above).
+ */
+#define IN4_ISLINKLOCAL_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 169) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 254))
+
#if defined(_KERNEL)
#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x))
Modified: trunk/sys/netinet/sctp_crc32.c
===================================================================
--- trunk/sys/netinet/sctp_crc32.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_crc32.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_crc32.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_crc32.c 235828 2012-05-23 11:26:28Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
Modified: trunk/sys/netinet/sctp_crc32.h
===================================================================
--- trunk/sys/netinet/sctp_crc32.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_crc32.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_crc32.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_crc32.h 235828 2012-05-23 11:26:28Z tuexen $");
#ifndef _NETINET_SCTP_CRC32_H_
#define _NETINET_SCTP_CRC32_H_
Modified: trunk/sys/netinet/sctp_dtrace_declare.h
===================================================================
--- trunk/sys/netinet/sctp_dtrace_declare.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_dtrace_declare.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_dtrace_declare.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_dtrace_declare.h 235828 2012-05-23 11:26:28Z tuexen $");
#ifndef _NETINET_SCTP_DTRACE_DECLARE_H_
#define _NETINET_SCTP_DTRACE_DECLARE_H_
Modified: trunk/sys/netinet/sctp_dtrace_define.h
===================================================================
--- trunk/sys/netinet/sctp_dtrace_define.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_dtrace_define.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_dtrace_define.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_dtrace_define.h 260817 2014-01-17 10:58:59Z avg $");
#ifndef _NETINET_SCTP_DTRACE_DEFINE_H_
#define _NETINET_SCTP_DTRACE_DEFINE_H_
@@ -46,189 +46,132 @@
/* Cwnd probe - tracks changes in the congestion window on a netp */
/********************************************************/
/* Initial */
-SDT_PROBE_DEFINE(sctp, cwnd, net, init, init);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, init,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* ACK-INCREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, ack, ack);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, ack,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* ACK-INCREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, rttvar, rttvar);
-/* The Vtag << 32 | localport << 16 | remoteport */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 0, "uint64_t");
-/* obw | nbw */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 1, "uint64_t");
-/* bwrtt | newrtt */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 2, "uint64_t");
-/* flight */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 3, "uint64_t");
-/* (cwnd << 32) | point << 16 | retval(0/1) */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttvar, 4, "uint64_t");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, rttvar,
+ "uint64_t", /* The Vtag << 32 | localport << 16 | remoteport */
+ "uint64_t", /* obw | nbw */
+ "uint64_t", /* bwrtt | newrtt */
+ "uint64_t", /* flight */
+ "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
+SDT_PROBE_DEFINE5(sctp, cwnd, net, rttstep,
+ "uint64_t", /* The Vtag << 32 | localport << 16 | remoteport */
+ "uint64_t", /* obw | nbw */
+ "uint64_t", /* bwrtt | newrtt */
+ "uint64_t", /* flight */
+ "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
-SDT_PROBE_DEFINE(sctp, cwnd, net, rttstep, rttstep);
-/* The Vtag << 32 | localport << 16 | remoteport */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 0, "uint64_t");
-/* obw | nbw */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 1, "uint64_t");
-/* bwrtt | nrtt */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 2, "uint64_t");
-/* cwnd_saved | stepcnt << 16 | oldstep */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 3, "uint64_t");
-/* (cwnd << 32) | point << 16 | retval(0/1) */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, rttstep, 4, "uint64_t");
-
-
/* FastRetransmit-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, fr, fr);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, fr,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* TimeOut-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, to, to);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, to,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* BurstLimit-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, bl, bl);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, bl,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* ECN-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, ecn, ecn);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, ecn,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
/* PacketDrop-DECREASE */
-SDT_PROBE_DEFINE(sctp, cwnd, net, pd, pd);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 2, "uintptr_t");
-/* The old value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 4, "int");
+SDT_PROBE_DEFINE5(sctp, cwnd, net, pd,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The old value of the cwnd */
+ "int"); /* The new value of the cwnd */
-
-
/********************************************************/
/* Rwnd probe - tracks changes in the receiver window for an assoc */
/********************************************************/
-SDT_PROBE_DEFINE(sctp, rwnd, assoc, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 1, "uint32_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 2, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 3, "int");
+SDT_PROBE_DEFINE4(sctp, rwnd, assoc, val,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
/********************************************************/
/* flight probe - tracks changes in the flight size on a net or assoc */
/********************************************************/
-SDT_PROBE_DEFINE(sctp, flightsize, net, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 1, "uint32_t");
-/* The pointer to the struct sctp_nets * changing */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 2, "uintptr_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 3, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 4, "int");
+SDT_PROBE_DEFINE5(sctp, flightsize, net, val,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "uintptr_t", /* The pointer to the struct sctp_nets * changing */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
+
/********************************************************/
/* The total flight version */
/********************************************************/
-SDT_PROBE_DEFINE(sctp, flightsize, assoc, val, val);
-/* The Vtag for this end */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 0, "uint32_t");
-/* The port number of the local side << 16 | port number of remote
- * in network byte order.
- */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 1, "uint32_t");
-/* The up/down amount */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 2, "int");
-/* The new value of the cwnd */
-SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 3, "int");
+SDT_PROBE_DEFINE4(sctp, flightsize, assoc, val,
+ "uint32_t", /* The Vtag for this end */
+ "uint32_t", /*
+ * The port number of the local side << 16 | port number
+ * of remote in network byte order.
+ */
+ "int", /* The up/down amount */
+ "int"); /* The new value of the cwnd */
#endif
Modified: trunk/sys/netinet/sctp_header.h
===================================================================
--- trunk/sys/netinet/sctp_header.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_header.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_header.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_header.h 294158 2016-01-16 16:46:00Z tuexen $");
#ifndef _NETINET_SCTP_HEADER_H_
#define _NETINET_SCTP_HEADER_H_
@@ -83,12 +83,6 @@
uint16_t addr_type[2]; /* array of supported address types */
} SCTP_PACKED;
-/* ECN parameter */
-struct sctp_ecn_supported_param {
- struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
-} SCTP_PACKED;
-
-
/* heartbeat info parameter */
struct sctp_heartbeat_info_param {
struct sctp_paramhdr ph;
@@ -209,34 +203,6 @@
*/
} SCTP_PACKED;
-
-/* Used for NAT state error cause */
-struct sctp_missing_nat_state {
- uint16_t cause;
- uint16_t length;
- uint8_t data[];
-} SCTP_PACKED;
-
-
-struct sctp_inv_mandatory_param {
- uint16_t cause;
- uint16_t length;
- uint32_t num_param;
- uint16_t param;
- /*
- * We include this to 0 it since only a missing cookie will cause
- * this error.
- */
- uint16_t resv;
-} SCTP_PACKED;
-
-struct sctp_unresolv_addr {
- uint16_t cause;
- uint16_t length;
- uint16_t addr_type;
- uint16_t reserved; /* Only one invalid addr type */
-} SCTP_PACKED;
-
/* state cookie parameter */
struct sctp_state_cookie_param {
struct sctp_paramhdr ph;
@@ -377,28 +343,11 @@
struct sctp_chunkhdr ch;
} SCTP_PACKED;
-/* Oper error holding a stale cookie */
-struct sctp_stale_cookie_msg {
- struct sctp_paramhdr ph;/* really an error cause */
- uint32_t time_usec;
-} SCTP_PACKED;
-
struct sctp_adaptation_layer_indication {
struct sctp_paramhdr ph;
uint32_t indication;
} SCTP_PACKED;
-struct sctp_cookie_while_shutting_down {
- struct sctphdr sh;
- struct sctp_chunkhdr ch;
- struct sctp_paramhdr ph;/* really an error cause */
-} SCTP_PACKED;
-
-struct sctp_shutdown_complete_msg {
- struct sctphdr sh;
- struct sctp_shutdown_complete_chunk shut_cmp;
-} SCTP_PACKED;
-
/*
* draft-ietf-tsvwg-addip-sctp
*/
@@ -457,6 +406,11 @@
/**********STREAM RESET STUFF ******************/
+struct sctp_stream_reset_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+} SCTP_PACKED;
+
struct sctp_stream_reset_out_request {
struct sctp_paramhdr ph;
uint32_t request_seq; /* monotonically increasing seq no */
@@ -471,7 +425,6 @@
uint16_t list_of_streams[]; /* if not all list of streams */
} SCTP_PACKED;
-
struct sctp_stream_reset_tsn_request {
struct sctp_paramhdr ph;
uint32_t request_seq;
@@ -511,16 +464,6 @@
* streams then the request will need to be an overlay structure.
*/
-struct sctp_stream_reset_out_req {
- struct sctp_chunkhdr ch;
- struct sctp_stream_reset_out_request sr_req;
-} SCTP_PACKED;
-
-struct sctp_stream_reset_in_req {
- struct sctp_chunkhdr ch;
- struct sctp_stream_reset_in_request sr_req;
-} SCTP_PACKED;
-
struct sctp_stream_reset_tsn_req {
struct sctp_chunkhdr ch;
struct sctp_stream_reset_tsn_request sr_req;
@@ -567,12 +510,6 @@
uint8_t hmac[];
} SCTP_PACKED;
-struct sctp_auth_invalid_hmac {
- struct sctp_paramhdr ph;
- uint16_t hmac_id;
- uint16_t padding;
-} SCTP_PACKED;
-
/*
* we pre-reserve enough room for a ECNE or CWR AND a SACK with no missing
* pieces. If ENCE is missing we could have a couple of blocks. This way we
Modified: trunk/sys/netinet/sctp_indata.c
===================================================================
--- trunk/sys/netinet/sctp_indata.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_indata.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_indata.c 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_indata.c 294219 2016-01-17 12:39:35Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -224,9 +224,9 @@
}
seinfo = (struct sctp_extrcvinfo *)sinfo;
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) &&
- (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
+ (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) {
provide_nxt = 1;
- len += CMSG_SPACE(sizeof(struct sctp_rcvinfo));
+ len += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
} else {
provide_nxt = 0;
}
@@ -242,7 +242,7 @@
use_extended = 0;
}
- ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ ret = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
if (ret == NULL) {
/* No space */
return (ret);
@@ -251,6 +251,11 @@
/* We need a CMSG header followed by the struct */
cmh = mtod(ret, struct cmsghdr *);
+ /*
+ * Make sure that there is no un-initialized padding between the
+ * cmsg header and cmsg data and after the cmsg data.
+ */
+ memset(cmh, 0, len);
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) {
cmh->cmsg_level = IPPROTO_SCTP;
cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo));
@@ -272,20 +277,20 @@
cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo));
cmh->cmsg_type = SCTP_NXTINFO;
nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh);
- nxtinfo->nxt_sid = seinfo->sreinfo_next_stream;
+ nxtinfo->nxt_sid = seinfo->serinfo_next_stream;
nxtinfo->nxt_flags = 0;
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) {
nxtinfo->nxt_flags |= SCTP_UNORDERED;
}
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) {
nxtinfo->nxt_flags |= SCTP_NOTIFICATION;
}
- if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
+ if (seinfo->serinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) {
nxtinfo->nxt_flags |= SCTP_COMPLETE;
}
- nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid;
- nxtinfo->nxt_length = seinfo->sreinfo_next_length;
- nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid;
+ nxtinfo->nxt_ppid = seinfo->serinfo_next_ppid;
+ nxtinfo->nxt_length = seinfo->serinfo_next_length;
+ nxtinfo->nxt_assoc_id = seinfo->serinfo_next_aid;
cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo)));
SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo));
}
@@ -562,7 +567,8 @@
struct sctp_queued_to_read *at;
int queue_needed;
uint16_t nxt_todel;
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
queue_needed = 1;
asoc->size_on_all_streams += control->length;
@@ -573,13 +579,13 @@
sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
}
SCTPDBG(SCTP_DEBUG_INDATA1,
- "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
- (uint32_t) control->sinfo_stream,
- (uint32_t) strm->last_sequence_delivered,
- (uint32_t) nxt_todel);
+ "queue to stream called for sid:%u ssn:%u tsn:%u lastdel:%u nxt:%u\n",
+ (uint32_t) control->sinfo_stream, (uint32_t) control->sinfo_ssn,
+ (uint32_t) control->sinfo_tsn,
+ (uint32_t) strm->last_sequence_delivered, (uint32_t) nxt_todel);
if (SCTP_SSN_GE(strm->last_sequence_delivered, control->sinfo_ssn)) {
/* The incoming sseq is behind where we last delivered? */
- SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
control->sinfo_ssn, strm->last_sequence_delivered);
protocol_error:
/*
@@ -587,30 +593,30 @@
* association destruction
*/
TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- (sizeof(uint32_t) * 3);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_1);
- ippp++;
- *ippp = control->sinfo_tsn;
- ippp++;
- *ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
- }
+ snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ strm->last_sequence_delivered, control->sinfo_tsn,
+ control->sinfo_stream, control->sinfo_ssn);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
if (nxt_todel == control->sinfo_ssn) {
/* can be delivered right away? */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_STR_LOGGING_ENABLE) {
@@ -626,7 +632,7 @@
sctp_add_to_readq(stcb->sctp_ep, stcb,
control,
&stcb->sctp_socket->so_rcv, 1,
- SCTP_READ_LOCK_NOT_HELD, SCTP_SO_NOT_LOCKED);
+ SCTP_READ_LOCK_NOT_HELD, SCTP_SO_LOCKED);
TAILQ_FOREACH_SAFE(control, &strm->inqueue, next, at) {
/* all delivered */
nxt_todel = strm->last_sequence_delivered + 1;
@@ -650,7 +656,7 @@
control,
&stcb->sctp_socket->so_rcv, 1,
SCTP_READ_LOCK_NOT_HELD,
- SCTP_SO_NOT_LOCKED);
+ SCTP_SO_LOCKED);
continue;
}
break;
@@ -662,6 +668,9 @@
* to put it on the queue.
*/
if (SCTP_TSN_GE(asoc->cumulative_tsn, control->sinfo_tsn)) {
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
goto protocol_error;
}
if (TAILQ_EMPTY(&strm->inqueue)) {
@@ -708,6 +717,9 @@
control->whoFrom = NULL;
}
sctp_free_a_readq(stcb, control);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
return;
} else {
if (TAILQ_NEXT(at, next) == NULL) {
@@ -727,6 +739,9 @@
}
}
}
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
}
/*
@@ -790,13 +805,12 @@
* but should we?
*/
if (stcb->sctp_socket) {
- pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
stcb->sctp_ep->partial_delivery_point);
} else {
pd_point = stcb->sctp_ep->partial_delivery_point;
}
if (sctp_is_all_msg_on_reasm(asoc, &tsize) || (tsize >= pd_point)) {
-
/*
* Yes, we setup to start reception, by
* backing down the TSN just in case we
@@ -841,7 +855,8 @@
sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
struct sctp_tmit_chunk *chk, int *abort_flag)
{
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
uint32_t cum_ackp1, prev_tsn, post_tsn;
struct sctp_tmit_chunk *at, *prev, *next;
@@ -866,30 +881,14 @@
* a FIRST fragment mark.
*/
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
-
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (sizeof(uint32_t) * 3);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_2);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
+ snprintf(msg, sizeof(msg),
+ "Expected B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if (asoc->fragmented_delivery_inprogress &&
(chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
@@ -899,28 +898,14 @@
* MIDDLE fragment NOT a FIRST
*/
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_3);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
+ snprintf(msg, sizeof(msg),
+ "Didn't expect B-bit for TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if (asoc->fragmented_delivery_inprogress) {
/*
@@ -933,30 +918,15 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
chk->rec.data.stream_number,
asoc->str_of_pdapi);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (sizeof(uint32_t) * 3);
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
+ snprintf(msg, sizeof(msg),
+ "Expected SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ asoc->str_of_pdapi,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
} else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
SCTP_DATA_UNORDERED &&
@@ -965,31 +935,15 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
chk->rec.data.stream_seq,
asoc->ssn_of_pdapi);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
+ snprintf(msg, sizeof(msg),
+ "Expected SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ asoc->ssn_of_pdapi,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
}
}
@@ -1059,31 +1013,14 @@
SCTP_DATA_FIRST_FRAG) {
SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a midlle or last but not a first\n");
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
+ snprintf(msg, sizeof(msg),
+ "Can't handle B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1093,36 +1030,39 @@
* Huh, need the correct STR here,
* they must be the same.
*/
- SCTP_PRINTF("Prev check - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sid:%d not the same as at:%d\n",
chk->rec.data.stream_number,
prev->rec.data.stream_number);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
+ snprintf(msg, sizeof(msg),
+ "Expect SID=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ prev->rec.data.stream_number,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+ /*
+ * Huh, need the same ordering here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, U-bit not constant\n");
+ snprintf(msg, sizeof(msg),
+ "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
+ (prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
+ chk->rec.data.TSN_seq,
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return;
+ }
if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
chk->rec.data.stream_seq !=
prev->rec.data.stream_seq) {
@@ -1133,30 +1073,15 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
chk->rec.data.stream_seq,
prev->rec.data.stream_seq);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Expect SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ prev->rec.data.stream_seq,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1166,31 +1091,14 @@
if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
SCTP_DATA_FIRST_FRAG) {
SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Expect B-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1210,30 +1118,14 @@
!= SCTP_DATA_LAST_FRAG) {
SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Expect only E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1249,31 +1141,14 @@
SCTP_DATA_LAST_FRAG) {
SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Didn't expect E-bit, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1286,34 +1161,36 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
chk->rec.data.stream_number,
next->rec.data.stream_number);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
-
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Required SID %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ next->rec.data.stream_number,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) {
+ /*
+ * Huh, need the same ordering here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next check - Gak, Evil plot, U-bit not constant\n");
+ snprintf(msg, sizeof(msg),
+ "Expect U-bit=%d for TSN=%8.8x, got U-bit=%d",
+ (next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0,
+ chk->rec.data.TSN_seq,
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) ? 1 : 0);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return;
+ }
if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
chk->rec.data.stream_seq !=
next->rec.data.stream_seq) {
@@ -1324,30 +1201,15 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
chk->rec.data.stream_seq,
next->rec.data.stream_seq);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
- ippp++;
- *ippp = chk->rec.data.TSN_seq;
- ippp++;
- *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "Required SSN %4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ next->rec.data.stream_seq,
+ chk->rec.data.TSN_seq,
+ chk->rec.data.stream_number,
+ chk->rec.data.stream_seq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return;
}
@@ -1418,7 +1280,6 @@
return (0);
}
-
static int
sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
@@ -1433,7 +1294,8 @@
int the_len;
int need_reasm_check = 0;
uint16_t strmno, strmseq;
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
struct sctp_queued_to_read *control;
int ordered;
uint32_t protocol_id;
@@ -1500,15 +1362,12 @@
*/
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
- (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))
- ) {
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))) {
/*
* wait a minute, this guy is gone, there is no longer a
* receiver. Send peer an ABORT!
*/
- struct mbuf *op_err;
-
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
@@ -1530,7 +1389,7 @@
*/
if (stcb->sctp_socket->so_rcv.sb_cc) {
/* some to read, wake-up */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -1546,7 +1405,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1568,30 +1427,25 @@
}
strmno = ntohs(ch->dp.stream_id);
if (strmno >= asoc->streamincnt) {
- struct sctp_paramhdr *phdr;
- struct mbuf *mb;
+ struct sctp_error_invalid_stream *cause;
- mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (mb != NULL) {
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_invalid_stream),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
/* add some space up front so prepend will work well */
- SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
- phdr = mtod(mb, struct sctp_paramhdr *);
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ cause = mtod(op_err, struct sctp_error_invalid_stream *);
/*
* Error causes are just param's and this one has
* two back to back phdr, one with the error type
* and size, the other with the streamid and a rsvd
*/
- SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
- phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
- phdr->param_length =
- htons(sizeof(struct sctp_paramhdr) * 2);
- phdr++;
- /* We insert the stream in the type field */
- phdr->param_type = ch->dp.stream_id;
- /* And set the length to 0 for the rsvd field */
- phdr->param_length = 0;
- sctp_queue_op_err(stcb, mb);
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_invalid_stream);
+ cause->cause.code = htons(SCTP_CAUSE_INVALID_STREAM);
+ cause->cause.length = htons(sizeof(struct sctp_error_invalid_stream));
+ cause->stream_id = ch->dp.stream_id;
+ cause->reserved = htons(0);
+ sctp_queue_op_err(stcb, op_err);
}
SCTP_STAT_INCR(sctps_badsid);
SCTP_TCB_LOCK_ASSERT(stcb);
@@ -1636,27 +1490,13 @@
/* The incoming sseq is behind where we last delivered? */
SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
strmseq, asoc->strmin[strmno].last_sequence_delivered);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
- ippp++;
- *ippp = tsn;
- ippp++;
- *ippp = ((strmno << 16) | strmseq);
-
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg), "Delivered SSN=%4.4x, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ asoc->strmin[strmno].last_sequence_delivered,
+ tsn, strmno, strmseq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
*abort_flag = 1;
return (0);
}
@@ -1669,16 +1509,10 @@
if (last_chunk == 0) {
dmbuf = SCTP_M_COPYM(*m,
(offset + sizeof(struct sctp_data_chunk)),
- the_len, M_DONTWAIT);
+ the_len, M_NOWAIT);
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = dmbuf; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(dmbuf, SCTP_MBUF_ICOPY);
}
#endif
} else {
@@ -1730,7 +1564,6 @@
sctp_alloc_a_readq(stcb, control);
sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
protocol_id,
- stcb->asoc.context,
strmno, strmseq,
chunk_flags,
dmbuf);
@@ -1791,7 +1624,6 @@
asoc->highest_tsn_inside_nr_map = tsn;
}
SCTP_STAT_INCR(sctps_recvexpressm);
- control->sinfo_tsn = tsn;
asoc->tsn_last_delivered = tsn;
asoc->fragment_flags = chunk_flags;
asoc->tsn_of_pdapi_last_delivered = tsn;
@@ -1858,7 +1690,6 @@
sctp_alloc_a_readq(stcb, control);
sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
protocol_id,
- stcb->asoc.context,
strmno, strmseq,
chunk_flags,
dmbuf);
@@ -1898,29 +1729,15 @@
control->whoFrom = NULL;
}
sctp_free_a_readq(stcb, control);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_15);
- ippp++;
- *ippp = tsn;
- ippp++;
- *ippp = ((strmno << 16) | strmseq);
+ snprintf(msg, sizeof(msg), "Reas. queue emtpy, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ tsn, strmno, strmseq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ if (last_chunk) {
+ *m = NULL;
}
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
return (0);
} else {
if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
@@ -1931,32 +1748,15 @@
control->whoFrom = NULL;
}
sctp_free_a_readq(stcb, control);
-
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_16);
- ippp++;
- *ippp = tsn;
- ippp++;
- *ippp = ((strmno << 16) | strmseq);
+ snprintf(msg, sizeof(msg), "PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ tsn, strmno, strmseq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_18;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ if (last_chunk) {
+ *m = NULL;
}
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
return (0);
}
}
@@ -1977,31 +1777,15 @@
control->whoFrom = NULL;
}
sctp_free_a_readq(stcb, control);
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) =
- sizeof(struct sctp_paramhdr) +
- (3 * sizeof(uint32_t));
- ph = mtod(oper,
- struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length =
- htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
- ippp++;
- *ippp = tsn;
- ippp++;
- *ippp = ((strmno << 16) | strmseq);
+ snprintf(msg, sizeof(msg), "No PD ongoing, got TSN=%8.8x, SID=%4.4x, SSN=%4.4x",
+ tsn, strmno, strmseq);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ if (last_chunk) {
+ *m = NULL;
}
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
- *abort_flag = 1;
return (0);
}
}
@@ -2065,6 +1849,9 @@
} else {
sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
if (*abort_flag) {
+ if (last_chunk) {
+ *m = NULL;
+ }
return (0);
}
}
@@ -2077,7 +1864,9 @@
* the assoc is now gone and chk was put onto the
* reasm queue, which has all been freed.
*/
- *m = NULL;
+ if (last_chunk) {
+ *m = NULL;
+ }
return (0);
}
}
@@ -2114,8 +1903,9 @@
*/
struct sctp_queued_to_read *ctl, *nctl;
- sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams);
+ sctp_reset_in_stream(stcb, liste->number_entries, liste->list_of_streams);
TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ sctp_send_deferred_reset_response(stcb, liste, SCTP_STREAM_RESET_RESULT_PERFORMED);
SCTP_FREE(liste, SCTP_M_STRESET);
/* sa_ignore FREED_MEMORY */
liste = TAILQ_FIRST(&asoc->resetHead);
@@ -2389,7 +2179,8 @@
*/
if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
}
sctp_send_shutdown(stcb,
((stcb->asoc.alternate) ? stcb->asoc.alternate : stcb->asoc.primary_destination));
@@ -2495,7 +2286,7 @@
* delivery queue and something can be delivered.
*/
if (stcb->sctp_socket) {
- pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ pd_point = min(SCTP_SB_LIMIT_RCV(stcb->sctp_socket) >> SCTP_PARTIAL_DELIVERY_SHIFT,
stcb->sctp_ep->partial_delivery_point);
} else {
pd_point = stcb->sctp_ep->partial_delivery_point;
@@ -2517,10 +2308,8 @@
int
sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
- struct sctphdr *sh, struct sctp_inpcb *inp,
- struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t * high_tsn,
- uint8_t use_mflowid, uint32_t mflowid,
- uint32_t vrf_id, uint16_t port)
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t * high_tsn)
{
struct sctp_data_chunk *ch, chunk_buf;
struct sctp_association *asoc;
@@ -2560,7 +2349,7 @@
*/
if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
/* we only handle mbufs that are singletons.. not chains */
- m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_NOWAIT, 1, MT_DATA);
if (m) {
/* ok lets see if we can copy the data up */
caddr_t *from, *to;
@@ -2600,37 +2389,31 @@
continue;
}
if (ch->ch.chunk_type == SCTP_DATA) {
- if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) {
+ if ((size_t)chk_length < sizeof(struct sctp_data_chunk)) {
/*
* Need to send an abort since we had a
* invalid data chunk.
*/
struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 2 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
+ snprintf(msg, sizeof(msg), "DATA chunk of length %d",
+ chk_length);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_21;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return (2);
+ }
+ if ((size_t)chk_length == sizeof(struct sctp_data_chunk)) {
+ /*
+ * Need to send an abort since we had an
+ * empty data chunk.
+ */
+ struct mbuf *op_err;
- if (op_err) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) +
- (2 * sizeof(uint32_t));
- ph = mtod(op_err, struct sctp_paramhdr *);
- ph->param_type =
- htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_19);
- ippp++;
- *ippp = asoc->cumulative_tsn;
-
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
- sctp_abort_association(inp, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
- vrf_id, port);
+ op_err = sctp_generate_no_user_data_cause(ch->dp.tsn);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_22;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (2);
}
#ifdef SCTP_AUDITING_ENABLED
@@ -2692,13 +2475,12 @@
*/
if (SCTP_BASE_SYSCTL(sctp_strict_data_order)) {
struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
- sctp_abort_association(inp, stcb,
- m, iphlen,
- sh, op_err,
- use_mflowid, mflowid,
- vrf_id, port);
+ snprintf(msg, sizeof(msg), "DATA chunk followed by chunk of type %2.2x",
+ ch->ch.chunk_type);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (2);
}
break;
@@ -2706,34 +2488,21 @@
/* unknown chunk type, use bit rules */
if (ch->ch.chunk_type & 0x40) {
/* Add a error report to the queue */
- struct mbuf *merr;
- struct sctp_paramhdr *phd;
+ struct mbuf *op_err;
+ struct sctp_gen_error_cause *cause;
- merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
- if (merr) {
- phd = mtod(merr, struct sctp_paramhdr *);
- /*
- * We cheat and use param
- * type since we did not
- * bother to define a error
- * cause struct. They are
- * the same basic format
- * with different names.
- */
- phd->param_type =
- htons(SCTP_CAUSE_UNRECOG_CHUNK);
- phd->param_length =
- htons(chk_length + sizeof(*phd));
- SCTP_BUF_LEN(merr) = sizeof(*phd);
- SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
- if (SCTP_BUF_NEXT(merr)) {
- if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(merr), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
- sctp_m_freem(merr);
- } else {
- sctp_queue_op_err(stcb, merr);
- }
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ cause = mtod(op_err, struct sctp_gen_error_cause *);
+ cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ cause->length = htons(chk_length + sizeof(struct sctp_gen_error_cause));
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+ SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, chk_length, M_NOWAIT);
+ if (SCTP_BUF_NEXT(op_err) != NULL) {
+ sctp_queue_op_err(stcb, op_err);
} else {
- sctp_m_freem(merr);
+ sctp_m_freem(op_err);
}
}
}
@@ -2763,7 +2532,7 @@
/*
* we need to report rwnd overrun drops.
*/
- sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+ sctp_send_packet_dropped(stcb, net, *mm, length, iphlen, 0);
}
if (num_chunks) {
/*
@@ -2825,12 +2594,14 @@
* cumack trackers for first transmissions,
* and retransmissions.
*/
- if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->whoTo->find_pseudo_cumack == 1) &&
(tp1->snd_count == 1)) {
tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
tp1->whoTo->find_pseudo_cumack = 0;
}
- if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->whoTo->find_rtx_pseudo_cumack == 1) &&
(tp1->snd_count > 1)) {
tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
tp1->whoTo->find_rtx_pseudo_cumack = 0;
@@ -2974,9 +2745,10 @@
* All chunks NOT UNSENT fall through here and are marked
* (leave PR-SCTP ones that are to skip alone though)
*/
- if (tp1->sent != SCTP_FORWARD_TSN_SKIP)
+ if ((tp1->sent != SCTP_FORWARD_TSN_SKIP) &&
+ (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) {
tp1->sent = SCTP_DATAGRAM_MARKED;
-
+ }
if (tp1->rec.data.chunk_was_revoked) {
/* deflate the cwnd */
tp1->whoTo->cwnd -= tp1->book_size;
@@ -2983,7 +2755,21 @@
tp1->rec.data.chunk_was_revoked = 0;
}
/* NR Sack code here */
- if (nr_sacking) {
+ if (nr_sacking &&
+ (tp1->sent != SCTP_DATAGRAM_NR_ACKED)) {
+ if (stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+ stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+ }
+ if ((stcb->asoc.strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (stcb->asoc.strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[tp1->rec.data.stream_number].outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
+ tp1->sent = SCTP_DATAGRAM_NR_ACKED;
if (tp1->data) {
/*
* sa_ignore
@@ -3088,7 +2874,6 @@
uint32_t biggest_tsn_acked)
{
struct sctp_tmit_chunk *tp1;
- int tot_revoked = 0;
TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cumack)) {
@@ -3123,7 +2908,6 @@
* artificial inflation of the flight_size.
*/
tp1->whoTo->cwnd += tp1->book_size;
- tot_revoked++;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_SACK_LOGGING_ENABLE) {
sctp_log_sack(asoc->last_acked_seq,
cumack,
@@ -3174,7 +2958,7 @@
num_dests_sacked++;
}
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
(void)SCTP_GETTIME_TIMEVAL(&now);
}
TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
@@ -3195,7 +2979,7 @@
/* done */
break;
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
/* Is it expired? */
if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
@@ -3449,7 +3233,7 @@
/* remove from the total flight */
sctp_total_flight_decrease(stcb, tp1);
- if ((stcb->asoc.peer_supports_prsctp) &&
+ if ((stcb->asoc.prsctp_supported) &&
(PR_SCTP_RTX_ENABLED(tp1->flags))) {
/*
* Has it been retransmitted tv_sec times? -
@@ -3594,17 +3378,19 @@
struct timeval now;
int now_filled = 0;
- if (asoc->peer_supports_prsctp == 0) {
+ if (asoc->prsctp_supported == 0) {
return (NULL);
}
TAILQ_FOREACH_SAFE(tp1, &asoc->sent_queue, sctp_next, tp2) {
if (tp1->sent != SCTP_FORWARD_TSN_SKIP &&
- tp1->sent != SCTP_DATAGRAM_RESEND) {
+ tp1->sent != SCTP_DATAGRAM_RESEND &&
+ tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
/* no chance to advance, out of here */
break;
}
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) {
- if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) ||
+ (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) {
sctp_misc_ints(SCTP_FWD_TSN_CHECK,
asoc->advanced_peer_ack_point,
tp1->rec.data.TSN_seq, 0, 0);
@@ -3652,7 +3438,8 @@
* the chunk, advance our peer ack point and we can check
* the next chunk.
*/
- if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ if ((tp1->sent == SCTP_FORWARD_TSN_SKIP) ||
+ (tp1->sent == SCTP_DATAGRAM_NR_ACKED)) {
/* advance PeerAckPoint goes forward */
if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, asoc->advanced_peer_ack_point)) {
asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq;
@@ -3677,12 +3464,18 @@
{
struct sctp_tmit_chunk *chk;
int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
- int entry_flight, entry_cnt, ret;
+ int ret;
+#ifndef INVARIANTS
+ int entry_flight, entry_cnt;
+
+#endif
+
+ ret = 0;
+#ifndef INVARIANTS
entry_flight = asoc->total_flight;
entry_cnt = asoc->total_flight_count;
- ret = 0;
-
+#endif
if (asoc->pr_sctp_cnt >= asoc->sent_queue_cnt)
return (0);
@@ -3729,7 +3522,7 @@
if ((tp1->sent >= SCTP_DATAGRAM_ACKED) || (tp1->data == NULL)) {
/* TSN's skipped we do NOT move back. */
sctp_misc_ints(SCTP_FLIGHT_LOG_DWN_WP_FWD,
- tp1->whoTo->flight_size,
+ tp1->whoTo ? tp1->whoTo->flight_size : 0,
tp1->book_size,
(uintptr_t) tp1->whoTo,
tp1->rec.data.TSN_seq);
@@ -3829,34 +3622,17 @@
send_s = asoc->sending_seq;
}
if (SCTP_TSN_GE(cumack, send_s)) {
-#ifndef INVARIANTS
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
-#endif
-#ifdef INVARIANTS
- panic("Impossible sack 1");
-#else
-
*abort_now = 1;
/* XXX */
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+ cumack, send_s);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_23;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
return;
-#endif
}
}
asoc->this_sack_highest_gap = cumack;
@@ -3957,7 +3733,20 @@
tp1->whoTo->cwnd -= tp1->book_size;
tp1->rec.data.chunk_was_revoked = 0;
}
- tp1->sent = SCTP_DATAGRAM_ACKED;
+ if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+ if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+ }
+ }
+ if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
if (tp1->data) {
/* sa_ignore NO_NULL_CHK */
@@ -3983,7 +3772,7 @@
}
/* sa_ignore NO_NULL_CHK */
if (stcb->sctp_socket) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3992,7 +3781,7 @@
/* sa_ignore NO_NULL_CHK */
sctp_wakeup_log(stcb, 1, SCTP_WAKESND_FROM_SACK);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4006,7 +3795,7 @@
}
#endif
sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else {
@@ -4042,7 +3831,9 @@
}
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
/* Done with this net */
@@ -4128,7 +3919,7 @@
} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, net,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
}
}
}
@@ -4196,27 +3987,15 @@
(asoc->stream_queue_cnt == 0)) {
if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
/* Need to abort here */
- struct mbuf *oper;
+ struct mbuf *op_err;
abort_out_now:
*abort_now = 1;
/* XXX */
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_26;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
+ return;
} else {
struct sctp_nets *netp;
@@ -4242,11 +4021,6 @@
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if (asoc->alternate) {
- netp = asoc->alternate;
- } else {
- netp = asoc->primary_destination;
- }
if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
goto abort_out_now;
}
@@ -4253,8 +4027,13 @@
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ if (asoc->alternate) {
+ netp = asoc->alternate;
+ } else {
+ netp = asoc->primary_destination;
+ }
sctp_send_shutdown_ack(stcb, netp);
- sctp_stop_timers_for_shutdown(stcb);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
stcb->sctp_ep, stcb, netp);
}
@@ -4268,7 +4047,7 @@
asoc->advanced_peer_ack_point = cumack;
}
/* PR-Sctp issues need to be addressed too */
- if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
struct sctp_tmit_chunk *lchk;
uint32_t old_adv_peer_ack_point;
@@ -4409,7 +4188,8 @@
send_s = asoc->sending_seq;
}
if (SCTP_TSN_GE(cum_ack, send_s)) {
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
/*
* no way, we have not even sent this TSN out yet.
@@ -4419,27 +4199,16 @@
cum_ack, send_s);
if (tp1) {
SCTP_PRINTF("Got send_s from tsn:%x + 1 of tp1:%p\n",
- tp1->rec.data.TSN_seq, tp1);
+ tp1->rec.data.TSN_seq, (void *)tp1);
}
hopeless_peer:
*abort_now = 1;
/* XXX */
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
+ cum_ack, send_s);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_27;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
return;
}
}
@@ -4470,7 +4239,7 @@
/* stop any timers */
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
net->partial_bytes_acked = 0;
net->flight_size = 0;
}
@@ -4616,7 +4385,9 @@
tp1->whoTo->cwnd -= tp1->book_size;
tp1->rec.data.chunk_was_revoked = 0;
}
- tp1->sent = SCTP_DATAGRAM_ACKED;
+ if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
}
} else {
break;
@@ -4673,7 +4444,7 @@
if (net->new_pseudo_cumack)
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, net,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_27);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
}
} else {
@@ -4680,7 +4451,7 @@
if (accum_moved) {
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
}
}
}
@@ -4693,13 +4464,22 @@
if (SCTP_TSN_GT(tp1->rec.data.TSN_seq, cum_ack)) {
break;
}
- if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
- /* no more sent on list */
- SCTP_PRINTF("Warning, tp1->sent == %d and its now acked?\n",
- tp1->sent);
+ if (tp1->sent != SCTP_DATAGRAM_NR_ACKED) {
+ if (asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", tp1->rec.data.stream_number);
+#endif
+ }
}
+ if ((asoc->strmout[tp1->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[tp1->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[tp1->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
- if (tp1->pr_sctp_on) {
+ if (PR_SCTP_ENABLED(tp1->flags)) {
if (asoc->pr_sctp_cnt != 0)
asoc->pr_sctp_cnt--;
}
@@ -4709,7 +4489,7 @@
sctp_free_bufspace(stcb, asoc, tp1, 1);
sctp_m_freem(tp1->data);
tp1->data = NULL;
- if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(tp1->flags)) {
asoc->sent_queue_cnt_removeable--;
}
}
@@ -4735,7 +4515,7 @@
}
/* sa_ignore NO_NULL_CHK */
if ((wake_him) && (stcb->sctp_socket)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -4743,7 +4523,7 @@
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_WAKE_LOGGING_ENABLE) {
sctp_wakeup_log(stcb, wake_him, SCTP_WAKESND_FROM_SACK);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4757,7 +4537,7 @@
}
#endif
sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else {
@@ -4849,7 +4629,9 @@
}
if (net->dest_state & SCTP_ADDR_PF) {
net->dest_state &= ~SCTP_ADDR_PF;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
asoc->cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
/* Done with this net */
@@ -4872,7 +4654,8 @@
TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
/* stop all timers */
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
net->flight_size = 0;
net->partial_bytes_acked = 0;
}
@@ -4922,36 +4705,18 @@
(asoc->stream_queue_cnt == 0)) {
if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
/* Need to abort here */
- struct mbuf *oper;
+ struct mbuf *op_err;
abort_out_now:
*abort_now = 1;
/* XXX */
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
return;
} else {
struct sctp_nets *netp;
- if (asoc->alternate) {
- netp = asoc->alternate;
- } else {
- netp = asoc->primary_destination;
- }
if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
@@ -4959,6 +4724,11 @@
SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_stop_timers_for_shutdown(stcb);
+ if (asoc->alternate) {
+ netp = asoc->alternate;
+ } else {
+ netp = asoc->primary_destination;
+ }
sctp_send_shutdown(stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
stcb->sctp_ep, stcb, netp);
@@ -4970,11 +4740,6 @@
(asoc->stream_queue_cnt == 0)) {
struct sctp_nets *netp;
- if (asoc->alternate) {
- netp = asoc->alternate;
- } else {
- netp = asoc->primary_destination;
- }
if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
goto abort_out_now;
}
@@ -4981,8 +4746,13 @@
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ if (asoc->alternate) {
+ netp = asoc->alternate;
+ } else {
+ netp = asoc->primary_destination;
+ }
sctp_send_shutdown_ack(stcb, netp);
- sctp_stop_timers_for_shutdown(stcb);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
stcb->sctp_ep, stcb, netp);
return;
@@ -5093,7 +4863,7 @@
} else if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, net,
- SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_34);
}
}
}
@@ -5134,7 +4904,7 @@
asoc->advanced_peer_ack_point = cum_ack;
}
/* C2. try to further move advancedPeerAckPoint ahead */
- if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->pr_sctp_cnt > 0)) {
struct sctp_tmit_chunk *lchk;
uint32_t old_adv_peer_ack_point;
@@ -5371,7 +5141,8 @@
asoc->cumulative_tsn = new_cum_tsn;
if (gap >= m_size) {
if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) {
- struct mbuf *oper;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
/*
* out of range (of single byte chunks in the rwnd I
@@ -5378,26 +5149,12 @@
* give out). This must be an attacker.
*/
*abort_flag = 1;
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- (sizeof(uint32_t) * 3);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_33);
- ippp++;
- *ippp = asoc->highest_tsn_inside_map;
- ippp++;
- *ippp = new_cum_tsn;
- }
- stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
- sctp_abort_an_association(stcb->sctp_ep, stcb, oper, SCTP_SO_NOT_LOCKED);
+ snprintf(msg, sizeof(msg),
+ "New cum ack %8.8x too high, highest TSN %8.8x",
+ new_cum_tsn, asoc->highest_tsn_inside_map);
+ op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_35;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
return;
}
SCTP_STAT_INCR(sctps_fwdtsn_map_over);
Modified: trunk/sys/netinet/sctp_indata.h
===================================================================
--- trunk/sys/netinet/sctp_indata.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_indata.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_indata.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_indata.h 294147 2016-01-16 12:47:28Z tuexen $");
#ifndef _NETINET_SCTP_INDATA_H_
#define _NETINET_SCTP_INDATA_H_
@@ -48,7 +48,7 @@
struct mbuf *dm);
-#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+#define sctp_build_readq_entry_mac(_ctl, in_it, context, net, tsn, ppid, stream_no, stream_seq, flags, dm) do { \
if (_ctl) { \
atomic_add_int(&((net)->ref_count), 1); \
(_ctl)->sinfo_stream = stream_no; \
@@ -55,7 +55,7 @@
(_ctl)->sinfo_ssn = stream_seq; \
(_ctl)->sinfo_flags = (flags << 8); \
(_ctl)->sinfo_ppid = ppid; \
- (_ctl)->sinfo_context = a; \
+ (_ctl)->sinfo_context = context; \
(_ctl)->sinfo_timetolive = 0; \
(_ctl)->sinfo_tsn = tsn; \
(_ctl)->sinfo_cumtsn = tsn; \
@@ -112,11 +112,9 @@
sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *, int *);
int
-sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+sctp_process_data(struct mbuf **, int, int *, int,
struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint32_t *,
- uint8_t, uint32_t,
- uint32_t, uint16_t);
+ struct sctp_nets *, uint32_t *);
void sctp_slide_mapping_arrays(struct sctp_tcb *stcb);
Modified: trunk/sys/netinet/sctp_input.c
===================================================================
--- trunk/sys/netinet/sctp_input.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_input.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_input.c 240580 2012-09-17 00:47:35Z eadler $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_input.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
@@ -48,7 +48,9 @@
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#include <sys/smp.h>
@@ -81,10 +83,11 @@
/* INIT handler */
static void
-sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_handle_init(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
- struct sctp_tcb *stcb, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_init *init;
@@ -91,15 +94,15 @@
struct mbuf *op_err;
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
- stcb);
+ (void *)stcb);
if (stcb == NULL) {
SCTP_INP_RLOCK(inp);
}
/* validate length */
if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -109,9 +112,9 @@
init = &cp->init;
if (init->initiate_tag == 0) {
/* protocol error... send abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -119,9 +122,9 @@
}
if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
/* invalid parameter... send abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -129,9 +132,9 @@
}
if (init->num_inbound_streams == 0) {
/* protocol error... send abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -139,9 +142,9 @@
}
if (init->num_outbound_streams == 0) {
/* protocol error... send abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -150,8 +153,10 @@
if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
offset + ntohs(cp->ch.chunk_length))) {
/* auth parameter(s) error... send abort */
- sctp_abort_association(inp, stcb, m, iphlen, sh, NULL,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Problem with AUTH parameters");
+ sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
if (stcb)
*abort_no_unlock = 1;
@@ -179,8 +184,10 @@
* state :-)
*/
if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) {
- sctp_send_abort(m, iphlen, sh, 0, NULL,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "No listener");
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
}
goto outnow;
@@ -192,8 +199,9 @@
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
} else {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
- sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp,
- use_mflowid, mflowid,
+ sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset,
+ src, dst, sh, cp,
+ mflowtype, mflowid,
vrf_id, port,
((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
}
@@ -310,6 +318,13 @@
if (chk->rec.data.stream_number >= newcnt) {
TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
asoc->send_queue_cnt--;
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+ }
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
@@ -343,6 +358,7 @@
sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED);
/* sa_ignore FREED_MEMORY */
}
+ outs->state = SCTP_STREAM_CLOSED;
}
}
/* cut back the count */
@@ -349,8 +365,12 @@
asoc->pre_open_streams = newcnt;
}
SCTP_TCB_SEND_UNLOCK(stcb);
- asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams;
-
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ if (asoc->strmout) {
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ asoc->strmout[i].state = SCTP_STREAM_OPEN;
+ }
+ }
/* EY - nr_sack: initialize highest tsn in nr_mapping_array */
asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
@@ -381,9 +401,10 @@
}
SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
}
- asoc->streamincnt = ntohs(init->num_outbound_streams);
- if (asoc->streamincnt > MAX_SCTP_STREAMS) {
- asoc->streamincnt = MAX_SCTP_STREAMS;
+ if (asoc->max_inbound_streams > ntohs(init->num_outbound_streams)) {
+ asoc->streamincnt = ntohs(init->num_outbound_streams);
+ } else {
+ asoc->streamincnt = asoc->max_inbound_streams;
}
SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
sizeof(struct sctp_stream_in), SCTP_M_STRMI);
@@ -395,11 +416,6 @@
for (i = 0; i < asoc->streamincnt; i++) {
asoc->strmin[i].stream_no = i;
asoc->strmin[i].last_sequence_delivered = 0xffff;
- /*
- * U-stream ranges will be set when the cookie is unpacked.
- * Or for the INIT sender they are un set (if pr-sctp not
- * supported) when the INIT-ACK arrives.
- */
TAILQ_INIT(&asoc->strmin[i].inqueue);
asoc->strmin[i].delivery_started = 0;
}
@@ -420,10 +436,11 @@
* INIT-ACK message processing/consumption returns value < 0 on error
*/
static int
-sctp_process_init_ack(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
struct sctp_nets *net, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id)
{
struct sctp_association *asoc;
@@ -434,7 +451,6 @@
/* First verify that we have no illegal param's */
abort_flag = 0;
- op_err = NULL;
op_err = sctp_arethere_unrecognized_parameters(m,
(offset + sizeof(struct sctp_init_chunk)),
@@ -455,20 +471,22 @@
initack_limit = offset + ntohs(cp->ch.chunk_length);
/* load all addresses */
if ((retval = sctp_load_addresses_from_init(stcb, m,
- (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
- NULL))) {
- /* Huh, we should abort */
+ (offset + sizeof(struct sctp_init_chunk)), initack_limit,
+ src, dst, NULL))) {
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Problem with address parameters");
SCTPDBG(SCTP_DEBUG_INPUT1,
"Load addresses from INIT causes an abort %d\n",
retval);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh, NULL,
- use_mflowid, mflowid,
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
}
/* if the peer doesn't support asconf, flush the asconf queue */
- if (asoc->peer_supports_asconf == 0) {
+ if (asoc->asconf_supported == 0) {
struct sctp_asconf_addr *param, *nparam;
TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) {
@@ -501,12 +519,11 @@
* primary.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
- asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+ asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
/* calculate the RTO */
net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy,
SCTP_RTT_FROM_NON_DATA);
-
retval = sctp_send_cookie_echo(m, offset, stcb, net);
if (retval < 0) {
/*
@@ -515,30 +532,25 @@
* abandon the peer, its broke.
*/
if (retval == -3) {
+ uint16_t len;
+
+ len = (uint16_t) (sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
/* We abort with an error of missing mandatory param */
- op_err =
- sctp_generate_invmanparam(SCTP_CAUSE_MISSING_PARAM);
- if (op_err) {
- /*
- * Expand beyond to include the mandatory
- * param cookie
- */
- struct sctp_inv_mandatory_param *mp;
+ op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ struct sctp_error_missing_param *cause;
- SCTP_BUF_LEN(op_err) =
- sizeof(struct sctp_inv_mandatory_param);
- mp = mtod(op_err,
- struct sctp_inv_mandatory_param *);
+ SCTP_BUF_LEN(op_err) = len;
+ cause = mtod(op_err, struct sctp_error_missing_param *);
/* Subtract the reserved param */
- mp->length =
- htons(sizeof(struct sctp_inv_mandatory_param) - 2);
- mp->num_param = htonl(1);
- mp->param = htons(SCTP_STATE_COOKIE);
- mp->resv = 0;
+ cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
+ cause->cause.length = htons(len);
+ cause->num_missing_params = htonl(1);
+ cause->type[0] = htons(SCTP_STATE_COOKIE);
}
sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
- sh, op_err,
- use_mflowid, mflowid,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
}
@@ -551,21 +563,12 @@
sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
- struct sockaddr_storage store;
+ union sctp_sockstore store;
struct sctp_nets *r_net, *f_net;
struct timeval tv;
int req_prim = 0;
uint16_t old_error_counter;
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
-
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
/* Invalid length */
return;
@@ -575,12 +578,11 @@
#ifdef INET
case AF_INET:
if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
- sin = (struct sockaddr_in *)&store;
- sin->sin_family = cp->heartbeat.hb_info.addr_family;
- sin->sin_len = cp->heartbeat.hb_info.addr_len;
- sin->sin_port = stcb->rport;
- memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
- sizeof(sin->sin_addr));
+ store.sin.sin_family = cp->heartbeat.hb_info.addr_family;
+ store.sin.sin_len = cp->heartbeat.hb_info.addr_len;
+ store.sin.sin_port = stcb->rport;
+ memcpy(&store.sin.sin_addr, cp->heartbeat.hb_info.address,
+ sizeof(store.sin.sin_addr));
} else {
return;
}
@@ -589,12 +591,10 @@
#ifdef INET6
case AF_INET6:
if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
- sin6 = (struct sockaddr_in6 *)&store;
- sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
- sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
- sin6->sin6_port = stcb->rport;
- memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
- sizeof(sin6->sin6_addr));
+ store.sin6.sin6_family = cp->heartbeat.hb_info.addr_family;
+ store.sin6.sin6_len = cp->heartbeat.hb_info.addr_len;
+ store.sin6.sin6_port = stcb->rport;
+ memcpy(&store.sin6.sin6_addr, cp->heartbeat.hb_info.address, sizeof(struct in6_addr));
} else {
return;
}
@@ -603,7 +603,7 @@
default:
return;
}
- r_net = sctp_findnet(stcb, (struct sockaddr *)&store);
+ r_net = sctp_findnet(stcb, &store.sa);
if (r_net == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
return;
@@ -634,7 +634,8 @@
}
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+ r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
}
old_error_counter = r_net->error_count;
@@ -655,7 +656,8 @@
stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
}
if (old_error_counter > 0) {
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
}
if (r_net == stcb->asoc.primary_destination) {
@@ -674,7 +676,9 @@
sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_PRIM_DELETED)) {
- sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
if (sctp_is_mobility_feature_on(stcb->sctp_ep,
SCTP_MOBILITY_FASTHANDOFF)) {
sctp_assoc_immediate_retrans(stcb,
@@ -745,7 +749,7 @@
* return 0 means we want you to proceed with the abort non-zero
* means no abort processing
*/
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n");
return (0);
}
@@ -758,7 +762,7 @@
sctp_handle_abort(struct sctp_abort_chunk *abort,
struct sctp_tcb *stcb, struct sctp_nets *net)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -775,10 +779,10 @@
* Need to check the cause codes for our two magic nat
* aborts which don't kill the assoc necessarily.
*/
- struct sctp_missing_nat_state *natc;
+ struct sctp_gen_error_cause *cause;
- natc = (struct sctp_missing_nat_state *)(abort + 1);
- error = ntohs(natc->cause);
+ cause = (struct sctp_gen_error_cause *)(abort + 1);
+ error = ntohs(cause->code);
if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) {
SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
abort->ch.chunk_flags);
@@ -796,7 +800,8 @@
error = 0;
}
/* stop any receive timers */
- sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
/* notify user of the abort and clean up... */
sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
/* free the tcb */
@@ -808,7 +813,7 @@
#ifdef SCTP_ASOCLOG_OF_TSNS
sctp_print_out_track_log(stcb);
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -818,8 +823,8 @@
#endif
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
@@ -860,8 +865,9 @@
{
struct sctp_association *asoc;
int some_on_streamwheel;
+ int old_state;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -878,12 +884,12 @@
if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
/* Shutdown NOT the expected size */
return;
- } else {
- sctp_update_acked(stcb, cp, abort_flag);
- if (*abort_flag) {
- return;
- }
}
+ old_state = SCTP_GET_STATE(asoc);
+ sctp_update_acked(stcb, cp, abort_flag);
+ if (*abort_flag) {
+ return;
+ }
if (asoc->control_pdapi) {
/*
* With a normal shutdown we assume the end of last record.
@@ -893,7 +899,7 @@
asoc->control_pdapi->pdapi_aborted = 1;
asoc->control_pdapi = NULL;
SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -906,8 +912,10 @@
return;
}
#endif
- sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (stcb->sctp_socket) {
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+ }
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -933,7 +941,8 @@
* stop the shutdown timer, since we WILL move to
* SHUTDOWN-ACK-SENT.
*/
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
}
/* Now is there unsent data on a stream somewhere? */
some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
@@ -946,17 +955,21 @@
} else {
/* no outstanding data to send, so move on... */
/* send SHUTDOWN-ACK */
- sctp_send_shutdown_ack(stcb, net);
/* move to SHUTDOWN-ACK-SENT state */
if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- sctp_stop_timers_for_shutdown(stcb);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
- stcb, net);
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown_ack(stcb, net);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, net);
+ } else if (old_state == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ sctp_send_shutdown_ack(stcb, net);
+ }
}
}
@@ -967,7 +980,7 @@
{
struct sctp_association *asoc;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -1001,7 +1014,7 @@
asoc->control_pdapi->pdapi_aborted = 1;
asoc->control_pdapi = NULL;
SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -1014,18 +1027,20 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
- /* are the queues empty? */
+#ifdef INVARIANTS
if (!TAILQ_EMPTY(&asoc->send_queue) ||
!TAILQ_EMPTY(&asoc->sent_queue) ||
!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
+ panic("Queues are not empty when handling SHUTDOWN-ACK");
}
+#endif
/* stop the timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
/* send SHUTDOWN-COMPLETE */
sctp_send_shutdown_complete(stcb, net, 0);
/* notify upper layer protocol */
@@ -1038,7 +1053,7 @@
}
SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
/* free the TCB but first save off the ep */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -1046,8 +1061,8 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1070,7 +1085,7 @@
sctp_asconf_cleanup(stcb, net);
break;
case SCTP_FORWARD_CUM_TSN:
- stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.prsctp_supported = 0;
break;
default:
SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1084,6 +1099,7 @@
* Skip past the param header and then we will find the param that caused the
* problem. There are a number of param's in a ASCONF OR the prsctp param
* these will turn of specific features.
+ * XXX: Is this the right thing to do?
*/
static void
sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
@@ -1094,7 +1110,7 @@
switch (ntohs(pbad->param_type)) {
/* pr-sctp draft */
case SCTP_PRSCTP_SUPPORTED:
- stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.prsctp_supported = 0;
break;
case SCTP_SUPPORTED_CHUNK_EXT:
break;
@@ -1105,7 +1121,7 @@
case SCTP_ADD_IP_ADDRESS:
case SCTP_DEL_IP_ADDRESS:
case SCTP_SET_PRIM_ADDR:
- stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.asconf_supported = 0;
break;
case SCTP_SUCCESS_REPORT:
case SCTP_ERROR_CAUSE_IND:
@@ -1112,7 +1128,7 @@
SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
SCTPDBG(SCTP_DEBUG_INPUT2,
"Turning off ASCONF to this strange peer\n");
- stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.asconf_supported = 0;
break;
default:
SCTPDBG(SCTP_DEBUG_INPUT2,
@@ -1133,7 +1149,7 @@
struct sctp_association *asoc;
int adjust;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1196,7 +1212,7 @@
asoc->max_init_times) {
sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
/* now free the asoc */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1205,8 +1221,8 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
return (-1);
@@ -1279,10 +1295,11 @@
}
static int
-sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
struct sctp_nets *net, int *abort_no_unlock,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id)
{
struct sctp_init_ack *init_ack;
@@ -1298,10 +1315,10 @@
}
if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
/* Invalid length */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1310,10 +1327,10 @@
/* validate parameters */
if (init_ack->initiate_tag == 0) {
/* protocol error... send an abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1320,10 +1337,10 @@
}
if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
/* protocol error... send an abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1330,10 +1347,10 @@
}
if (init_ack->num_inbound_streams == 0) {
/* protocol error... send an abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1340,10 +1357,10 @@
}
if (init_ack->num_outbound_streams == 0) {
/* protocol error... send an abort */
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
- sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
- op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, net->port);
*abort_no_unlock = 1;
return (-1);
@@ -1366,9 +1383,9 @@
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
}
- if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
+ if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb,
net, abort_no_unlock,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id) < 0) {
/* error in parsing parameters */
return (-1);
@@ -1420,11 +1437,12 @@
static struct sctp_tcb *
sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port);
@@ -1436,11 +1454,12 @@
*/
static struct sctp_tcb *
sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
@@ -1448,12 +1467,16 @@
struct sctp_init_ack_chunk *initack_cp, initack_buf;
struct sctp_nets *net;
struct mbuf *op_err;
- struct sctp_paramhdr *ph;
int init_offset, initack_offset, i;
int retval;
int spec_flag = 0;
uint32_t how_indx;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
net = *netp;
/* I know that the TCB is non-NULL from the caller */
asoc = &stcb->asoc;
@@ -1467,19 +1490,9 @@
if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
/* SHUTDOWN came in after sending INIT-ACK */
sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
- op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err == NULL) {
- /* FOOBAR */
- return (NULL);
- }
- /* Set the len */
- SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
- ph = mtod(op_err, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- sctp_send_operr_to(m, sh, cookie->peers_vtag, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
+ sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, net->port);
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 2;
@@ -1543,8 +1556,7 @@
return (NULL);
}
- switch SCTP_GET_STATE
- (asoc) {
+ switch (SCTP_GET_STATE(asoc)) {
case SCTP_STATE_COOKIE_WAIT:
case SCTP_STATE_COOKIE_ECHOED:
/*
@@ -1561,9 +1573,12 @@
return (NULL);
}
/* we have already processed the INIT so no problem */
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
- net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
- sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
/* update current state */
if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
@@ -1581,7 +1596,7 @@
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
(inp->sctp_socket->so_qlimit == 0)
) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1593,7 +1608,7 @@
*/
stcb->sctp_ep->sctp_flags |=
SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1606,7 +1621,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1634,7 +1649,7 @@
* have simply lost the COOKIE-ACK
*/
break;
- } /* end switch */
+ } /* end switch */
sctp_stop_all_cookie_timers(stcb);
/*
* We ignore the return code here.. not sure if we should
@@ -1643,7 +1658,7 @@
*/
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 4;
return (NULL);
@@ -1685,27 +1700,9 @@
* Now we have colliding state. We must send an abort here
* with colliding state indication.
*/
- op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err == NULL) {
- /* FOOBAR */
- return (NULL);
- }
- /* pre-reserve some space */
-#ifdef INET6
- SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
-#else
- SCTP_BUF_RESV_UF(op_err, sizeof(struct ip));
-#endif
- SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
- SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
- /* Set the len */
- SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
- ph = mtod(op_err, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_NAT_COLLIDING_STATE);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- sctp_send_abort(m, iphlen, sh, 0, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_NAT_COLLIDING_STATE, "");
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
return (NULL);
}
@@ -1741,7 +1738,8 @@
}
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 8;
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
sctp_stop_all_cookie_timers(stcb);
/*
* since we did not send a HB make sure we don't double
@@ -1787,7 +1785,7 @@
}
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 10;
return (NULL);
@@ -1799,13 +1797,13 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
(inp->sctp_socket->so_qlimit == 0)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
stcb->sctp_ep->sctp_flags |=
SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1818,7 +1816,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -1862,16 +1860,22 @@
cookie->tie_tag_peer_vtag != 0) {
struct sctpasochead *head;
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
if (asoc->peer_supports_nat) {
/*
- * This is a gross gross hack. just call the
+ * This is a gross gross hack. Just call the
* cookie_new code since we are allowing a duplicate
* association. I hope this works...
*/
- return (sctp_process_cookie_new(m, iphlen, offset, sh, cookie, cookie_len,
+ return (sctp_process_cookie_new(m, iphlen, offset, src, dst,
+ sh, cookie, cookie_len,
inp, netp, init_src, notification,
auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port));
}
/*
@@ -1880,8 +1884,10 @@
/* temp code */
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 12;
- sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
/* notify upper layer */
*notification = SCTP_NOTIFY_ASSOC_RESTART;
@@ -1925,6 +1931,10 @@
asoc->mapping_array_size);
}
SCTP_TCB_UNLOCK(stcb);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ SCTP_SOCKET_LOCK(so, 1);
+#endif
SCTP_INP_INFO_WLOCK();
SCTP_INP_WLOCK(stcb->sctp_ep);
SCTP_TCB_LOCK(stcb);
@@ -1932,10 +1942,20 @@
/* send up all the data */
SCTP_TCB_SEND_LOCK(stcb);
- sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_NOT_LOCKED);
+ sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED);
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ asoc->strmout[i].abandoned_sent[j] = 0;
+ asoc->strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ asoc->strmout[i].abandoned_sent[0] = 0;
+ asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
stcb->asoc.strmout[i].stream_no = i;
- stcb->asoc.strmout[i].next_sequence_sent = 0;
+ stcb->asoc.strmout[i].next_sequence_send = 0;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
}
/* process the INIT-ACK info (my info) */
@@ -1953,11 +1973,15 @@
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- /* process the INIT info (peer's info) */
SCTP_TCB_SEND_UNLOCK(stcb);
SCTP_INP_WUNLOCK(stcb->sctp_ep);
SCTP_INP_INFO_WUNLOCK();
-
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ /* process the INIT info (peer's info) */
retval = sctp_process_init(init_cp, stcb);
if (retval < 0) {
if (how_indx < sizeof(asoc->cookie_how))
@@ -1973,7 +1997,7 @@
if (sctp_load_addresses_from_init(stcb, m,
init_offset + sizeof(struct sctp_init_chunk),
- initack_offset, sh, init_src)) {
+ initack_offset, src, dst, init_src)) {
if (how_indx < sizeof(asoc->cookie_how))
asoc->cookie_how[how_indx] = 14;
@@ -2004,18 +2028,18 @@
*/
static struct sctp_tcb *
sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
struct sctp_inpcb *inp, struct sctp_nets **netp,
struct sockaddr *init_src, int *notification,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_tcb *stcb;
struct sctp_init_chunk *init_cp, init_buf;
struct sctp_init_ack_chunk *initack_cp, initack_buf;
- struct sockaddr_storage sa_store;
- struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+ union sctp_sockstore store;
struct sctp_association *asoc;
int init_offset, initack_offset, initack_limit;
int retval;
@@ -2022,15 +2046,7 @@
int error = 0;
uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
-#ifdef INET
- struct sockaddr_in *sin;
-
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-
-#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -2092,6 +2108,7 @@
*/
stcb = sctp_aloc_assoc(inp, init_src, &error,
ntohl(initack_cp->init.initiate_tag), vrf_id,
+ ntohs(initack_cp->init.num_outbound_streams),
(struct thread *)NULL
);
if (stcb == NULL) {
@@ -2100,11 +2117,10 @@
/* memory problem? */
SCTPDBG(SCTP_DEBUG_INPUT1,
"process_cookie_new: no room for another TCB!\n");
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
-
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
- sh, op_err,
- use_mflowid, mflowid,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
return (NULL);
}
@@ -2114,13 +2130,13 @@
asoc = &stcb->asoc;
/* get scope variables out of cookie */
- asoc->ipv4_local_scope = cookie->ipv4_scope;
- asoc->site_scope = cookie->site_scope;
- asoc->local_scope = cookie->local_scope;
- asoc->loopback_scope = cookie->loopback_scope;
+ asoc->scope.ipv4_local_scope = cookie->ipv4_scope;
+ asoc->scope.site_scope = cookie->site_scope;
+ asoc->scope.local_scope = cookie->local_scope;
+ asoc->scope.loopback_scope = cookie->loopback_scope;
- if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) ||
- (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) {
+ if ((asoc->scope.ipv4_addr_legal != cookie->ipv4_addr_legal) ||
+ (asoc->scope.ipv6_addr_legal != cookie->ipv6_addr_legal)) {
struct mbuf *op_err;
/*
@@ -2129,19 +2145,19 @@
* association.
*/
atomic_add_int(&stcb->asoc.refcnt, 1);
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
- sh, op_err,
- use_mflowid, mflowid,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
- SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2166,13 +2182,14 @@
retval = 0;
if (retval < 0) {
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2180,16 +2197,17 @@
}
/* load all addresses */
if (sctp_load_addresses_from_init(stcb, m,
- init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
- init_src)) {
+ init_offset + sizeof(struct sctp_init_chunk), initack_offset,
+ src, dst, init_src)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2212,13 +2230,14 @@
SCTPDBG(SCTP_DEBUG_AUTH1,
"COOKIE-ECHO: AUTH failed\n");
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_21);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2254,34 +2273,32 @@
#ifdef INET
case SCTP_IPV4_ADDRESS:
/* source addr is IPv4 */
- sin = (struct sockaddr_in *)initack_src;
- memset(sin, 0, sizeof(*sin));
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_addr.s_addr = cookie->laddress[0];
+ memset(&store.sin, 0, sizeof(struct sockaddr_in));
+ store.sin.sin_family = AF_INET;
+ store.sin.sin_len = sizeof(struct sockaddr_in);
+ store.sin.sin_addr.s_addr = cookie->laddress[0];
break;
#endif
#ifdef INET6
case SCTP_IPV6_ADDRESS:
/* source addr is IPv6 */
- sin6 = (struct sockaddr_in6 *)initack_src;
- memset(sin6, 0, sizeof(*sin6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_scope_id = cookie->scope_id;
- memcpy(&sin6->sin6_addr, cookie->laddress,
- sizeof(sin6->sin6_addr));
+ memset(&store.sin6, 0, sizeof(struct sockaddr_in6));
+ store.sin6.sin6_family = AF_INET6;
+ store.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ store.sin6.sin6_scope_id = cookie->scope_id;
+ memcpy(&store.sin6.sin6_addr, cookie->laddress, sizeof(struct in6_addr));
break;
#endif
default:
atomic_add_int(&stcb->asoc.refcnt, 1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
SCTP_TCB_LOCK(stcb);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -2302,7 +2319,7 @@
* a bit of protection is worth having..
*/
stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_SOCKET_LOCK(so, 1);
@@ -2314,7 +2331,7 @@
}
#endif
soisconnected(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
@@ -2334,12 +2351,18 @@
sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
}
- /* calculate the RTT */
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
- if ((netp) && (*netp)) {
+ if ((netp != NULL) && (*netp != NULL)) {
+ /* calculate the RTT and set the encaps port */
(*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
&cookie->time_entered, sctp_align_unsafe_makecopy,
SCTP_RTT_FROM_NON_DATA);
+#if defined(INET) || defined(INET6)
+ if (((*netp)->port == 0) && (port != 0)) {
+ sctp_pathmtu_adjustment(stcb, (*netp)->mtu - sizeof(struct udphdr));
+ }
+ (*netp)->port = port;
+#endif
}
/* respond with a COOKIE-ACK */
sctp_send_cookie_ack(stcb);
@@ -2351,7 +2374,7 @@
sctp_check_address_list(stcb, m,
initack_offset + sizeof(struct sctp_init_ack_chunk),
initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
- initack_src, cookie->local_scope, cookie->site_scope,
+ &store.sa, cookie->local_scope, cookie->site_scope,
cookie->ipv4_scope, cookie->loopback_scope);
@@ -2377,11 +2400,12 @@
*/
static struct mbuf *
sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
struct sctp_tcb **locked_tcb,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
struct sctp_state_cookie *cookie;
@@ -2393,13 +2417,10 @@
uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
uint8_t *sig;
uint8_t cookie_ok = 0;
- unsigned int size_of_pkt, sig_offset, cookie_offset;
+ unsigned int sig_offset, cookie_offset;
unsigned int cookie_len;
struct timeval now;
struct timeval time_expires;
- struct sockaddr_storage dest_store;
- struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
- struct ip *iph;
int notification = 0;
struct sctp_nets *netl;
int had_a_existing_tcb = 0;
@@ -2420,53 +2441,12 @@
if (inp_p == NULL) {
return (NULL);
}
- /* First get the destination address setup too. */
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *lsin;
-
- lsin = (struct sockaddr_in *)(localep_sa);
- memset(lsin, 0, sizeof(*lsin));
- lsin->sin_family = AF_INET;
- lsin->sin_len = sizeof(*lsin);
- lsin->sin_port = sh->dest_port;
- lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
- size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *lsin6;
-
- lsin6 = (struct sockaddr_in6 *)(localep_sa);
- memset(lsin6, 0, sizeof(*lsin6));
- lsin6->sin6_family = AF_INET6;
- lsin6->sin6_len = sizeof(struct sockaddr_in6);
- ip6 = mtod(m, struct ip6_hdr *);
- lsin6->sin6_port = sh->dest_port;
- lsin6->sin6_addr = ip6->ip6_dst;
- size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
- break;
- }
-#endif
- default:
- return (NULL);
- }
-
cookie = &cp->cookie;
cookie_offset = offset + sizeof(struct sctp_chunkhdr);
cookie_len = ntohs(cp->ch.chunk_length);
- if ((cookie->peerport != sh->src_port) &&
- (cookie->myport != sh->dest_port) &&
+ if ((cookie->peerport != sh->src_port) ||
+ (cookie->myport != sh->dest_port) ||
(cookie->my_vtag != sh->v_tag)) {
/*
* invalid ports or bad tag. Note that we always leave the
@@ -2477,11 +2457,10 @@
*/
return (NULL);
}
- if (cookie_len > size_of_pkt ||
- cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+ if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
sizeof(struct sctp_init_chunk) +
sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
- /* cookie too long! or too small */
+ /* cookie too small */
return (NULL);
}
/*
@@ -2489,12 +2468,7 @@
* calculated in the sctp_hmac_m() call).
*/
sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
- if (sig_offset > size_of_pkt) {
- /* packet not correct size! */
- /* XXX this may already be accounted for earlier... */
- return (NULL);
- }
- m_sig = m_split(m, sig_offset, M_DONTWAIT);
+ m_sig = m_split(m, sig_offset, M_NOWAIT);
if (m_sig == NULL) {
/* out of memory or ?? */
return (NULL);
@@ -2501,13 +2475,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m_sig; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_SPLIT);
- }
- }
+ sctp_log_mbc(m_sig, SCTP_MBUF_SPLIT);
}
#endif
@@ -2596,20 +2564,20 @@
if (timevalcmp(&now, &time_expires, >)) {
/* cookie is stale! */
struct mbuf *op_err;
- struct sctp_stale_cookie_msg *scm;
+ struct sctp_error_stale_cookie *cause;
uint32_t tim;
- op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
- 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_stale_cookie),
+ 0, M_NOWAIT, 1, MT_DATA);
if (op_err == NULL) {
/* FOOBAR */
return (NULL);
}
/* Set the len */
- SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
- scm = mtod(op_err, struct sctp_stale_cookie_msg *);
- scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
- scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_stale_cookie);
+ cause = mtod(op_err, struct sctp_error_stale_cookie *);
+ cause->cause.code = htons(SCTP_CAUSE_STALE_COOKIE);
+ cause->cause.length = htons((sizeof(struct sctp_paramhdr) +
(sizeof(uint32_t))));
/* seconds to usec */
tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
@@ -2616,9 +2584,9 @@
/* add in usec */
if (tim == 0)
tim = now.tv_usec - cookie->time_entered.tv_usec;
- scm->time_usec = htonl(tim);
- sctp_send_operr_to(m, sh, cookie->peers_vtag, op_err,
- use_mflowid, mflowid,
+ cause->stale_time = htonl(tim);
+ sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
+ mflowtype, mflowid, l_inp->fibnum,
vrf_id, port);
return (NULL);
}
@@ -2659,9 +2627,9 @@
/* This should not happen */
return (NULL);
}
- if ((*stcb == NULL) && to) {
+ if (*stcb == NULL) {
/* Yep, lets check */
- *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL);
if (*stcb == NULL) {
/*
* We should have only got back the same inp. If we
@@ -2693,29 +2661,28 @@
SCTP_INP_INCR_REF((*stcb)->sctp_ep);
if ((*stcb)->sctp_ep != l_inp) {
SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
- (*stcb)->sctp_ep, l_inp);
+ (void *)(*stcb)->sctp_ep, (void *)l_inp);
}
}
}
}
- if (to == NULL) {
- return (NULL);
- }
cookie_len -= SCTP_SIGNATURE_SIZE;
if (*stcb == NULL) {
/* this is the "normal" case... get a new TCB */
- *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
- cookie_len, *inp_p, netp, to, &notification,
+ *stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh,
+ cookie, cookie_len, *inp_p,
+ netp, to, &notification,
auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
} else {
/* this is abnormal... cookie-echo on existing TCB */
had_a_existing_tcb = 1;
- *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+ *stcb = sctp_process_cookie_existing(m, iphlen, offset,
+ src, dst, sh,
cookie, cookie_len, *inp_p, *stcb, netp, to,
&notification, auth_skipped, auth_offset, auth_len,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id, port);
}
@@ -2723,11 +2690,9 @@
/* still no TCB... must be bad cookie-echo */
return (NULL);
}
- if ((*netp != NULL) && (use_mflowid != 0)) {
+ if (*netp != NULL) {
+ (*netp)->flowtype = mflowtype;
(*netp)->flowid = mflowid;
-#ifdef INVARIANTS
- (*netp)->flowidset = 1;
-#endif
}
/*
* Ok, we built an association so confirm the address we sent the
@@ -2789,18 +2754,18 @@
if (so == NULL) {
struct mbuf *op_err;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *pcb_so;
#endif
/* Too many sockets */
SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
sctp_abort_association(*inp_p, NULL, m, iphlen,
- sh, op_err,
- use_mflowid, mflowid,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
pcb_so = SCTP_INP_SO(*inp_p);
atomic_add_int(&(*stcb)->asoc.refcnt, 1);
SCTP_TCB_UNLOCK((*stcb));
@@ -2808,8 +2773,9 @@
SCTP_TCB_LOCK((*stcb));
atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(pcb_so, 1);
#endif
return (NULL);
@@ -2831,11 +2797,19 @@
inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
inp->sctp_socket = so;
inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+ inp->max_cwnd = (*inp_p)->max_cwnd;
inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off;
- inp->sctp_ecn_enable = (*inp_p)->sctp_ecn_enable;
+ inp->ecn_supported = (*inp_p)->ecn_supported;
+ inp->prsctp_supported = (*inp_p)->prsctp_supported;
+ inp->auth_supported = (*inp_p)->auth_supported;
+ inp->asconf_supported = (*inp_p)->asconf_supported;
+ inp->reconfig_supported = (*inp_p)->reconfig_supported;
+ inp->nrsack_supported = (*inp_p)->nrsack_supported;
+ inp->pktdrop_supported = (*inp_p)->pktdrop_supported;
inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
inp->sctp_context = (*inp_p)->sctp_context;
inp->local_strreset_support = (*inp_p)->local_strreset_support;
+ inp->fibnum = (*inp_p)->fibnum;
inp->inp_starting_point_for_iterator = NULL;
/*
* copy in the authentication parameters from the
@@ -2899,13 +2873,13 @@
* Pull it from the incomplete queue and wake the
* guy
*/
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
atomic_add_int(&(*stcb)->asoc.refcnt, 1);
SCTP_TCB_UNLOCK((*stcb));
SCTP_SOCKET_LOCK(so, 1);
#endif
soisconnected(so);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_TCB_LOCK((*stcb));
atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
SCTP_SOCKET_UNLOCK(so, 1);
@@ -2932,9 +2906,9 @@
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_cookie_ack: handling COOKIE-ACK\n");
- if (stcb == NULL)
+ if ((stcb == NULL) || (net == NULL)) {
return;
-
+ }
asoc = &stcb->asoc;
sctp_stop_all_cookie_timers(stcb);
@@ -2961,12 +2935,12 @@
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -2977,7 +2951,7 @@
if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) {
soisconnected(stcb->sctp_socket);
}
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3009,7 +2983,7 @@
* in flight)
*/
if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
- (stcb->asoc.peer_supports_asconf) &&
+ (stcb->asoc.asconf_supported == 1) &&
(!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
#ifdef SCTP_TIMER_BASED_ASCONF
sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
@@ -3170,7 +3144,6 @@
uint32_t cwr_tsn;
cwr_tsn = ntohl(cp->tsn);
-
override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE;
TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
@@ -3186,10 +3159,8 @@
stcb->asoc.ecn_echo_cnt_onq--;
TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
sctp_next);
- if (chk->data) {
- sctp_m_freem(chk->data);
- chk->data = NULL;
- }
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
stcb->asoc.ctrl_queue_cnt--;
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
if (override == 0) {
@@ -3205,7 +3176,7 @@
{
struct sctp_association *asoc;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3227,20 +3198,22 @@
/* notify upper layer protocol */
if (stcb->sctp_socket) {
sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
- /* are the queues empty? they should be */
- if (!TAILQ_EMPTY(&asoc->send_queue) ||
- !TAILQ_EMPTY(&asoc->sent_queue) ||
- !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
- sctp_report_all_outbound(stcb, 0, 0, SCTP_SO_NOT_LOCKED);
- }
}
+#ifdef INVARIANTS
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
+ panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
+ }
+#endif
/* stop the timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
/* free the TCB */
SCTPDBG(SCTP_DEBUG_INPUT2,
"sctp_handle_shutdown_complete: calls free-asoc\n");
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3248,8 +3221,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
return;
@@ -3356,7 +3330,8 @@
/* restart the timer */
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
- stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+ stcb, tp1->whoTo,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
stcb, tp1->whoTo);
@@ -3424,7 +3399,8 @@
* this, otherwise we let the timer fire.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
- stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
}
break;
@@ -3494,9 +3470,9 @@
}
void
-sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
{
- int i;
+ uint32_t i;
uint16_t temp;
/*
@@ -3522,36 +3498,57 @@
}
static void
-sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
{
- int i;
+ uint32_t i;
+ uint16_t temp;
- if (number_entries == 0) {
+ if (number_entries > 0) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ /* no such stream */
+ continue;
+ }
+ stcb->asoc.strmout[temp].next_sequence_send = 0;
+ }
+ } else {
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
- stcb->asoc.strmout[i].next_sequence_sent = 0;
+ stcb->asoc.strmout[i].next_sequence_send = 0;
}
- } else if (number_entries) {
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_reset_clear_pending(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t * list)
+{
+ uint32_t i;
+ uint16_t temp;
+
+ if (number_entries > 0) {
for (i = 0; i < number_entries; i++) {
- uint16_t temp;
-
temp = ntohs(list[i]);
if (temp >= stcb->asoc.streamoutcnt) {
/* no such stream */
continue;
}
- stcb->asoc.strmout[temp].next_sequence_sent = 0;
+ stcb->asoc.strmout[temp].state = SCTP_STREAM_OPEN;
}
+ } else {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_OPEN;
+ }
}
- sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
}
-struct sctp_stream_reset_out_request *
+struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
{
struct sctp_association *asoc;
- struct sctp_stream_reset_out_req *req;
- struct sctp_stream_reset_out_request *r;
+ struct sctp_chunkhdr *ch;
+ struct sctp_stream_reset_request *r;
struct sctp_tmit_chunk *chk;
int len, clen;
@@ -3573,8 +3570,8 @@
*bchk = chk;
}
clen = chk->send_size;
- req = mtod(chk->data, struct sctp_stream_reset_out_req *);
- r = &req->sr_req;
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ r = (struct sctp_stream_reset_request *)(ch + 1);
if (ntohl(r->request_seq) == seq) {
/* found it */
return (r);
@@ -3582,7 +3579,7 @@
len = SCTP_SIZE32(ntohs(r->ph.param_length));
if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
/* move to the next one, there can only be a max of two */
- r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+ r = (struct sctp_stream_reset_request *)((caddr_t)r + len);
if (ntohl(r->request_seq) == seq) {
return (r);
}
@@ -3602,7 +3599,8 @@
}
asoc = &stcb->asoc;
- sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+ sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb,
+ chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28);
TAILQ_REMOVE(&asoc->control_send_queue,
chk,
sctp_next);
@@ -3626,8 +3624,10 @@
int lparm_len;
struct sctp_association *asoc = &stcb->asoc;
struct sctp_tmit_chunk *chk;
- struct sctp_stream_reset_out_request *srparam;
- int number_entries;
+ struct sctp_stream_reset_request *req_param;
+ struct sctp_stream_reset_out_request *req_out_param;
+ struct sctp_stream_reset_in_request *req_in_param;
+ uint32_t number_entries;
if (asoc->stream_reset_outstanding == 0) {
/* duplicate */
@@ -3634,12 +3634,15 @@
return (0);
}
if (seq == stcb->asoc.str_reset_seq_out) {
- srparam = sctp_find_stream_reset(stcb, seq, &chk);
- if (srparam) {
+ req_param = sctp_find_stream_reset(stcb, seq, &chk);
+ if (req_param != NULL) {
stcb->asoc.str_reset_seq_out++;
- type = ntohs(srparam->ph.param_type);
- lparm_len = ntohs(srparam->ph.param_length);
+ type = ntohs(req_param->ph.param_type);
+ lparm_len = ntohs(req_param->ph.param_length);
if (type == SCTP_STR_RESET_OUT_REQUEST) {
+ int no_clear = 0;
+
+ req_out_param = (struct sctp_stream_reset_out_request *)req_param;
number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
asoc->stream_reset_out_is_outstanding = 0;
if (asoc->stream_reset_outstanding)
@@ -3646,23 +3649,35 @@
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* do it */
- sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ sctp_reset_out_streams(stcb, number_entries, req_out_param->list_of_streams);
} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
- sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
+ } else if (action == SCTP_STREAM_RESET_RESULT_IN_PROGRESS) {
+ /*
+ * Set it up so we don't stop
+ * retransmitting
+ */
+ asoc->stream_reset_outstanding++;
+ stcb->asoc.str_reset_seq_out--;
+ asoc->stream_reset_out_is_outstanding = 1;
+ no_clear = 1;
} else {
- sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
}
+ if (no_clear == 0) {
+ sctp_reset_clear_pending(stcb, number_entries, req_out_param->list_of_streams);
+ }
} else if (type == SCTP_STR_RESET_IN_REQUEST) {
- /* Answered my request */
+ req_in_param = (struct sctp_stream_reset_in_request *)req_param;
number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
if (asoc->stream_reset_outstanding)
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb,
- number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
} else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb,
- number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
}
} else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) {
/* Ok we now may have more streams */
@@ -3678,7 +3693,12 @@
asoc->stream_reset_outstanding--;
if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
/* Put the new streams into effect */
- stcb->asoc.streamoutcnt += num_stream;
+ int i;
+
+ for (i = asoc->streamoutcnt; i < (asoc->streamoutcnt + num_stream); i++) {
+ asoc->strmout[i].state = SCTP_STREAM_OPEN;
+ }
+ asoc->streamoutcnt += num_stream;
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
@@ -3711,6 +3731,9 @@
/* huh ? */
return (0);
}
+ if (ntohs(respin->ph.param_length) < sizeof(struct sctp_stream_reset_response_tsn)) {
+ return (0);
+ }
if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
resp = (struct sctp_stream_reset_response_tsn *)respin;
asoc->stream_reset_outstanding--;
@@ -3752,6 +3775,9 @@
}
}
}
+ if (asoc->stream_reset_outstanding == 0) {
+ sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
+ }
return (0);
}
@@ -3782,22 +3808,33 @@
} else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
len = ntohs(req->ph.param_length);
number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
- for (i = 0; i < number_entries; i++) {
- temp = ntohs(req->list_of_streams[i]);
- req->list_of_streams[i] = temp;
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(req->list_of_streams[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
+ goto bad_boy;
+ }
+ req->list_of_streams[i] = temp;
+ }
+ for (i = 0; i < number_entries; i++) {
+ if (stcb->asoc.strmout[req->list_of_streams[i]].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[req->list_of_streams[i]].state = SCTP_STREAM_RESET_PENDING;
+ }
+ }
+ } else {
+ /* Its all */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN)
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+ }
}
asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
- sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
- asoc->str_reset_seq_out,
- seq, (asoc->sending_seq - 1));
- asoc->stream_reset_out_is_outstanding = 1;
- asoc->str_reset = chk;
- sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
- stcb->asoc.stream_reset_outstanding++;
} else {
/* Can't do it, since we have sent one out */
asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
}
+bad_boy:
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
} else if (asoc->str_reset_seq_in - 1 == seq) {
@@ -3807,6 +3844,7 @@
} else {
sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
}
+ sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
}
static int
@@ -3925,12 +3963,12 @@
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
return;
}
+ liste->seq = seq;
liste->tsn = tsn;
liste->number_entries = number_entries;
- memcpy(&liste->req, req,
- (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
+ memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t));
TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
- asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_IN_PROGRESS;
}
sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
asoc->str_reset_seq_in++;
@@ -4067,7 +4105,7 @@
mychk += num_stream;
if (mychk < 0x10000) {
stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
- if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, 1, num_stream, 0, 1)) {
+ if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, num_stream, 0, 1)) {
stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
}
} else {
@@ -4098,9 +4136,9 @@
#endif
static int
sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
- struct sctp_stream_reset_out_req *sr_req)
+ struct sctp_chunkhdr *ch_req)
{
- int chk_length, param_len, ptype;
+ uint16_t remaining_length, param_len, ptype;
struct sctp_paramhdr pstore;
uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
uint32_t seq = 0;
@@ -4113,7 +4151,7 @@
int num_param = 0;
/* now it may be a reset or a reset-response */
- chk_length = ntohs(sr_req->ch.chunk_length);
+ remaining_length = ntohs(ch_req->chunk_length) - sizeof(struct sctp_chunkhdr);
/* setup for adding the response */
sctp_alloc_a_chunk(stcb, chk);
@@ -4120,13 +4158,15 @@
if (chk == NULL) {
return (ret_code);
}
+ chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_STREAM_RESET;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->no_fr_allowed = 0;
chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
chk->book_size_scale = 0;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
strres_nochunk:
if (chk->data) {
@@ -4149,20 +4189,27 @@
ch->chunk_length = htons(chk->send_size);
SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
offset += sizeof(struct sctp_chunkhdr);
- while ((size_t)chk_length >= sizeof(struct sctp_stream_reset_tsn_request)) {
+ while (remaining_length >= sizeof(struct sctp_paramhdr)) {
ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *) & pstore);
- if (ph == NULL)
+ if (ph == NULL) {
+ /* TSNH */
break;
+ }
param_len = ntohs(ph->param_length);
- if (param_len < (int)sizeof(struct sctp_stream_reset_tsn_request)) {
- /* bad param */
+ if ((param_len > remaining_length) ||
+ (param_len < (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)))) {
+ /* bad parameter length */
break;
}
- ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, (int)sizeof(cstore)),
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, sizeof(cstore)),
(uint8_t *) & cstore);
+ if (ph == NULL) {
+ /* TSNH */
+ break;
+ }
ptype = ntohs(ph->param_type);
num_param++;
- if (param_len > (int)sizeof(cstore)) {
+ if (param_len > sizeof(cstore)) {
trunc = 1;
} else {
trunc = 0;
@@ -4174,6 +4221,9 @@
if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
struct sctp_stream_reset_out_request *req_out;
+ if (param_len < sizeof(struct sctp_stream_reset_out_request)) {
+ break;
+ }
req_out = (struct sctp_stream_reset_out_request *)ph;
num_req++;
if (stcb->asoc.stream_reset_outstanding) {
@@ -4187,6 +4237,9 @@
} else if (ptype == SCTP_STR_RESET_ADD_OUT_STREAMS) {
struct sctp_stream_reset_add_strm *str_add;
+ if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
+ break;
+ }
str_add = (struct sctp_stream_reset_add_strm *)ph;
num_req++;
sctp_handle_str_reset_add_strm(stcb, chk, str_add);
@@ -4193,6 +4246,9 @@
} else if (ptype == SCTP_STR_RESET_ADD_IN_STREAMS) {
struct sctp_stream_reset_add_strm *str_add;
+ if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
+ break;
+ }
str_add = (struct sctp_stream_reset_add_strm *)ph;
num_req++;
sctp_handle_str_reset_add_out_strm(stcb, chk, str_add);
@@ -4217,6 +4273,9 @@
struct sctp_stream_reset_response *resp;
uint32_t result;
+ if (param_len < sizeof(struct sctp_stream_reset_response)) {
+ break;
+ }
resp = (struct sctp_stream_reset_response *)ph;
seq = ntohl(resp->response_seq);
result = ntohl(resp->result);
@@ -4228,7 +4287,11 @@
break;
}
offset += SCTP_SIZE32(param_len);
- chk_length -= SCTP_SIZE32(param_len);
+ if (remaining_length >= SCTP_SIZE32(param_len)) {
+ remaining_length -= SCTP_SIZE32(param_len);
+ } else {
+ remaining_length = 0;
+ }
}
if (num_req == 0) {
/* we have no response free the stuff */
@@ -4411,12 +4474,15 @@
#endif
static struct sctp_tcb *
sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct sctp_association *asoc;
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
uint32_t vtag_in;
int num_chunks = 0; /* number of control chunks processed */
uint32_t chk_length;
@@ -4436,13 +4502,13 @@
int auth_skipped = 0;
int asconf_cnt = 0;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
- iphlen, *offset, length, stcb);
+ iphlen, *offset, length, (void *)stcb);
/* validate chunk header length... */
if (ntohs(ch->chunk_length) < sizeof(*ch)) {
@@ -4480,7 +4546,7 @@
*/
if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
(stcb == NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ (inp->auth_supported == 1)) {
/* save this chunk for later processing */
auth_skipped = 1;
auth_offset = *offset;
@@ -4527,7 +4593,9 @@
if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
break;
stcb = sctp_findassociation_ep_asconf(m,
- *offset, sh, &inp, netp, vrf_id);
+ *offset,
+ dst,
+ sh, &inp, netp, vrf_id);
if (stcb != NULL)
break;
asconf_offset += SCTP_SIZE32(asconf_len);
@@ -4568,9 +4636,12 @@
}
}
if (stcb == NULL) {
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
/* no association, so it's out of the blue... */
- sctp_handle_ootb(m, iphlen, *offset, sh, inp,
- use_mflowid, mflowid,
+ sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
*offset = length;
if (locked_tcb) {
@@ -4583,8 +4654,10 @@
if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
(ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
(ch->chunk_type == SCTP_PACKET_DROPPED)) {
- if ((vtag_in == asoc->my_vtag) ||
- ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+ /* Take the T-bit always into account. */
+ if ((((ch->chunk_flags & SCTP_HAD_NO_TCB) == 0) &&
+ (vtag_in == asoc->my_vtag)) ||
+ (((ch->chunk_flags & SCTP_HAD_NO_TCB) == SCTP_HAD_NO_TCB) &&
(vtag_in == asoc->peer_vtag))) {
/* this is valid */
} else {
@@ -4607,8 +4680,12 @@
if (locked_tcb) {
SCTP_TCB_UNLOCK(locked_tcb);
}
- sctp_handle_ootb(m, iphlen, *offset, sh, inp,
- use_mflowid, mflowid,
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
+ sctp_handle_ootb(m, iphlen, *offset, src, dst,
+ sh, inp, op_err,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return (NULL);
}
@@ -4736,7 +4813,7 @@
/* check to see if this chunk required auth, but isn't */
if ((stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -4749,29 +4826,27 @@
/* The INIT chunk must be the only chunk. */
if ((num_chunks > 1) ||
(length - *offset > (int)SCTP_SIZE32(chk_length))) {
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, NULL,
- use_mflowid, mflowid,
- vrf_id, port);
+ /* RFC 4960 requires that no ABORT is sent */
*offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
return (NULL);
}
/* Honor our resource limit. */
if (chk_length > SCTP_LARGEST_INIT_ACCEPTED) {
- struct mbuf *op_err;
-
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
*offset = length;
return (NULL);
}
- sctp_handle_init(m, iphlen, *offset, sh,
+ sctp_handle_init(m, iphlen, *offset, src, dst, sh,
(struct sctp_init_chunk *)ch, inp,
- stcb, &abort_no_unlock,
- use_mflowid, mflowid,
+ stcb, *netp, &abort_no_unlock,
+ mflowtype, mflowid,
vrf_id, port);
*offset = length;
if ((!abort_no_unlock) && (locked_tcb)) {
@@ -4788,13 +4863,13 @@
if ((stcb) && (stcb->asoc.total_output_queue_size)) {
;
} else {
- if (locked_tcb != stcb) {
+ if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
/* Very unlikely */
SCTP_TCB_UNLOCK(locked_tcb);
}
*offset = length;
if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4802,8 +4877,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -4820,11 +4896,12 @@
return (NULL);
}
if ((netp) && (*netp)) {
- ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
+ ret = sctp_handle_init_ack(m, iphlen, *offset,
+ src, dst, sh,
(struct sctp_init_ack_chunk *)ch,
stcb, *netp,
&abort_no_unlock,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
vrf_id);
} else {
ret = -1;
@@ -4943,8 +5020,7 @@
SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n");
break;
}
- if ((stcb->asoc.sctp_nr_sack_on_off == 0) ||
- (stcb->asoc.peer_supports_nr_sack == 0)) {
+ if (stcb->asoc.nrsack_supported == 0) {
goto unknown_chunk;
}
if (chk_length < sizeof(struct sctp_nr_sack_chunk)) {
@@ -5057,7 +5133,7 @@
break;
case SCTP_ABORT_ASSOCIATION:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
- stcb);
+ (void *)stcb);
if ((stcb) && netp && *netp)
sctp_handle_abort((struct sctp_abort_chunk *)ch,
stcb, *netp);
@@ -5066,7 +5142,7 @@
break;
case SCTP_SHUTDOWN:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
- stcb);
+ (void *)stcb);
if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
*offset = length;
if (locked_tcb) {
@@ -5086,7 +5162,7 @@
}
break;
case SCTP_SHUTDOWN_ACK:
- SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", (void *)stcb);
if ((stcb) && (netp) && (*netp))
sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
*offset = length;
@@ -5102,7 +5178,7 @@
break;
case SCTP_COOKIE_ECHO:
SCTPDBG(SCTP_DEBUG_INPUT3,
- "SCTP_COOKIE-ECHO, stcb %p\n", stcb);
+ "SCTP_COOKIE-ECHO, stcb %p\n", (void *)stcb);
if ((stcb) && (stcb->asoc.total_output_queue_size)) {
;
} else {
@@ -5127,12 +5203,10 @@
if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
- struct mbuf *op_err;
-
- op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
- sctp_abort_association(inp, stcb, m,
- iphlen, sh, op_err,
- use_mflowid, mflowid,
+ op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
+ sctp_abort_association(inp, stcb, m, iphlen,
+ src, dst, sh, op_err,
+ mflowtype, mflowid,
vrf_id, port);
}
*offset = length;
@@ -5158,7 +5232,9 @@
if (netp) {
ret_buf =
sctp_handle_cookie_echo(m, iphlen,
- *offset, sh,
+ *offset,
+ src, dst,
+ sh,
(struct sctp_cookie_echo_chunk *)ch,
&inp, &stcb, netp,
auth_skipped,
@@ -5165,7 +5241,7 @@
auth_offset,
auth_len,
&locked_tcb,
- use_mflowid,
+ mflowtype,
mflowid,
vrf_id,
port);
@@ -5202,7 +5278,7 @@
}
break;
case SCTP_COOKIE_ACK:
- SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", (void *)stcb);
if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
if (locked_tcb) {
SCTP_TCB_UNLOCK(locked_tcb);
@@ -5214,7 +5290,7 @@
if ((stcb) && (stcb->asoc.total_output_queue_size)) {
;
} else if (stcb) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -5222,8 +5298,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
*offset = length;
@@ -5255,6 +5332,9 @@
return (NULL);
}
if (stcb) {
+ if (stcb->asoc.ecn_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5280,6 +5360,9 @@
return (NULL);
}
if (stcb) {
+ if (stcb->asoc.ecn_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5292,7 +5375,7 @@
}
break;
case SCTP_SHUTDOWN_COMPLETE:
- SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", (void *)stcb);
/* must be first and only chunk */
if ((num_chunks > 1) ||
(length - *offset > (int)SCTP_SIZE32(chk_length))) {
@@ -5313,6 +5396,9 @@
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
/* He's alive so give him credit */
if (stcb) {
+ if (stcb->asoc.asconf_supported == 0) {
+ goto unknown_chunk;
+ }
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
stcb->asoc.overall_error_count,
@@ -5321,7 +5407,7 @@
__LINE__);
}
stcb->asoc.overall_error_count = 0;
- sctp_handle_asconf(m, *offset,
+ sctp_handle_asconf(m, *offset, src,
(struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
asconf_cnt++;
}
@@ -5337,6 +5423,9 @@
return (NULL);
}
if ((stcb) && netp && *netp) {
+ if (stcb->asoc.asconf_supported == 0) {
+ goto unknown_chunk;
+ }
/* He's alive so give him credit */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5366,6 +5455,9 @@
if (stcb) {
int abort_flag = 0;
+ if (stcb->asoc.prsctp_supported == 0) {
+ goto unknown_chunk;
+ }
stcb->asoc.overall_error_count = 0;
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
@@ -5377,7 +5469,7 @@
*fwd_tsn_seen = 1;
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
/* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -5385,8 +5477,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_31);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
*offset = length;
@@ -5420,32 +5513,10 @@
*offset = length;
return (NULL);
}
- if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
- /* We are not interested anymore */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- so = SCTP_INP_SO(inp);
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- SCTP_SOCKET_LOCK(so, 1);
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
-#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- SCTP_SOCKET_UNLOCK(so, 1);
-#endif
- *offset = length;
- return (NULL);
+ if (stcb->asoc.reconfig_supported == 0) {
+ goto unknown_chunk;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
- /*
- * hmm, peer should have announced this, but
- * we will turn it on since he is sending us
- * a stream reset.
- */
- stcb->asoc.peer_supports_strreset = 1;
- }
- if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) {
+ if (sctp_handle_stream_reset(stcb, m, *offset, ch)) {
/* stop processing */
*offset = length;
return (NULL);
@@ -5463,6 +5534,9 @@
return (NULL);
}
if (ch && (stcb) && netp && (*netp)) {
+ if (stcb->asoc.pktdrop_supported == 0) {
+ goto unknown_chunk;
+ }
sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
stcb, *netp,
min(chk_length, (sizeof(chunk_buf) - 4)));
@@ -5469,12 +5543,8 @@
}
break;
-
case SCTP_AUTHENTICATION:
SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
- if (SCTP_BASE_SYSCTL(sctp_auth_disable))
- goto unknown_chunk;
-
if (stcb == NULL) {
/* save the first AUTH for later processing */
if (auth_skipped == 0) {
@@ -5485,6 +5555,9 @@
/* skip this chunk (temporarily) */
goto next_chunk;
}
+ if (stcb->asoc.auth_supported == 0) {
+ goto unknown_chunk;
+ }
if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
(chk_length > (sizeof(struct sctp_auth_chunk) +
SCTP_AUTH_DIGEST_LEN_MAX))) {
@@ -5515,43 +5588,27 @@
unknown_chunk:
/* it's an unknown chunk! */
if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
- struct mbuf *mm;
- struct sctp_paramhdr *phd;
+ struct sctp_gen_error_cause *cause;
+ int len;
- mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (mm) {
- phd = mtod(mm, struct sctp_paramhdr *);
- /*
- * We cheat and use param type since
- * we did not bother to define a
- * error cause struct. They are the
- * same basic format with different
- * names.
- */
- phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
- phd->param_length = htons(chk_length + sizeof(*phd));
- SCTP_BUF_LEN(mm) = sizeof(*phd);
- SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, chk_length, M_DONTWAIT);
- if (SCTP_BUF_NEXT(mm)) {
- if (sctp_pad_lastmbuf(SCTP_BUF_NEXT(mm), SCTP_SIZE32(chk_length) - chk_length, NULL)) {
- sctp_m_freem(mm);
- } else {
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
+ 0, M_NOWAIT, 1, MT_DATA);
+ if (op_err != NULL) {
+ len = min(SCTP_SIZE32(chk_length), (uint32_t) (length - *offset));
+ cause = mtod(op_err, struct sctp_gen_error_cause *);
+ cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ cause->length = htons(len + sizeof(struct sctp_gen_error_cause));
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
+ SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, len, M_NOWAIT);
+ if (SCTP_BUF_NEXT(op_err) != NULL) {
#ifdef SCTP_MBUF_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = SCTP_BUF_NEXT(mm); mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
- }
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ sctp_log_mbc(SCTP_BUF_NEXT(op_err), SCTP_MBUF_ICOPY);
+ }
#endif
- sctp_queue_op_err(stcb, mm);
- }
+ sctp_queue_op_err(stcb, op_err);
} else {
- sctp_m_freem(mm);
+ sctp_m_freem(op_err);
}
}
}
@@ -5589,49 +5646,29 @@
}
-#ifdef INVARIANTS
-#ifdef __GNUC__
-__attribute__((noinline))
-#endif
- void
- sctp_validate_no_locks(struct sctp_inpcb *inp)
-{
- struct sctp_tcb *lstcb;
-
- LIST_FOREACH(lstcb, &inp->sctp_asoc_list, sctp_tcblist) {
- if (mtx_owned(&lstcb->tcb_mtx)) {
- panic("Own lock on stcb at return from input");
- }
- }
- if (mtx_owned(&inp->inp_create_mtx)) {
- panic("Own create lock on inp");
- }
- if (mtx_owned(&inp->inp_mtx)) {
- panic("Own inp lock on inp");
- }
-}
-
-#endif
-
/*
* common input chunk processing (v4 and v6)
*/
void
-sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
- int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
- struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct sctp_nets *net, uint8_t ecn_bits,
- uint8_t use_mflowid, uint32_t mflowid,
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint8_t compute_crc,
+#endif
+ uint8_t ecn_bits,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
- /*
- * Control chunk processing
- */
uint32_t high_tsn;
int fwd_tsn_seen = 0, data_processed = 0;
- struct mbuf *m = *mm;
+ struct mbuf *m = *mm, *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
int un_sent;
int cnt_ctrl_ready = 0;
+ struct sctp_inpcb *inp = NULL, *inp_decr = NULL;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net = NULL;
SCTP_STAT_INCR(sctps_recvdatagrams);
#ifdef SCTP_AUDITING_ENABLED
@@ -5638,15 +5675,148 @@
sctp_audit_log(0xE0, 1);
sctp_auditing(0, inp, stcb, net);
#endif
+#if !defined(SCTP_WITH_NO_CSUM)
+ if (compute_crc != 0) {
+ uint32_t check, calc_check;
+ check = sh->checksum;
+ sh->checksum = 0;
+ calc_check = sctp_calculate_cksum(m, iphlen);
+ sh->checksum = check;
+ if (calc_check != check) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
+ calc_check, check, (void *)m, length, iphlen);
+ stcb = sctp_findassociation_addr(m, offset, src, dst,
+ sh, ch, &inp, &net, vrf_id);
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
+ if (net->port == 0) {
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
+ }
+ net->port = port;
+ }
+#endif
+ if (net != NULL) {
+ net->flowtype = mflowtype;
+ net->flowid = mflowid;
+ }
+ if ((inp != NULL) && (stcb != NULL)) {
+ sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+ } else if ((inp != NULL) && (stcb == NULL)) {
+ inp_decr = inp;
+ }
+ SCTP_STAT_INCR(sctps_badsum);
+ SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+ goto out;
+ }
+ }
+#endif
+ /* Destination port of 0 is illegal, based on RFC4960. */
+ if (sh->dest_port == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ stcb = sctp_findassociation_addr(m, offset, src, dst,
+ sh, ch, &inp, &net, vrf_id);
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
+ if (net->port == 0) {
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
+ }
+ net->port = port;
+ }
+#endif
+ if (net != NULL) {
+ net->flowtype = mflowtype;
+ net->flowid = mflowid;
+ }
+ if (inp == NULL) {
+ SCTP_STAT_INCR(sctps_noport);
+ if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
+ goto out;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ sctp_send_shutdown_complete2(src, dst, sh,
+ mflowtype, mflowid, fibnum,
+ vrf_id, port);
+ goto out;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+ goto out;
+ }
+ if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
+ if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
+ ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
+ (ch->chunk_type != SCTP_INIT))) {
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Out of the blue");
+ sctp_send_abort(m, iphlen, src, dst,
+ sh, 0, op_err,
+ mflowtype, mflowid, fibnum,
+ vrf_id, port);
+ }
+ }
+ goto out;
+ } else if (stcb == NULL) {
+ inp_decr = inp;
+ }
+#ifdef IPSEC
+ /*-
+ * I very much doubt any of the IPSEC stuff will work but I have no
+ * idea, so I will leave it in place.
+ */
+ if (inp != NULL) {
+ switch (dst->sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+ IPSECSTAT_INC(ips_in_polvio);
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (ipsec6_in_reject(m, &inp->ip_inp.inp)) {
+ IPSEC6STAT_INC(ips_in_polvio);
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+#endif
SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n",
- m, iphlen, offset, length, stcb);
+ (void *)m, iphlen, offset, length, (void *)stcb);
if (stcb) {
/* always clear this before beginning a packet */
stcb->asoc.authenticated = 0;
stcb->asoc.seen_a_sack_this_pkt = 0;
SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
- stcb, stcb->asoc.state);
+ (void *)stcb, stcb->asoc.state);
if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
(stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
@@ -5657,18 +5827,23 @@
* NOT respond to any packet.. its OOTB.
*/
SCTP_TCB_UNLOCK(stcb);
- sctp_handle_ootb(m, iphlen, offset, sh, inp,
- use_mflowid, mflowid,
+ stcb = NULL;
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
- goto out_now;
+ goto out;
}
}
if (IS_SCTP_CONTROL(ch)) {
/* process the control portion of the SCTP packet */
/* sa_ignore NO_NULL_CHK */
- stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
+ stcb = sctp_process_control(m, iphlen, &offset, length,
+ src, dst, sh, ch,
inp, stcb, &net, &fwd_tsn_seen,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
if (stcb) {
/*
@@ -5676,12 +5851,23 @@
* it changes our INP.
*/
inp = stcb->sctp_ep;
- if ((net) && (port)) {
+#if defined(INET) || defined(INET6)
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
+#endif
}
} else {
/*
@@ -5695,25 +5881,26 @@
* chunks
*/
if ((stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
/* "silently" ignore */
SCTP_STAT_INCR(sctps_recvauthmissing);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
}
if (stcb == NULL) {
/* out of the blue DATA chunk */
- sctp_handle_ootb(m, iphlen, offset, sh, inp,
- use_mflowid, mflowid,
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
- goto out_now;
+ goto out;
}
if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
/* v_tag mismatch! */
SCTP_STAT_INCR(sctps_badvtag);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
}
}
@@ -5723,7 +5910,7 @@
* packet while processing control, or we're done with this
* packet (done or skip rest of data), so we drop it...
*/
- goto out_now;
+ goto out;
}
/*
* DATA chunk processing
@@ -5736,7 +5923,7 @@
*/
if ((length > offset) &&
(stcb != NULL) &&
- !SCTP_BASE_SYSCTL(sctp_auth_disable) &&
+ (stcb->asoc.auth_supported == 1) &&
sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
!stcb->asoc.authenticated) {
/* "silently" ignore */
@@ -5774,11 +5961,13 @@
/*
* We consider OOTB any data sent during asoc setup.
*/
- sctp_handle_ootb(m, iphlen, offset, sh, inp,
- use_mflowid, mflowid,
+ snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
+ sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
/* sa_ignore NOTREACHED */
break;
case SCTP_STATE_EMPTY: /* should not happen */
@@ -5786,8 +5975,7 @@
case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */
case SCTP_STATE_SHUTDOWN_ACK_SENT:
default:
- SCTP_TCB_UNLOCK(stcb);
- goto out_now;
+ goto out;
/* sa_ignore NOTREACHED */
break;
case SCTP_STATE_OPEN:
@@ -5795,16 +5983,15 @@
break;
}
/* plow through the data chunks while length > offset */
- retval = sctp_process_data(mm, iphlen, &offset, length, sh,
- inp, stcb, net, &high_tsn,
- use_mflowid, mflowid,
- vrf_id, port);
+ retval = sctp_process_data(mm, iphlen, &offset, length,
+ inp, stcb, net, &high_tsn);
if (retval == 2) {
/*
* The association aborted, NO UNLOCK needed since
* the association is destroyed.
*/
- goto out_now;
+ stcb = NULL;
+ goto out;
}
data_processed = 1;
/*
@@ -5814,7 +6001,7 @@
}
/* take care of ecn */
if ((data_processed == 1) &&
- (stcb->asoc.ecn_allowed == 1) &&
+ (stcb->asoc.ecn_supported == 1) &&
((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) {
/* Yep, we need to add a ECNE */
sctp_send_ecn_echo(stcb, net, high_tsn);
@@ -5849,7 +6036,7 @@
if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq;
}
- if (cnt_ctrl_ready ||
+ if (cnt_ctrl_ready || stcb->asoc.trigger_reset ||
((un_sent) &&
(stcb->asoc.peers_rwnd > 0 ||
(stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
@@ -5861,11 +6048,16 @@
sctp_audit_log(0xE0, 3);
sctp_auditing(2, inp, stcb, net);
#endif
- SCTP_TCB_UNLOCK(stcb);
-out_now:
-#ifdef INVARIANTS
- sctp_validate_no_locks(inp);
-#endif
+out:
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if (inp_decr != NULL) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp_decr);
+ SCTP_INP_DECR_REF(inp_decr);
+ SCTP_INP_WUNLOCK(inp_decr);
+ }
return;
}
@@ -5874,9 +6066,9 @@
sctp_print_mbuf_chain(struct mbuf *m)
{
for (; m; m = SCTP_BUF_NEXT(m)) {
- SCTP_PRINTF("%p: m_len = %ld\n", m, SCTP_BUF_LEN(m));
+ SCTP_PRINTF("%p: m_len = %ld\n", (void *)m, SCTP_BUF_LEN(m));
if (SCTP_BUF_IS_EXTENDED(m))
- SCTP_PRINTF("%p: extend_size = %d\n", m, SCTP_BUF_EXTEND_SIZE(m));
+ SCTP_PRINTF("%p: extend_size = %d\n", (void *)m, SCTP_BUF_EXTEND_SIZE(m));
}
}
@@ -5890,259 +6082,111 @@
int iphlen;
uint32_t vrf_id = 0;
uint8_t ecn_bits;
+ struct sockaddr_in src, dst;
struct ip *ip;
struct sctphdr *sh;
- struct sctp_inpcb *inp = NULL;
- struct sctp_nets *net;
- struct sctp_tcb *stcb = NULL;
struct sctp_chunkhdr *ch;
- int refcount_up = 0;
- int length, mlen, offset;
- uint32_t mflowid;
- uint8_t use_mflowid;
+ int length, offset;
#if !defined(SCTP_WITH_NO_CSUM)
- uint32_t check, calc_check;
+ uint8_t compute_crc;
#endif
+ uint32_t mflowid;
+ uint8_t mflowtype;
+ uint16_t fibnum;
+ iphlen = off;
if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
SCTP_RELEASE_PKT(i_pak);
return;
}
- mlen = SCTP_HEADER_LEN(i_pak);
- iphlen = off;
m = SCTP_HEADER_TO_CHAIN(i_pak);
-
- net = NULL;
- SCTP_STAT_INCR(sctps_recvpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
-
-
#ifdef SCTP_MBUF_LOGGING
/* Log in any input mbufs */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_INPUT);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_INPUT);
}
#endif
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, mlen);
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(m);
+ }
#endif
- if (m->m_flags & M_FLOWID) {
- mflowid = m->m_pkthdr.flowid;
- use_mflowid = 1;
- } else {
- mflowid = 0;
- use_mflowid = 0;
- }
- /*
- * Must take out the iphlen, since mlen expects this (only effect lb
- * case)
- */
- mlen -= iphlen;
-
- /*
- * Get IP, SCTP, and first chunk header together in first mbuf.
- */
- ip = mtod(m, struct ip *);
- offset = iphlen + sizeof(*sh) + sizeof(*ch);
+ SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+ "sctp_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+ m->m_pkthdr.len,
+ if_name(m->m_pkthdr.rcvif),
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ mflowid = m->m_pkthdr.flowid;
+ mflowtype = M_HASHTYPE_GET(m);
+ fibnum = M_GETFIB(m);
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+ /* Get IP, SCTP, and first chunk header together in the first mbuf. */
+ offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
if (SCTP_BUF_LEN(m) < offset) {
- if ((m = m_pullup(m, offset)) == 0) {
+ if ((m = m_pullup(m, offset)) == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
return;
}
- ip = mtod(m, struct ip *);
}
- /* validate mbuf chain length with IP payload length */
- if (mlen < (SCTP_GET_IPV4_LENGTH(ip) - iphlen)) {
+ ip = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
+ offset -= sizeof(struct sctp_chunkhdr);
+ memset(&src, 0, sizeof(struct sockaddr_in));
+ src.sin_family = AF_INET;
+ src.sin_len = sizeof(struct sockaddr_in);
+ src.sin_port = sh->src_port;
+ src.sin_addr = ip->ip_src;
+ memset(&dst, 0, sizeof(struct sockaddr_in));
+ dst.sin_family = AF_INET;
+ dst.sin_len = sizeof(struct sockaddr_in);
+ dst.sin_port = sh->dest_port;
+ dst.sin_addr = ip->ip_dst;
+ length = ntohs(ip->ip_len);
+ /* Validate mbuf chain length with IP payload length. */
+ if (SCTP_HEADER_LEN(m) != length) {
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m));
SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
+ goto out;
}
- sh = (struct sctphdr *)((caddr_t)ip + iphlen);
- ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
- SCTPDBG(SCTP_DEBUG_INPUT1,
- "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
-
/* SCTP does not allow broadcasts or multicasts */
- if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
- goto bad;
+ if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr))) {
+ goto out;
}
- if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
- /*
- * We only look at broadcast if its a front state, All
- * others we will not have a tcb for anyway.
- */
- goto bad;
+ if (SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) {
+ goto out;
}
- /* validate SCTP checksum */
- SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
- "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
- m->m_pkthdr.len,
- if_name(m->m_pkthdr.rcvif),
- m->m_pkthdr.csum_flags);
+ ecn_bits = ip->ip_tos;
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_recvnocrc);
#else
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
SCTP_STAT_INCR(sctps_recvhwcrc);
- goto sctp_skip_csum_4;
+ compute_crc = 0;
+ } else {
+ SCTP_STAT_INCR(sctps_recvswcrc);
+ compute_crc = 1;
}
- check = sh->checksum; /* save incoming checksum */
- sh->checksum = 0; /* prepare for calc */
- calc_check = sctp_calculate_cksum(m, iphlen);
- sh->checksum = check;
- SCTP_STAT_INCR(sctps_recvswcrc);
- if (calc_check != check) {
- SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
- calc_check, check, m, mlen, iphlen);
-
- stcb = sctp_findassociation_addr(m,
- offset - sizeof(*ch),
- sh, ch, &inp, &net,
- vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
#endif
- }
- if ((inp) && (stcb)) {
- sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
- } else if ((inp != NULL) && (stcb == NULL)) {
- refcount_up = 1;
- }
- SCTP_STAT_INCR(sctps_badsum);
- SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
- goto bad;
- }
-sctp_skip_csum_4:
+ sctp_common_input_processing(&m, iphlen, offset, length,
+ (struct sockaddr *)&src,
+ (struct sockaddr *)&dst,
+ sh, ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+ compute_crc,
#endif
- /* destination port of 0 is illegal, based on RFC2960. */
- if (sh->dest_port == 0) {
- SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
- }
- /*
- * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
- * IP/SCTP/first chunk header...
- */
- stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
- sh, ch, &inp, &net, vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
-#endif
- }
- /* inp's ref-count increased && stcb locked */
- if (inp == NULL) {
- struct sctp_init_chunk *init_chk, chunk_buf;
-
- SCTP_STAT_INCR(sctps_noport);
- if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0)
- goto bad;
- SCTPDBG(SCTP_DEBUG_INPUT1,
- "Sending a ABORT from packet entry!\n");
- if (ch->chunk_type == SCTP_INITIATION) {
- /*
- * we do a trick here to get the INIT tag, dig in
- * and get the tag from the INIT and put it in the
- * common header.
- */
- init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
- iphlen + sizeof(*sh), sizeof(*init_chk),
- (uint8_t *) & chunk_buf);
- if (init_chk != NULL)
- sh->v_tag = init_chk->init.initiate_tag;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
- sctp_send_shutdown_complete2(m, sh,
- use_mflowid, mflowid,
- vrf_id, port);
- goto bad;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
- goto bad;
- }
- if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
- if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
- ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
- (ch->chunk_type != SCTP_INIT))) {
- sctp_send_abort(m, iphlen, sh, 0, NULL,
- use_mflowid, mflowid,
- vrf_id, port);
- }
- }
- goto bad;
- } else if (stcb == NULL) {
- refcount_up = 1;
- }
-#ifdef IPSEC
- /*
- * I very much doubt any of the IPSEC stuff will work but I have no
- * idea, so I will leave it in place.
- */
- if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
- MODULE_GLOBAL(ipsec4stat).in_polvio++;
- SCTP_STAT_INCR(sctps_hdrops);
- goto bad;
- }
-#endif /* IPSEC */
-
- /*
- * common chunk processing
- */
- length = ip->ip_len + iphlen;
- offset -= sizeof(struct sctp_chunkhdr);
-
- ecn_bits = ip->ip_tos;
-
- /* sa_ignore NO_NULL_CHK */
- sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
- inp, stcb, net, ecn_bits,
- use_mflowid, mflowid,
+ ecn_bits,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
- /* inp's ref-count reduced && stcb unlocked */
+out:
if (m) {
sctp_m_freem(m);
}
- if ((inp) && (refcount_up)) {
- /* reduce ref-count */
- SCTP_INP_DECR_REF(inp);
- }
return;
-bad:
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
- if ((inp) && (refcount_up)) {
- /* reduce ref-count */
- SCTP_INP_DECR_REF(inp);
- }
- if (m) {
- sctp_m_freem(m);
- }
- return;
}
#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
@@ -6161,7 +6205,7 @@
uint32_t flowid, tag;
if (mp_ncpus > 1) {
- if (m->m_flags & M_FLOWID) {
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
flowid = m->m_pkthdr.flowid;
} else {
/*
@@ -6168,20 +6212,19 @@
* No flow id built by lower layers fix it so we
* create one.
*/
- ip = mtod(m, struct ip *);
- offset = off + sizeof(*sh);
+ offset = off + sizeof(struct sctphdr);
if (SCTP_BUF_LEN(m) < offset) {
- if ((m = m_pullup(m, offset)) == 0) {
+ if ((m = m_pullup(m, offset)) == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
return;
}
- ip = mtod(m, struct ip *);
}
+ ip = mtod(m, struct ip *);
sh = (struct sctphdr *)((caddr_t)ip + off);
tag = htonl(sh->v_tag);
flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port);
m->m_pkthdr.flowid = flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
}
cpu_to_use = sctp_cpuarry[flowid % mp_ncpus];
sctp_queue_to_mcore(m, off, cpu_to_use);
Modified: trunk/sys/netinet/sctp_input.h
===================================================================
--- trunk/sys/netinet/sctp_input.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_input.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_input.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_input.h 284633 2015-06-20 08:25:27Z tuexen $");
#ifndef _NETINET_SCTP_INPUT_H_
#define _NETINET_SCTP_INPUT_H_
@@ -40,18 +40,21 @@
#if defined(_KERNEL) || defined(__Userspace__)
void
sctp_common_input_processing(struct mbuf **, int, int, int,
+ struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_chunkhdr *,
- struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint8_t,
- uint8_t, uint32_t,
+#if !defined(SCTP_WITH_NO_CSUM)
+ uint8_t,
+#endif
+ uint8_t,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
-struct sctp_stream_reset_out_request *
+struct sctp_stream_reset_request *
sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
struct sctp_tmit_chunk **bchk);
void
-sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries,
+sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries,
uint16_t * list);
Modified: trunk/sys/netinet/sctp_lock_bsd.h
===================================================================
--- trunk/sys/netinet/sctp_lock_bsd.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_lock_bsd.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_lock_bsd.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_lock_bsd.h 235828 2012-05-23 11:26:28Z tuexen $");
#ifndef _NETINET_SCTP_LOCK_BSD_H_
#define _NETINET_SCTP_LOCK_BSD_H_
Modified: trunk/sys/netinet/sctp_os.h
===================================================================
--- trunk/sys/netinet/sctp_os.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_os.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_os.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_os.h 235828 2012-05-23 11:26:28Z tuexen $");
#ifndef _NETINET_SCTP_OS_H_
#define _NETINET_SCTP_OS_H_
Modified: trunk/sys/netinet/sctp_os_bsd.h
===================================================================
--- trunk/sys/netinet/sctp_os_bsd.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_os_bsd.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_os_bsd.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_os_bsd.h 314327 2017-02-27 08:27:38Z avg $");
#ifndef _NETINET_SCTP_OS_BSD_H_
#define _NETINET_SCTP_OS_BSD_H_
@@ -96,7 +96,6 @@
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
-#include <netinet/icmp6.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/nd6.h>
#include <netinet6/scope6_var.h>
@@ -105,6 +104,9 @@
#include <netinet/ip_options.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2/sha256.h>
+
#ifndef in6pcb
#define in6pcb inpcb
#endif
@@ -150,46 +152,31 @@
#define V_system_base_info VNET(system_base_info)
#define SCTP_BASE_INFO(__m) V_system_base_info.sctppcbinfo.__m
#define SCTP_BASE_STATS V_system_base_info.sctpstat
-#define SCTP_BASE_STATS_SYSCTL VNET_NAME(system_base_info.sctpstat)
-#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
-#define SCTP_BASE_SYSCTL(__m) VNET_NAME(system_base_info.sctpsysctl.__m)
+#define SCTP_BASE_STAT(__m) V_system_base_info.sctpstat.__m
+#define SCTP_BASE_SYSCTL(__m) V_system_base_info.sctpsysctl.__m
#define SCTP_BASE_VAR(__m) V_system_base_info.__m
-/*
- *
- */
-#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
-
#define SCTP_PRINTF(params...) printf(params)
#if defined(SCTP_DEBUG)
#define SCTPDBG(level, params...) \
{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
- SCTP_PRINTF(params); \
- } \
- } while (0); \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ SCTP_PRINTF(params); \
+ } \
+ } while (0); \
}
#define SCTPDBG_ADDR(level, addr) \
{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
- sctp_print_address(addr); \
- } \
- } while (0); \
+ do { \
+ if (SCTP_BASE_SYSCTL(sctp_debug_on) & level ) { \
+ sctp_print_address(addr); \
+ } \
+ } while (0); \
}
-#define SCTPDBG_PKT(level, iph, sh) \
-{ \
- do { \
- if (SCTP_BASE_SYSCTL(sctp_debug_on) & level) { \
- sctp_print_address_pkt(iph, sh); \
- } \
- } while (0); \
-}
#else
#define SCTPDBG(level, params...)
#define SCTPDBG_ADDR(level, addr)
-#define SCTPDBG_PKT(level, iph, sh)
#endif
#ifdef SCTP_LTRACE_CHUNKS
@@ -201,11 +188,11 @@
#ifdef SCTP_LTRACE_ERRORS
#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) \
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ SCTP_PRINTF("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
m, inp, stcb, net, file, __LINE__, err);
#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) \
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LTRACE_ERROR_ENABLE) \
- SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ SCTP_PRINTF("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
inp, stcb, net, file, __LINE__, err);
#else
#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
@@ -239,16 +226,16 @@
* general memory allocation
*/
#define SCTP_MALLOC(var, type, size, name) \
- do { \
- var = (type)malloc(size, name, M_NOWAIT); \
- } while (0)
+ do { \
+ var = (type)malloc(size, name, M_NOWAIT); \
+ } while (0)
#define SCTP_FREE(var, type) free(var, type)
#define SCTP_MALLOC_SONAME(var, type, size) \
- do { \
- var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
- } while (0)
+ do { \
+ var = (type)malloc(size, M_SONAME, M_WAITOK | M_ZERO); \
+ } while (0)
#define SCTP_FREE_SONAME(var) free(var, M_SONAME)
@@ -335,11 +322,11 @@
/* MTU */
/*************************/
#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
-#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0))
#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
if (rt != NULL) \
- rt->rt_rmx.rmx_mtu = mtu; \
+ rt->rt_mtu = mtu; \
} while(0)
/* (de-)register interface event notifications */
@@ -366,7 +353,7 @@
*/
#define SCTP_HEADER_TO_CHAIN(m) (m)
#define SCTP_DETACH_HEADER_FROM_CHAIN(m)
-#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_HEADER_LEN(m) ((m)->m_pkthdr.len)
#define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
#define SCTP_RELEASE_HEADER(m)
#define SCTP_RELEASE_PKT(m) sctp_m_freem(m)
@@ -395,10 +382,6 @@
* its a NOP.
*/
-/* Macro's for getting length from V6/V4 header */
-#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
-#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
-
/* get the v6 hop limit */
#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
@@ -429,13 +412,8 @@
typedef struct route sctp_route_t;
typedef struct rtentry sctp_rtentry_t;
-/*
- * XXX multi-FIB support was backed out in r179783 and it seems clear that the
- * VRF support as currently in FreeBSD is not ready to support multi-FIB.
- * It might be best to implement multi-FIB support for both v4 and v6 indepedent
- * of VRFs and leave those to a real MPLS stack.
- */
-#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+#define SCTP_RTALLOC(ro, vrf_id, fibnum) \
+ rtalloc_ign_fib((struct route *)ro, 0UL, fibnum)
/* Future zero copy wakeup/send function */
#define SCTP_ZERO_COPY_EVENT(inp, so)
@@ -443,6 +421,11 @@
#define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
/*
+ * SCTP protocol specific mbuf flags.
+ */
+#define M_NOTIFICATION M_PROTO1 /* SCTP notification */
+
+/*
* IP output routines
*/
#define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
@@ -453,6 +436,7 @@
local_stcb->sctp_ep && \
local_stcb->sctp_ep->sctp_socket) \
o_flgs |= local_stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE; \
+ m_clrprotoflags(o_pak); \
result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
}
@@ -459,6 +443,7 @@
#define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
{ \
struct sctp_tcb *local_stcb = stcb; \
+ m_clrprotoflags(o_pak); \
if (local_stcb && local_stcb->sctp_ep) \
result = ip6_output(o_pak, \
((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
@@ -475,23 +460,18 @@
/*
* SCTP AUTH
*/
-#define HAVE_SHA2
-
#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
-#ifdef USE_SCTP_SHA1
-#include <netinet/sctp_sha1.h>
-#else
-#include <crypto/sha1.h>
/* map standard crypto API names */
-#define SHA1_Init SHA1Init
-#define SHA1_Update SHA1Update
-#define SHA1_Final(x,y) SHA1Final((caddr_t)x, y)
-#endif
+#define SCTP_SHA1_CTX SHA1_CTX
+#define SCTP_SHA1_INIT SHA1Init
+#define SCTP_SHA1_UPDATE SHA1Update
+#define SCTP_SHA1_FINAL(x,y) SHA1Final((caddr_t)x, y)
-#if defined(HAVE_SHA2)
-#include <crypto/sha2/sha2.h>
-#endif
+#define SCTP_SHA256_CTX SHA256_CTX
+#define SCTP_SHA256_INIT SHA256_Init
+#define SCTP_SHA256_UPDATE SHA256_Update
+#define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y)
#endif
Modified: trunk/sys/netinet/sctp_output.c
===================================================================
--- trunk/sys/netinet/sctp_output.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_output.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_output.c 240305 2012-09-10 11:38:02Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_output.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
@@ -51,7 +51,9 @@
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_crc32.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
@@ -1864,15 +1866,10 @@
int
sctp_is_address_in_scope(struct sctp_ifa *ifa,
- int ipv4_addr_legal,
- int ipv6_addr_legal,
- int loopback_scope,
- int ipv4_local_scope,
- int local_scope SCTP_UNUSED,/* XXX */
- int site_scope,
+ struct sctp_scoping *scope,
int do_update)
{
- if ((loopback_scope == 0) &&
+ if ((scope->loopback_scope == 0) &&
(ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
/*
* skip loopback if not in scope *
@@ -1882,15 +1879,15 @@
switch (ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
- if (ipv4_addr_legal) {
+ if (scope->ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sin;
+ sin = &ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/* not in scope , unspecified */
return (0);
}
- if ((ipv4_local_scope == 0) &&
+ if ((scope->ipv4_local_scope == 0) &&
(IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
/* private address not in scope */
return (0);
@@ -1902,7 +1899,7 @@
#endif
#ifdef INET6
case AF_INET6:
- if (ipv6_addr_legal) {
+ if (scope->ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
/*
@@ -1916,7 +1913,7 @@
return (0);
}
/* ok to use deprecated addresses? */
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/* skip unspecifed addresses */
return (0);
@@ -1925,7 +1922,7 @@
(IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
return (0);
}
- if ((site_scope == 0) &&
+ if ((scope->site_scope == 0) &&
(IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
return (0);
}
@@ -1941,27 +1938,31 @@
}
static struct mbuf *
-sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa)
+sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t * len)
{
+#if defined(INET) || defined(INET6)
struct sctp_paramhdr *parmh;
struct mbuf *mret;
- int len;
+ uint16_t plen;
+#endif
+
switch (ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
- len = sizeof(struct sctp_ipv4addr_param);
+ plen = (uint16_t) sizeof(struct sctp_ipv4addr_param);
break;
#endif
#ifdef INET6
case AF_INET6:
- len = sizeof(struct sctp_ipv6addr_param);
+ plen = (uint16_t) sizeof(struct sctp_ipv6addr_param);
break;
#endif
default:
return (m);
}
- if (M_TRAILINGSPACE(m) >= len) {
+#if defined(INET) || defined(INET6)
+ if (M_TRAILINGSPACE(m) >= plen) {
/* easy side we just drop it on the end */
parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
mret = m;
@@ -1971,7 +1972,7 @@
while (SCTP_BUF_NEXT(mret) != NULL) {
mret = SCTP_BUF_NEXT(mret);
}
- SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA);
if (SCTP_BUF_NEXT(mret) == NULL) {
/* We are hosed, can't add more addresses */
return (m);
@@ -1987,12 +1988,12 @@
struct sctp_ipv4addr_param *ipv4p;
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&ifa->address.sin;
+ sin = &ifa->address.sin;
ipv4p = (struct sctp_ipv4addr_param *)parmh;
parmh->param_type = htons(SCTP_IPV4_ADDRESS);
- parmh->param_length = htons(len);
+ parmh->param_length = htons(plen);
ipv4p->addr = sin->sin_addr.s_addr;
- SCTP_BUF_LEN(mret) += len;
+ SCTP_BUF_LEN(mret) += plen;
break;
}
#endif
@@ -2002,15 +2003,15 @@
struct sctp_ipv6addr_param *ipv6p;
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ sin6 = &ifa->address.sin6;
ipv6p = (struct sctp_ipv6addr_param *)parmh;
parmh->param_type = htons(SCTP_IPV6_ADDRESS);
- parmh->param_length = htons(len);
+ parmh->param_length = htons(plen);
memcpy(ipv6p->addr, &sin6->sin6_addr,
sizeof(ipv6p->addr));
/* clear embedded scope in the address */
in6_clearscope((struct in6_addr *)ipv6p->addr);
- SCTP_BUF_LEN(mret) += len;
+ SCTP_BUF_LEN(mret) += plen;
break;
}
#endif
@@ -2017,7 +2018,11 @@
default:
return (m);
}
+ if (len != NULL) {
+ *len += plen;
+ }
return (mret);
+#endif
}
@@ -2024,7 +2029,8 @@
struct mbuf *
sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
struct sctp_scoping *scope,
- struct mbuf *m_at, int cnt_inits_to)
+ struct mbuf *m_at, int cnt_inits_to,
+ uint16_t * padding_len, uint16_t * chunk_len)
{
struct sctp_vrf *vrf = NULL;
int cnt, limit_out = 0, total_count;
@@ -2057,16 +2063,24 @@
continue;
}
LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifap->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifap->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
continue;
}
- if (sctp_is_address_in_scope(sctp_ifap,
- scope->ipv4_addr_legal,
- scope->ipv6_addr_legal,
- scope->loopback_scope,
- scope->ipv4_local_scope,
- scope->local_scope,
- scope->site_scope, 1) == 0) {
+ if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) {
continue;
}
cnt++;
@@ -2092,19 +2106,36 @@
continue;
}
LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifap->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifap->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifap->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifap->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if (sctp_is_addr_restricted(stcb, sctp_ifap)) {
continue;
}
if (sctp_is_address_in_scope(sctp_ifap,
- scope->ipv4_addr_legal,
- scope->ipv6_addr_legal,
- scope->loopback_scope,
- scope->ipv4_local_scope,
- scope->local_scope,
- scope->site_scope, 0) == 0) {
+ scope, 0) == 0) {
continue;
}
- m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap);
+ if ((chunk_len != NULL) &&
+ (padding_len != NULL) &&
+ (*padding_len > 0)) {
+ memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
+ SCTP_BUF_LEN(m_at) += *padding_len;
+ *chunk_len += *padding_len;
+ *padding_len = 0;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len);
if (limit_out) {
cnt++;
total_count++;
@@ -2146,12 +2177,7 @@
continue;
}
if (sctp_is_address_in_scope(laddr->ifa,
- scope->ipv4_addr_legal,
- scope->ipv6_addr_legal,
- scope->loopback_scope,
- scope->ipv4_local_scope,
- scope->local_scope,
- scope->site_scope, 1) == 0) {
+ scope, 1) == 0) {
continue;
}
cnt++;
@@ -2171,15 +2197,18 @@
continue;
}
if (sctp_is_address_in_scope(laddr->ifa,
- scope->ipv4_addr_legal,
- scope->ipv6_addr_legal,
- scope->loopback_scope,
- scope->ipv4_local_scope,
- scope->local_scope,
- scope->site_scope, 0) == 0) {
+ scope, 0) == 0) {
continue;
}
- m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa);
+ if ((chunk_len != NULL) &&
+ (padding_len != NULL) &&
+ (*padding_len > 0)) {
+ memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len);
+ SCTP_BUF_LEN(m_at) += *padding_len;
+ *chunk_len += *padding_len;
+ *padding_len = 0;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len);
cnt++;
if (cnt >= SCTP_ADDRESS_LIMIT) {
break;
@@ -2389,7 +2418,7 @@
LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if (laddr->ifa == ifa) {
@@ -2411,7 +2440,7 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if ((laddr->ifa == ifa) && laddr->action == 0)
@@ -2455,6 +2484,20 @@
if (sctp_ifn) {
/* is a preferred one on the interface we route out? */
LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0))
continue;
@@ -2578,6 +2621,20 @@
if (sctp_ifn) {
/* first try for a preferred address on the ep */
LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
continue;
if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
@@ -2598,6 +2655,20 @@
}
/* next try for an acceptable address on the ep */
LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
continue;
if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
@@ -2702,6 +2773,7 @@
static struct sctp_ifa *
sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
+ struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
int non_asoc_addr_ok,
uint8_t dest_is_loop,
@@ -2723,6 +2795,20 @@
}
#endif /* INET6 */
LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0))
continue;
@@ -2782,13 +2868,7 @@
}
#endif
if (stcb) {
- if (sctp_is_address_in_scope(ifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
continue;
}
if (((non_asoc_addr_ok == 0) &&
@@ -2814,6 +2894,7 @@
static int
sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
+ struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
int non_asoc_addr_ok,
uint8_t dest_is_loop,
@@ -2824,6 +2905,21 @@
int num_eligible_addr = 0;
LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((ifa->address.sa.sa_family == AF_INET6) &&
+ (stcb != NULL) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0)) {
continue;
@@ -2834,13 +2930,7 @@
continue;
}
if (stcb) {
- if (sctp_is_address_in_scope(ifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) {
continue;
}
if (((non_asoc_addr_ok == 0) &&
@@ -2861,7 +2951,8 @@
}
static struct sctp_ifa *
-sctp_choose_boundall(struct sctp_tcb *stcb,
+sctp_choose_boundall(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
struct sctp_nets *net,
sctp_route_t * ro,
uint32_t vrf_id,
@@ -2916,7 +3007,7 @@
cur_addr_num = net->indx_of_eligible_next_to_use;
}
num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
- stcb,
+ inp, stcb,
non_asoc_addr_ok,
dest_is_loop,
dest_is_priv, fam);
@@ -2943,7 +3034,7 @@
*/
SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
- sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
dest_is_priv, cur_addr_num, fam, ro);
/* if sctp_ifa is NULL something changed??, fall to plan b. */
@@ -2974,7 +3065,7 @@
SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
continue;
}
- num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, stcb, non_asoc_addr_ok,
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok,
dest_is_loop, dest_is_priv, fam);
SCTPDBG(SCTP_DEBUG_OUTPUT2,
"Found ifn:%p %d preferred source addresses\n",
@@ -2986,7 +3077,7 @@
}
SCTPDBG(SCTP_DEBUG_OUTPUT2,
"num preferred:%d on interface:%p cur_addr_num:%d\n",
- num_preferred, sctp_ifn, cur_addr_num);
+ num_preferred, (void *)sctp_ifn, cur_addr_num);
/*
* Ok we have num_eligible_addr set with how many we can
@@ -2996,7 +3087,7 @@
if (cur_addr_num >= num_preferred) {
cur_addr_num = 0;
}
- sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop,
dest_is_priv, cur_addr_num, fam, ro);
if (sifa == NULL)
continue;
@@ -3023,7 +3114,23 @@
goto plan_d;
}
LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", sctp_ifa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa);
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n");
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0)) {
SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n");
@@ -3036,13 +3143,7 @@
continue;
}
if (stcb) {
- if (sctp_is_address_in_scope(sifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n");
sifa = NULL;
continue;
@@ -3060,8 +3161,6 @@
sifa = NULL;
continue;
}
- } else {
- SCTP_PRINTF("Stcb is null - no print\n");
}
atomic_add_int(&sifa->refcount, 1);
goto out;
@@ -3073,7 +3172,7 @@
* out and see if we can find an acceptable address somewhere
* amongst all interfaces.
*/
- SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", looked_at);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at);
LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
/* wrong base scope */
@@ -3080,6 +3179,20 @@
continue;
}
LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0))
continue;
@@ -3089,13 +3202,7 @@
if (sifa == NULL)
continue;
if (stcb) {
- if (sctp_is_address_in_scope(sifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) {
sifa = NULL;
continue;
}
@@ -3116,12 +3223,14 @@
}
}
#ifdef INET
- if ((retried == 0) && (stcb->asoc.ipv4_local_scope == 0)) {
- stcb->asoc.ipv4_local_scope = 1;
- retried = 1;
- goto again_with_private_addresses_allowed;
- } else if (retried == 1) {
- stcb->asoc.ipv4_local_scope = 0;
+ if (stcb) {
+ if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) {
+ stcb->asoc.scope.ipv4_local_scope = 1;
+ retried = 1;
+ goto again_with_private_addresses_allowed;
+ } else if (retried == 1) {
+ stcb->asoc.scope.ipv4_local_scope = 0;
+ }
}
#endif
out:
@@ -3136,6 +3245,20 @@
LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
struct sctp_ifa *tmp_sifa;
+#ifdef INET
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin.sin_addr) != 0)) {
+ continue;
+ }
+#endif
+#ifdef INET6
+ if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sctp_ifa->address.sin6.sin6_addr) != 0)) {
+ continue;
+ }
+#endif
if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
(non_asoc_addr_ok == 0))
continue;
@@ -3150,12 +3273,7 @@
}
if (stcb) {
if (sctp_is_address_in_scope(tmp_sifa,
- stcb->asoc.ipv4_addr_legal,
- stcb->asoc.ipv6_addr_legal,
- stcb->asoc.loopback_scope,
- stcb->asoc.ipv4_local_scope,
- stcb->asoc.local_scope,
- stcb->asoc.site_scope, 0) == 0) {
+ &stcb->asoc.scope, 0) == 0) {
continue;
}
if (((non_asoc_addr_ok == 0) &&
@@ -3274,7 +3392,7 @@
/*
* Need a route to cache.
*/
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
if (ro->ro_rt == NULL) {
return (NULL);
@@ -3326,7 +3444,7 @@
/*
* Bound all case
*/
- answer = sctp_choose_boundall(stcb, net, ro, vrf_id,
+ answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
dest_is_priv, dest_is_loop,
non_asoc_addr_ok, fam);
SCTP_IPI_ADDR_RUNLOCK();
@@ -3372,7 +3490,7 @@
return (found);
}
m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
- if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
/* We dont't have a complete CMSG header. */
return (found);
}
@@ -3387,11 +3505,11 @@
(cmh.cmsg_type == SCTP_PRINFO) ||
(cmh.cmsg_type == SCTP_AUTHINFO))))) {
if (c_type == cmh.cmsg_type) {
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < cpsize) {
return (found);
}
/* It is exactly what we want. Copy it out. */
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), cpsize, (caddr_t)data);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), cpsize, (caddr_t)data);
return (1);
} else {
struct sctp_sndrcvinfo *sndrcvinfo;
@@ -3405,10 +3523,10 @@
}
switch (cmh.cmsg_type) {
case SCTP_SNDINFO:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_sndinfo)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_sndinfo)) {
return (found);
}
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo);
sndrcvinfo->sinfo_stream = sndinfo.snd_sid;
sndrcvinfo->sinfo_flags = sndinfo.snd_flags;
sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid;
@@ -3416,18 +3534,22 @@
sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id;
break;
case SCTP_PRINFO:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_prinfo)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_prinfo)) {
return (found);
}
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
- sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo);
+ if (prinfo.pr_policy != SCTP_PR_SCTP_NONE) {
+ sndrcvinfo->sinfo_timetolive = prinfo.pr_value;
+ } else {
+ sndrcvinfo->sinfo_timetolive = 0;
+ }
sndrcvinfo->sinfo_flags |= prinfo.pr_policy;
break;
case SCTP_AUTHINFO:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_authinfo)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_authinfo)) {
return (found);
}
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo);
sndrcvinfo->sinfo_keynumber_valid = 1;
sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber;
break;
@@ -3467,7 +3589,7 @@
return (1);
}
m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
- if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
/* We dont't have a complete CMSG header. */
*error = EINVAL;
return (1);
@@ -3480,11 +3602,11 @@
if (cmh.cmsg_level == IPPROTO_SCTP) {
switch (cmh.cmsg_type) {
case SCTP_INIT:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_initmsg)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) {
*error = EINVAL;
return (1);
}
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg);
if (initmsg.sinit_max_attempts)
stcb->asoc.max_init_times = initmsg.sinit_max_attempts;
if (initmsg.sinit_num_ostreams)
@@ -3497,6 +3619,11 @@
struct sctp_stream_out *tmp_str;
unsigned int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
/* Default is NOT correct */
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n",
stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams);
@@ -3514,10 +3641,21 @@
stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt;
}
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
- stcb->asoc.strmout[i].next_sequence_sent = 0;
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+ stcb->asoc.strmout[i].chunks_on_queues = 0;
+ stcb->asoc.strmout[i].next_sequence_send = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
stcb->asoc.strmout[i].stream_no = i;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ stcb->asoc.strmout[i].state = SCTP_STREAM_OPENING;
stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
}
}
@@ -3524,7 +3662,7 @@
break;
#ifdef INET
case SCTP_DSTADDRV4:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
*error = EINVAL;
return (1);
}
@@ -3532,7 +3670,7 @@
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_port = stcb->rport;
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
if ((sin.sin_addr.s_addr == INADDR_ANY) ||
(sin.sin_addr.s_addr == INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
@@ -3548,7 +3686,7 @@
#endif
#ifdef INET6
case SCTP_DSTADDRV6:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
*error = EINVAL;
return (1);
}
@@ -3556,7 +3694,7 @@
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = stcb->rport;
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
*error = EINVAL;
@@ -3596,7 +3734,7 @@
static struct sctp_tcb *
sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p,
- in_port_t port,
+ uint16_t port,
struct mbuf *control,
struct sctp_nets **net_p,
int *error)
@@ -3624,7 +3762,7 @@
return (NULL);
}
m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
- if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) {
+ if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) {
/* We dont't have a complete CMSG header. */
*error = EINVAL;
return (NULL);
@@ -3638,7 +3776,7 @@
switch (cmh.cmsg_type) {
#ifdef INET
case SCTP_DSTADDRV4:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) {
*error = EINVAL;
return (NULL);
}
@@ -3646,13 +3784,13 @@
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_port = port;
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr);
addr = (struct sockaddr *)&sin;
break;
#endif
#ifdef INET6
case SCTP_DSTADDRV6:
- if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) {
+ if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) {
*error = EINVAL;
return (NULL);
}
@@ -3660,7 +3798,7 @@
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = port;
- m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
+ m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr);
#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) {
in6_sin6_2_sin(&sin, &sin6);
@@ -3697,14 +3835,13 @@
int sig_offset;
uint16_t cookie_sz;
- mret = NULL;
mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
sizeof(struct sctp_paramhdr)), 0,
- M_DONTWAIT, 1, MT_DATA);
+ M_NOWAIT, 1, MT_DATA);
if (mret == NULL) {
return (NULL);
}
- copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+ copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT);
if (copy_init == NULL) {
sctp_m_freem(mret);
return (NULL);
@@ -3711,17 +3848,11 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = copy_init; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(copy_init, SCTP_MBUF_ICOPY);
}
#endif
copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
- M_DONTWAIT);
+ M_NOWAIT);
if (copy_initack == NULL) {
sctp_m_freem(mret);
sctp_m_freem(copy_init);
@@ -3729,13 +3860,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = copy_initack; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(copy_initack, SCTP_MBUF_ICOPY);
}
#endif
/* easy side we just drop it on the end */
@@ -3771,7 +3896,7 @@
break;
}
}
- sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+ sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_NOWAIT, 1, MT_DATA);
if (sig == NULL) {
/* no space, so free the entire chain */
sctp_m_freem(mret);
@@ -3793,7 +3918,7 @@
static uint8_t
sctp_get_ect(struct sctp_tcb *stcb)
{
- if ((stcb != NULL) && (stcb->asoc.ecn_allowed == 1)) {
+ if ((stcb != NULL) && (stcb->asoc.ecn_supported == 1)) {
return (SCTP_ECT0_BIT);
} else {
return (0);
@@ -3813,7 +3938,7 @@
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa);
if (net->dest_state & SCTP_ADDR_CONFIRMED) {
if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
- SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net);
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
stcb, 0,
(void *)net,
@@ -3864,7 +3989,7 @@
uint32_t v_tag,
uint16_t port,
union sctp_sockstore *over_addr,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
int so_locked SCTP_UNUSED
#else
@@ -3890,8 +4015,11 @@
struct sctphdr *sctphdr;
int packet_length;
int ret;
+
+#if defined(INET) || defined(INET6)
uint32_t vrf_id;
+#endif
#if defined(INET) || defined(INET6)
struct mbuf *o_pak;
sctp_route_t *ro = NULL;
@@ -3900,7 +4028,7 @@
#endif
uint8_t tos_value;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so = NULL;
#endif
@@ -3910,12 +4038,13 @@
sctp_m_freem(m);
return (EFAULT);
}
+#if defined(INET) || defined(INET6)
if (stcb) {
vrf_id = stcb->asoc.vrf_id;
} else {
vrf_id = inp->def_vrf_id;
}
-
+#endif
/* fill in the HMAC digest for any AUTH chunk in the packet */
if ((auth != NULL) && (stcb != NULL)) {
sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid);
@@ -3936,11 +4065,11 @@
sctp_route_t iproute;
int len;
- len = sizeof(struct ip) + sizeof(struct sctphdr);
+ len = SCTP_MIN_V4_OVERHEAD;
if (port) {
len += sizeof(struct udphdr);
}
- newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
if (newm == NULL) {
sctp_m_freem(m);
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -3951,18 +4080,11 @@
SCTP_BUF_NEXT(newm) = m;
m = newm;
if (net != NULL) {
-#ifdef INVARIANTS
- if (net->flowidset == 0) {
- panic("Flow ID not set");
- }
-#endif
m->m_pkthdr.flowid = net->flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, net->flowtype);
} else {
- if (use_mflowid != 0) {
- m->m_pkthdr.flowid = mflowid;
- m->m_flags |= M_FLOWID;
- }
+ m->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(m, mflowtype);
}
packet_length = sctp_calculate_len(m);
ip = mtod(m, struct ip *);
@@ -3981,15 +4103,15 @@
tos_value |= sctp_get_ect(stcb);
}
if ((nofragment_flag) && (port == 0)) {
- ip->ip_off = IP_DF;
- } else
- ip->ip_off = 0;
-
+ ip->ip_off = htons(IP_DF);
+ } else {
+ ip->ip_off = htons(0);
+ }
/* FreeBSD has a function for ip_id's */
ip->ip_id = ip_newid();
ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
- ip->ip_len = packet_length;
+ ip->ip_len = htons(packet_length);
ip->ip_tos = tos_value;
if (port) {
ip->ip_p = IPPROTO_UDP;
@@ -4052,7 +4174,7 @@
sctp_free_ifa(_lsrc);
} else {
ip->ip_src = over_addr->sin.sin_addr;
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
}
if (port) {
@@ -4107,7 +4229,7 @@
SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
(uint32_t) (ntohl(ip->ip_dst.s_addr)));
SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
- ro->ro_rt);
+ (void *)ro->ro_rt);
if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
/* failed to prepend data, give up */
@@ -4115,10 +4237,6 @@
sctp_m_freem(m);
return (ENOMEM);
}
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, packet_length);
-#endif
SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
@@ -4135,12 +4253,16 @@
SCTP_STAT_INCR(sctps_sendnocrc);
#else
m->m_pkthdr.csum_flags = CSUM_SCTP;
- m->m_pkthdr.csum_data = 0;
+ m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(o_pak);
+#endif
/* send it out. table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
so = SCTP_INP_SO(inp);
SCTP_SOCKET_UNLOCK(so, 0);
@@ -4147,7 +4269,7 @@
}
#endif
SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4225,11 +4347,11 @@
flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
}
flowlabel &= 0x000fffff;
- len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
+ len = SCTP_MIN_OVERHEAD;
if (port) {
len += sizeof(struct udphdr);
}
- newm = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA);
if (newm == NULL) {
sctp_m_freem(m);
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -4240,18 +4362,11 @@
SCTP_BUF_NEXT(newm) = m;
m = newm;
if (net != NULL) {
-#ifdef INVARIANTS
- if (net->flowidset == 0) {
- panic("Flow ID not set");
- }
-#endif
m->m_pkthdr.flowid = net->flowid;
- m->m_flags |= M_FLOWID;
+ M_HASHTYPE_SET(m, net->flowtype);
} else {
- if (use_mflowid != 0) {
- m->m_pkthdr.flowid = mflowid;
- m->m_flags |= M_FLOWID;
- }
+ m->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(m, mflowtype);
}
packet_length = sctp_calculate_len(m);
@@ -4373,7 +4488,7 @@
sctp_free_ifa(_lsrc);
} else {
lsa6->sin6_addr = over_addr->sin6.sin6_addr;
- SCTP_RTALLOC(ro, vrf_id);
+ SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
(void)sa6_recoverscope(sin6);
}
@@ -4464,10 +4579,6 @@
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
return (ENOMEM);
}
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
- sctp_packet_log(m, packet_length);
-#endif
SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
@@ -4484,19 +4595,23 @@
SCTP_STAT_INCR(sctps_sendnocrc);
#else
m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
- m->m_pkthdr.csum_data = 0;
+ m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
/* send it out. table id is taken from stcb */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
so = SCTP_INP_SO(inp);
SCTP_SOCKET_UNLOCK(so, 0);
}
#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(o_pak);
+#endif
SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -4575,21 +4690,18 @@
#endif
)
{
- struct mbuf *m, *m_at, *mp_last;
+ struct mbuf *m, *m_last;
struct sctp_nets *net;
struct sctp_init_chunk *init;
struct sctp_supported_addr_param *sup_addr;
struct sctp_adaptation_layer_indication *ali;
- struct sctp_ecn_supported_param *ecn;
- struct sctp_prsctp_supported_param *prsctp;
struct sctp_supported_chunk_types_param *pr_supported;
+ struct sctp_paramhdr *ph;
int cnt_inits_to = 0;
- int padval, ret;
- int num_ext;
- int p_len;
+ int ret;
+ uint16_t num_ext, chunk_len, padding_len, parameter_len;
/* INIT's always go to the primary (and usually ONLY address) */
- mp_last = NULL;
net = stcb->asoc.primary_destination;
if (net == NULL) {
net = TAILQ_FIRST(&stcb->asoc.nets);
@@ -4606,15 +4718,12 @@
}
SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
#ifdef INET6
- if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) {
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
/*
* special hook, if we are sending to link local it will not
* show up in our private address count.
*/
- struct sockaddr_in6 *sin6l;
-
- sin6l = &net->ro._l_addr.sin6;
- if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr))
+ if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr))
cnt_inits_to = 1;
}
#endif
@@ -4626,20 +4735,15 @@
/* start the INIT timer */
sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
- m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA);
if (m == NULL) {
/* No memory, INIT timer will re-attempt. */
SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
return;
}
- SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
- /*
- * assume peer supports asconf in order to be able to queue local
- * address changes while an INIT is in flight and before the assoc
- * is established.
- */
- stcb->asoc.peer_supports_asconf = 1;
- /* Now lets put the SCTP header in place */
+ chunk_len = (uint16_t) sizeof(struct sctp_init_chunk);
+ padding_len = 0;
+ /* Now lets put the chunk header in place */
init = mtod(m, struct sctp_init_chunk *);
/* now the chunk header */
init->ch.chunk_type = SCTP_INITIATION;
@@ -4651,190 +4755,187 @@
/* set up some of the credits. */
init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0,
SCTP_MINIMAL_RWND));
-
init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
init->init.initial_tsn = htonl(stcb->asoc.init_seq_number);
- /* now the address restriction */
- /* XXX Should we take the address family of the socket into account? */
- sup_addr = (struct sctp_supported_addr_param *)((caddr_t)init +
- sizeof(*init));
- sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
-#ifdef INET6
-#ifdef INET
- /* we support 2 types: IPv4/IPv6 */
- sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + 2 * sizeof(uint16_t));
- sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
- sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS);
-#else
- /* we support 1 type: IPv6 */
- sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + sizeof(uint16_t));
- sup_addr->addr_type[0] = htons(SCTP_IPV6_ADDRESS);
- sup_addr->addr_type[1] = htons(0); /* this is the padding */
-#endif
-#else
- /* we support 1 type: IPv4 */
- sup_addr->ph.param_length = htons(sizeof(struct sctp_paramhdr) + sizeof(uint16_t));
- sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
- sup_addr->addr_type[1] = htons(0); /* this is the padding */
-#endif
- SCTP_BUF_LEN(m) += sizeof(struct sctp_supported_addr_param);
- /* adaptation layer indication parameter */
- ali = (struct sctp_adaptation_layer_indication *)((caddr_t)sup_addr + sizeof(struct sctp_supported_addr_param));
- ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
- ali->ph.param_length = htons(sizeof(*ali));
- ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
- SCTP_BUF_LEN(m) += sizeof(*ali);
- ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
+ /* Adaptation layer indication parameter */
+ if (inp->sctp_ep.adaptation_layer_indicator_provided) {
+ parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
+ ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(parameter_len);
+ ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
+ chunk_len += parameter_len;
+ }
+ /* ECN parameter */
+ if (stcb->asoc.ecn_supported == 1) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_ECN_CAPABLE);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
+ }
+ /* PR-SCTP supported parameter */
+ if (stcb->asoc.prsctp_supported == 1) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
+ }
+ /* Add NAT friendly parameter. */
if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) {
- /* Add NAT friendly parameter */
- struct sctp_paramhdr *ph;
-
- ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
- ecn = (struct sctp_ecn_supported_param *)((caddr_t)ph + sizeof(*ph));
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
}
- /* now any cookie time extensions */
- if (stcb->asoc.cookie_preserve_req) {
- struct sctp_cookie_perserve_param *cookie_preserve;
-
- cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn);
- cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
- cookie_preserve->ph.param_length = htons(
- sizeof(*cookie_preserve));
- cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
- SCTP_BUF_LEN(m) += sizeof(*cookie_preserve);
- ecn = (struct sctp_ecn_supported_param *)(
- (caddr_t)cookie_preserve + sizeof(*cookie_preserve));
- stcb->asoc.cookie_preserve_req = 0;
+ /* And now tell the peer which extensions we support */
+ num_ext = 0;
+ pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+ if (stcb->asoc.prsctp_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
}
- /* ECN parameter */
- if (stcb->asoc.ecn_allowed == 1) {
- ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
- ecn->ph.param_length = htons(sizeof(*ecn));
- SCTP_BUF_LEN(m) += sizeof(*ecn);
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
- sizeof(*ecn));
- } else {
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
- }
- /* And now tell the peer we do pr-sctp */
- prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
- prsctp->ph.param_length = htons(sizeof(*prsctp));
- SCTP_BUF_LEN(m) += sizeof(*prsctp);
-
- /* And now tell the peer we do all the extensions */
- pr_supported = (struct sctp_supported_chunk_types_param *)
- ((caddr_t)prsctp + sizeof(*prsctp));
- pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
- num_ext = 0;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
- pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
- pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
- pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ if (stcb->asoc.auth_supported == 1) {
pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
}
- if (stcb->asoc.sctp_nr_sack_on_off == 1) {
+ if (stcb->asoc.asconf_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ }
+ if (stcb->asoc.reconfig_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ }
+ if (stcb->asoc.nrsack_supported == 1) {
pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
}
- p_len = sizeof(*pr_supported) + num_ext;
- pr_supported->ph.param_length = htons(p_len);
- bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-
-
+ if (stcb->asoc.pktdrop_supported == 1) {
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ }
+ if (num_ext > 0) {
+ parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ pr_supported->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ }
/* add authentication parameters */
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
- struct sctp_auth_random *randp;
- struct sctp_auth_hmac_algo *hmacs;
- struct sctp_auth_chunk_list *chunks;
-
+ if (stcb->asoc.auth_supported) {
/* attach RANDOM parameter, if available */
if (stcb->asoc.authinfo.random != NULL) {
- randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len;
+ struct sctp_auth_random *randp;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len;
/* random key already contains the header */
- bcopy(stcb->asoc.authinfo.random->key, randp, p_len);
- /* zero out any padding required */
- bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
}
/* add HMAC_ALGO parameter */
- hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
- (uint8_t *) hmacs->hmac_ids);
- if (p_len > 0) {
- p_len += sizeof(*hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ struct sctp_auth_hmac_algo *hmacs;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) (sizeof(struct sctp_auth_hmac_algo) +
+ stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t));
hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
- hmacs->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ hmacs->ph.param_length = htons(parameter_len);
+ sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *) hmacs->hmac_ids);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
}
/* add CHUNKS parameter */
- chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks,
- chunks->chunk_types);
- if (p_len > 0) {
- p_len += sizeof(*chunks);
+ if (stcb->asoc.local_auth_chunks != NULL) {
+ struct sctp_auth_chunk_list *chunks;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) (sizeof(struct sctp_auth_chunk_list) +
+ sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks));
chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
- chunks->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ chunks->ph.param_length = htons(parameter_len);
+ sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
}
}
- /* now the addresses */
- {
- struct sctp_scoping scp;
+ /* now any cookie time extensions */
+ if (stcb->asoc.cookie_preserve_req) {
+ struct sctp_cookie_perserve_param *cookie_preserve;
- /*
- * To optimize this we could put the scoping stuff into a
- * structure and remove the individual uint8's from the
- * assoc structure. Then we could just sifa in the address
- * within the stcb. But for now this is a quick hack to get
- * the address stuff teased apart.
- */
-
- scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal;
- scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal;
- scp.loopback_scope = stcb->asoc.loopback_scope;
- scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope;
- scp.local_scope = stcb->asoc.local_scope;
- scp.site_scope = stcb->asoc.site_scope;
-
- sctp_add_addresses_to_i_ia(inp, stcb, &scp, m, cnt_inits_to);
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ parameter_len = (uint16_t) sizeof(struct sctp_cookie_perserve_param);
+ cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len);
+ cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+ cookie_preserve->ph.param_length = htons(parameter_len);
+ cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+ stcb->asoc.cookie_preserve_req = 0;
+ chunk_len += parameter_len;
}
+ if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) {
+ uint8_t i;
- /* calulate the size and update pkt header and chunk header */
- p_len = 0;
- for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
- if (SCTP_BUF_NEXT(m_at) == NULL)
- mp_last = m_at;
- p_len += SCTP_BUF_LEN(m_at);
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ if (stcb->asoc.scope.ipv4_addr_legal) {
+ parameter_len += (uint16_t) sizeof(uint16_t);
+ }
+ if (stcb->asoc.scope.ipv6_addr_legal) {
+ parameter_len += (uint16_t) sizeof(uint16_t);
+ }
+ sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len);
+ sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+ sup_addr->ph.param_length = htons(parameter_len);
+ i = 0;
+ if (stcb->asoc.scope.ipv4_addr_legal) {
+ sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS);
+ }
+ if (stcb->asoc.scope.ipv6_addr_legal) {
+ sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS);
+ }
+ padding_len = 4 - 2 * i;
+ chunk_len += parameter_len;
}
- init->ch.chunk_length = htons(p_len);
+ SCTP_BUF_LEN(m) = chunk_len;
+ /* now the addresses */
/*
- * We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return
- * here since the timer will drive a retranmission.
+ * To optimize this we could put the scoping stuff into a structure
+ * and remove the individual uint8's from the assoc structure. Then
+ * we could just sifa in the address within the stcb. But for now
+ * this is a quick hack to get the address stuff teased apart.
*/
+ m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope,
+ m, cnt_inits_to,
+ &padding_len, &chunk_len);
- /* I don't expect this to execute but we will be safe here */
- padval = p_len % 4;
- if ((padval) && (mp_last)) {
- /*
- * The compiler worries that mp_last may not be set even
- * though I think it is impossible :-> however we add
- * mp_last here just in case.
- */
- ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
- if (ret) {
- /* Houston we have a problem, no space */
+ init->ch.chunk_length = htons(chunk_len);
+ if (padding_len > 0) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(m);
return;
}
@@ -4971,7 +5072,6 @@
*nat_friendly = 1;
/* fall through */
case SCTP_PRSCTP_SUPPORTED:
-
if (padded_size != sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen);
goto invalid_size;
@@ -4979,7 +5079,7 @@
at += padded_size;
break;
case SCTP_ECN_CAPABLE:
- if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+ if (padded_size != sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
goto invalid_size;
}
@@ -5009,13 +5109,14 @@
if (op_err == NULL) {
/* Ok need to try to get a mbuf */
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += plen;
l_len += sizeof(struct sctp_paramhdr);
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
/*
@@ -5078,13 +5179,14 @@
/* Ok need to try to get an mbuf */
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += plen;
l_len += sizeof(struct sctp_paramhdr);
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
#ifdef INET6
@@ -5153,12 +5255,13 @@
int l_len;
#ifdef INET6
- l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_OVERHEAD;
#else
- l_len = sizeof(struct ip) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len = SCTP_MIN_V4_OVERHEAD;
#endif
+ l_len += sizeof(struct sctp_chunkhdr);
l_len += (2 * sizeof(struct sctp_paramhdr));
- op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA);
if (op_err) {
SCTP_BUF_LEN(op_err) = 0;
#ifdef INET6
@@ -5192,7 +5295,7 @@
static int
sctp_are_there_new_addresses(struct sctp_association *asoc,
- struct mbuf *in_initpkt, int offset)
+ struct mbuf *in_initpkt, int offset, struct sockaddr *src)
{
/*
* Given a INIT packet, look through the packet to verify that there
@@ -5207,7 +5310,7 @@
uint16_t ptype, plen;
uint8_t fnd;
struct sctp_nets *net;
- struct ip *iph;
+ int check_src;
#ifdef INET
struct sockaddr_in sin4, *sa4;
@@ -5215,7 +5318,6 @@
#endif
#ifdef INET6
struct sockaddr_in6 sin6, *sa6;
- struct ip6_hdr *ip6h;
#endif
@@ -5229,57 +5331,63 @@
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
#endif
- sa_touse = NULL;
/* First what about the src address of the pkt ? */
- iph = mtod(in_initpkt, struct ip *);
- switch (iph->ip_v) {
+ check_src = 0;
+ switch (src->sa_family) {
#ifdef INET
- case IPVERSION:
- /* source addr is IPv4 */
- sin4.sin_addr = iph->ip_src;
- sa_touse = (struct sockaddr *)&sin4;
+ case AF_INET:
+ if (asoc->scope.ipv4_addr_legal) {
+ check_src = 1;
+ }
break;
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
- /* source addr is IPv6 */
- ip6h = mtod(in_initpkt, struct ip6_hdr *);
- sin6.sin6_addr = ip6h->ip6_src;
- sa_touse = (struct sockaddr *)&sin6;
+ case AF_INET6:
+ if (asoc->scope.ipv6_addr_legal) {
+ check_src = 1;
+ }
break;
#endif
default:
- return (1);
+ /* TSNH */
+ break;
}
+ if (check_src) {
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == src->sa_family) {
+#ifdef INET
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *src4;
- fnd = 0;
- TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
- sa = (struct sockaddr *)&net->ro._l_addr;
- if (sa->sa_family == sa_touse->sa_family) {
-#ifdef INET
- if (sa->sa_family == AF_INET) {
- sa4 = (struct sockaddr_in *)sa;
- if (sa4->sin_addr.s_addr == sin4.sin_addr.s_addr) {
- fnd = 1;
- break;
+ sa4 = (struct sockaddr_in *)sa;
+ src4 = (struct sockaddr_in *)src;
+ if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
}
- }
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
- sa6 = (struct sockaddr_in6 *)sa;
- if (SCTP6_ARE_ADDR_EQUAL(sa6, &sin6)) {
- fnd = 1;
- break;
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src6;
+
+ sa6 = (struct sockaddr_in6 *)sa;
+ src6 = (struct sockaddr_in6 *)src;
+ if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
+ fnd = 1;
+ break;
+ }
}
+#endif
}
-#endif
}
+ if (fnd == 0) {
+ /* New address added! no need to look futher. */
+ return (1);
+ }
}
- if (fnd == 0) {
- /* New address added! no need to look futher. */
- return (1);
- }
/* Ok so far lets munge through the rest of the packet */
offset += sizeof(struct sctp_init_chunk);
phdr = sctp_get_next_param(in_initpkt, offset, &params, sizeof(params));
@@ -5299,9 +5407,11 @@
phdr == NULL) {
return (1);
}
- p4 = (struct sctp_ipv4addr_param *)phdr;
- sin4.sin_addr.s_addr = p4->addr;
- sa_touse = (struct sockaddr *)&sin4;
+ if (asoc->scope.ipv4_addr_legal) {
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ }
break;
}
#endif
@@ -5316,10 +5426,12 @@
phdr == NULL) {
return (1);
}
- p6 = (struct sctp_ipv6addr_param *)phdr;
- memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
- sizeof(p6->addr));
- sa_touse = (struct sockaddr *)&sin6;
+ if (asoc->scope.ipv6_addr_legal) {
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+ }
break;
}
#endif
@@ -5375,34 +5487,34 @@
*/
void
sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *init_pkt, int iphlen, int offset,
+ struct sctp_nets *src_net, struct mbuf *init_pkt,
+ int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_init_chunk *init_chk,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port, int hold_inp_lock)
{
struct sctp_association *asoc;
- struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+ struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err;
struct sctp_init_ack_chunk *initack;
struct sctp_adaptation_layer_indication *ali;
- struct sctp_ecn_supported_param *ecn;
- struct sctp_prsctp_supported_param *prsctp;
struct sctp_supported_chunk_types_param *pr_supported;
- union sctp_sockstore store, store1, *over_addr;
+ struct sctp_paramhdr *ph;
+ union sctp_sockstore *over_addr;
+ struct sctp_scoping scp;
#ifdef INET
- struct sockaddr_in *sin, *to_sin;
+ struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
+ struct sockaddr_in *src4 = (struct sockaddr_in *)src;
+ struct sockaddr_in *sin;
#endif
#ifdef INET6
- struct sockaddr_in6 *sin6, *to_sin6;
+ struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *sin6;
#endif
- struct ip *iph;
-
-#ifdef INET6
- struct ip6_hdr *ip6;
-
-#endif
struct sockaddr *to;
struct sctp_state_cookie stc;
struct sctp_nets *net = NULL;
@@ -5409,30 +5521,50 @@
uint8_t *signature = NULL;
int cnt_inits_to = 0;
uint16_t his_limit, i_want;
- int abort_flag, padval;
- int num_ext;
- int p_len;
+ int abort_flag;
int nat_friendly = 0;
struct socket *so;
+ uint16_t num_ext, chunk_len, padding_len, parameter_len;
- if (stcb)
+ if (stcb) {
asoc = &stcb->asoc;
- else
+ } else {
asoc = NULL;
- mp_last = NULL;
+ }
if ((asoc != NULL) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (sctp_are_there_new_addresses(asoc, init_pkt, offset))) {
- /* new addresses, out of here in non-cookie-wait states */
- /*
- * Send a ABORT, we don't add the new address error clause
- * though we even set the T bit and copy in the 0 tag.. this
- * looks no different than if no listener was present.
- */
- sctp_send_abort(init_pkt, iphlen, sh, 0, NULL,
- use_mflowid, mflowid,
- vrf_id, port);
- return;
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+ if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
+ /*
+ * new addresses, out of here in non-cookie-wait
+ * states
+ *
+ * Send an ABORT, without the new address error cause.
+ * This looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Address added");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
+ if (src_net != NULL && (src_net->port != port)) {
+ /*
+ * change of remote encapsulation port, out of here
+ * in non-cookie-wait states
+ *
+ * Send an ABORT, without an specific error cause. This
+ * looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Remote encapsulation port changed");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
}
abort_flag = 0;
op_err = sctp_arethere_unrecognized_parameters(init_pkt,
@@ -5440,13 +5572,20 @@
&abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly);
if (abort_flag) {
do_a_abort:
- sctp_send_abort(init_pkt, iphlen, sh,
+ if (op_err == NULL) {
+ char msg[SCTP_DIAG_INFO_LEN];
+
+ snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
+ }
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh,
init_chk->init.initiate_tag, op_err,
- use_mflowid, mflowid,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
return;
}
- m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m == NULL) {
/* No memory, INIT timer will re-attempt. */
if (op_err)
@@ -5453,8 +5592,17 @@
sctp_m_freem(op_err);
return;
}
- SCTP_BUF_LEN(m) = sizeof(struct sctp_init_chunk);
+ chunk_len = (uint16_t) sizeof(struct sctp_init_ack_chunk);
+ padding_len = 0;
+ /*
+ * We might not overwrite the identification[] completely and on
+ * some platforms time_entered will contain some padding. Therefore
+ * zero out the cookie to avoid putting uninitialized memory on the
+ * wire.
+ */
+ memset(&stc, 0, sizeof(struct sctp_state_cookie));
+
/* the time I built cookie */
(void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
@@ -5482,84 +5630,31 @@
*/
stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- struct inpcb *in_inp;
-
- /* Its a V6 socket */
- in_inp = (struct inpcb *)inp;
stc.ipv6_addr_legal = 1;
- /* Now look at the binding flag to see if V4 will be legal */
- if (SCTP_IPV6_V6ONLY(in_inp) == 0) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ stc.ipv4_addr_legal = 0;
+ } else {
stc.ipv4_addr_legal = 1;
- } else {
- /* V4 addresses are NOT legal on the association */
- stc.ipv4_addr_legal = 0;
}
} else {
- /* Its a V4 socket, no - V6 */
+ stc.ipv6_addr_legal = 0;
stc.ipv4_addr_legal = 1;
- stc.ipv6_addr_legal = 0;
}
-
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
- stc.ipv4_scope = 1;
-#else
stc.ipv4_scope = 0;
-#endif
- /* now for scope setup */
- memset((caddr_t)&store, 0, sizeof(store));
- memset((caddr_t)&store1, 0, sizeof(store1));
-#ifdef INET
- sin = &store.sin;
- to_sin = &store1.sin;
-#endif
-#ifdef INET6
- sin6 = &store.sin6;
- to_sin6 = &store1.sin6;
-#endif
- iph = mtod(init_pkt, struct ip *);
- /* establish the to_addr's */
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- to_sin->sin_port = sh->dest_port;
- to_sin->sin_family = AF_INET;
- to_sin->sin_len = sizeof(struct sockaddr_in);
- to_sin->sin_addr = iph->ip_dst;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = mtod(init_pkt, struct ip6_hdr *);
- to_sin6->sin6_addr = ip6->ip6_dst;
- to_sin6->sin6_scope_id = 0;
- to_sin6->sin6_port = sh->dest_port;
- to_sin6->sin6_family = AF_INET6;
- to_sin6->sin6_len = sizeof(struct sockaddr_in6);
- break;
-#endif
- default:
- goto do_a_abort;
- break;
- }
-
if (net == NULL) {
- to = (struct sockaddr *)&store;
- switch (iph->ip_v) {
+ to = src;
+ switch (dst->sa_family) {
#ifdef INET
- case IPVERSION:
+ case AF_INET:
{
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct sockaddr_in);
- sin->sin_port = sh->src_port;
- sin->sin_addr = iph->ip_src;
/* lookup address */
- stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[0] = src4->sin_addr.s_addr;
stc.address[1] = 0;
stc.address[2] = 0;
stc.address[3] = 0;
stc.addr_type = SCTP_IPV4_ADDRESS;
/* local from address */
- stc.laddress[0] = to_sin->sin_addr.s_addr;
+ stc.laddress[0] = dst4->sin_addr.s_addr;
stc.laddress[1] = 0;
stc.laddress[2] = 0;
stc.laddress[3] = 0;
@@ -5566,15 +5661,12 @@
stc.laddr_type = SCTP_IPV4_ADDRESS;
/* scope_id is only for v6 */
stc.scope_id = 0;
-#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
- if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ if ((IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) ||
+ (IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) {
stc.ipv4_scope = 1;
}
-#else
- stc.ipv4_scope = 1;
-#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
/* Must use the address in this case */
- if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+ if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
stc.ipv4_scope = 1;
stc.site_scope = 1;
@@ -5584,41 +5676,28 @@
}
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
+ case AF_INET6:
{
- ip6 = mtod(init_pkt, struct ip6_hdr *);
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(struct sockaddr_in6);
- sin6->sin6_port = sh->src_port;
- sin6->sin6_addr = ip6->ip6_src;
- /* lookup address */
- memcpy(&stc.address, &sin6->sin6_addr,
- sizeof(struct in6_addr));
- sin6->sin6_scope_id = 0;
stc.addr_type = SCTP_IPV6_ADDRESS;
- stc.scope_id = 0;
- if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
- /*
- * FIX ME: does this have scope from
- * rcvif?
- */
- (void)sa6_recoverscope(sin6);
- stc.scope_id = sin6->sin6_scope_id;
- sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
+ memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr));
+ stc.scope_id = in6_getscope(&src6->sin6_addr);
+ if (sctp_is_address_on_local_host(src, vrf_id)) {
stc.loopback_scope = 1;
stc.local_scope = 0;
stc.site_scope = 1;
stc.ipv4_scope = 1;
- } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr) ||
+ IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) {
/*
- * If the new destination is a
- * LINK_LOCAL we must have common
- * both site and local scope. Don't
- * set local scope though since we
- * must depend on the source to be
- * added implicitly. We cannot
- * assure just because we share one
- * link that all links are common.
+ * If the new destination or source
+ * is a LINK_LOCAL we must have
+ * common both site and local scope.
+ * Don't set local scope though
+ * since we must depend on the
+ * source to be added implicitly. We
+ * cannot assure just because we
+ * share one link that all links are
+ * common.
*/
stc.local_scope = 0;
stc.site_scope = 1;
@@ -5634,22 +5713,16 @@
* pull out the scope_id from
* incoming pkt
*/
+ } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr) ||
+ IN6_IS_ADDR_SITELOCAL(&dst6->sin6_addr)) {
/*
- * FIX ME: does this have scope from
- * rcvif?
+ * If the new destination or source
+ * is SITE_LOCAL then we must have
+ * site scope in common.
*/
- (void)sa6_recoverscope(sin6);
- stc.scope_id = sin6->sin6_scope_id;
- sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone));
- } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
- /*
- * If the new destination is
- * SITE_LOCAL then we must have site
- * scope in common.
- */
stc.site_scope = 1;
}
- memcpy(&stc.laddress, &to_sin6->sin6_addr, sizeof(struct in6_addr));
+ memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr));
stc.laddr_type = SCTP_IPV6_ADDRESS;
break;
}
@@ -5667,10 +5740,10 @@
#endif
- stc.loopback_scope = asoc->loopback_scope;
- stc.ipv4_scope = asoc->ipv4_local_scope;
- stc.site_scope = asoc->site_scope;
- stc.local_scope = asoc->local_scope;
+ stc.loopback_scope = asoc->scope.loopback_scope;
+ stc.ipv4_scope = asoc->scope.ipv4_local_scope;
+ stc.site_scope = asoc->scope.site_scope;
+ stc.local_scope = asoc->scope.local_scope;
#ifdef INET6
/* Why do we not consider IPv4 LL addresses? */
TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
@@ -5729,7 +5802,7 @@
if (net->src_addr_selected == 0) {
/*
* strange case here, the INIT should have
- * did the selection.
+ * done the selection.
*/
net->ro._s_addr = sctp_source_address_selection(inp,
stcb, (sctp_route_t *) & net->ro,
@@ -5819,10 +5892,10 @@
his_limit = ntohs(init_chk->init.num_inbound_streams);
/* choose what I want */
if (asoc != NULL) {
- if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+ if (asoc->streamoutcnt > asoc->pre_open_streams) {
i_want = asoc->streamoutcnt;
} else {
- i_want = inp->sctp_ep.pre_open_stream_count;
+ i_want = asoc->pre_open_streams;
}
} else {
i_want = inp->sctp_ep.pre_open_stream_count;
@@ -5834,163 +5907,180 @@
/* I can have what I want :> */
initack->init.num_outbound_streams = htons(i_want);
}
- /* tell him his limt. */
+ /* tell him his limit. */
initack->init.num_inbound_streams =
htons(inp->sctp_ep.max_open_streams_intome);
/* adaptation layer indication parameter */
- ali = (struct sctp_adaptation_layer_indication *)((caddr_t)initack + sizeof(*initack));
- ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
- ali->ph.param_length = htons(sizeof(*ali));
- ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
- SCTP_BUF_LEN(m) += sizeof(*ali);
- ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali + sizeof(*ali));
-
+ if (inp->sctp_ep.adaptation_layer_indicator_provided) {
+ parameter_len = (uint16_t) sizeof(struct sctp_adaptation_layer_indication);
+ ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len);
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(parameter_len);
+ ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator);
+ chunk_len += parameter_len;
+ }
/* ECN parameter */
- if (((asoc != NULL) && (asoc->ecn_allowed == 1)) ||
- (inp->sctp_ecn_enable == 1)) {
- ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
- ecn->ph.param_length = htons(sizeof(*ecn));
- SCTP_BUF_LEN(m) += sizeof(*ecn);
-
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
- sizeof(*ecn));
- } else {
- prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ if (((asoc != NULL) && (asoc->ecn_supported == 1)) ||
+ ((asoc == NULL) && (inp->ecn_supported == 1))) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_ECN_CAPABLE);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
}
- /* And now tell the peer we do pr-sctp */
- prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
- prsctp->ph.param_length = htons(sizeof(*prsctp));
- SCTP_BUF_LEN(m) += sizeof(*prsctp);
+ /* PR-SCTP supported parameter */
+ if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+ ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
+ ph->param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
+ }
+ /* Add NAT friendly parameter */
if (nat_friendly) {
- /* Add NAT friendly parameter */
- struct sctp_paramhdr *ph;
-
- ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ parameter_len = (uint16_t) sizeof(struct sctp_paramhdr);
+ ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len);
ph->param_type = htons(SCTP_HAS_NAT_SUPPORT);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- SCTP_BUF_LEN(m) += sizeof(struct sctp_paramhdr);
+ ph->param_length = htons(parameter_len);
+ chunk_len += parameter_len;
}
- /* And now tell the peer we do all the extensions */
- pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ /* And now tell the peer which extensions we support */
num_ext = 0;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
- pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
- pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
- pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
- pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable))
+ pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len);
+ if (((asoc != NULL) && (asoc->prsctp_supported == 1)) ||
+ ((asoc == NULL) && (inp->prsctp_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ }
+ if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+ ((asoc == NULL) && (inp->auth_supported == 1))) {
pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
- if (SCTP_BASE_SYSCTL(sctp_nr_sack_on_off))
+ }
+ if (((asoc != NULL) && (asoc->asconf_supported == 1)) ||
+ ((asoc == NULL) && (inp->asconf_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ }
+ if (((asoc != NULL) && (asoc->reconfig_supported == 1)) ||
+ ((asoc == NULL) && (inp->reconfig_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ }
+ if (((asoc != NULL) && (asoc->nrsack_supported == 1)) ||
+ ((asoc == NULL) && (inp->nrsack_supported == 1))) {
pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK;
- p_len = sizeof(*pr_supported) + num_ext;
- pr_supported->ph.param_length = htons(p_len);
- bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
-
+ }
+ if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) ||
+ ((asoc == NULL) && (inp->pktdrop_supported == 1))) {
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ }
+ if (num_ext > 0) {
+ parameter_len = (uint16_t) sizeof(struct sctp_supported_chunk_types_param) + num_ext;
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ pr_supported->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ }
/* add authentication parameters */
- if (!SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ if (((asoc != NULL) && (asoc->auth_supported == 1)) ||
+ ((asoc == NULL) && (inp->auth_supported == 1))) {
struct sctp_auth_random *randp;
struct sctp_auth_hmac_algo *hmacs;
struct sctp_auth_chunk_list *chunks;
- uint16_t random_len;
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
/* generate and add RANDOM parameter */
- random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
- randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_random) +
+ SCTP_AUTH_RANDOM_SIZE_DEFAULT;
randp->ph.param_type = htons(SCTP_RANDOM);
- p_len = sizeof(*randp) + random_len;
- randp->ph.param_length = htons(p_len);
- SCTP_READ_RANDOM(randp->random_data, random_len);
- /* zero out any padding required */
- bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ randp->ph.param_length = htons(parameter_len);
+ SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
+ }
/* add HMAC_ALGO parameter */
- hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_hmac_algo) +
+ sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
(uint8_t *) hmacs->hmac_ids);
- if (p_len > 0) {
- p_len += sizeof(*hmacs);
- hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
- hmacs->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ padding_len = 0;
}
/* add CHUNKS parameter */
- chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len);
+ parameter_len = (uint16_t) sizeof(struct sctp_auth_chunk_list) +
+ sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
chunks->chunk_types);
- if (p_len > 0) {
- p_len += sizeof(*chunks);
- chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
- chunks->ph.param_length = htons(p_len);
- /* zero out any padding required */
- bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
- SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
- }
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(parameter_len);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
}
- m_at = m;
+ SCTP_BUF_LEN(m) = chunk_len;
+ m_last = m;
/* now the addresses */
- {
- struct sctp_scoping scp;
-
- /*
- * To optimize this we could put the scoping stuff into a
- * structure and remove the individual uint8's from the stc
- * structure. Then we could just sifa in the address within
- * the stc.. but for now this is a quick hack to get the
- * address stuff teased apart.
- */
- scp.ipv4_addr_legal = stc.ipv4_addr_legal;
- scp.ipv6_addr_legal = stc.ipv6_addr_legal;
- scp.loopback_scope = stc.loopback_scope;
- scp.ipv4_local_scope = stc.ipv4_scope;
- scp.local_scope = stc.local_scope;
- scp.site_scope = stc.site_scope;
- m_at = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_at, cnt_inits_to);
+ /*
+ * To optimize this we could put the scoping stuff into a structure
+ * and remove the individual uint8's from the stc structure. Then we
+ * could just sifa in the address within the stc.. but for now this
+ * is a quick hack to get the address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+ scp.loopback_scope = stc.loopback_scope;
+ scp.ipv4_local_scope = stc.ipv4_scope;
+ scp.local_scope = stc.local_scope;
+ scp.site_scope = stc.site_scope;
+ m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last,
+ cnt_inits_to,
+ &padding_len, &chunk_len);
+ /* padding_len can only be positive, if no addresses have been added */
+ if (padding_len > 0) {
+ memset(mtod(m, caddr_t)+chunk_len, 0, padding_len);
+ chunk_len += padding_len;
+ SCTP_BUF_LEN(m) += padding_len;
+ padding_len = 0;
}
-
/* tack on the operational error if present */
if (op_err) {
- struct mbuf *ol;
- int llen;
-
- llen = 0;
- ol = op_err;
-
- while (ol) {
- llen += SCTP_BUF_LEN(ol);
- ol = SCTP_BUF_NEXT(ol);
+ parameter_len = 0;
+ for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ parameter_len += SCTP_BUF_LEN(m_tmp);
}
- if (llen % 4) {
- /* must add a pad to the param */
- uint32_t cpthis = 0;
- int padlen;
-
- padlen = 4 - (llen % 4);
- m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ SCTP_BUF_NEXT(m_last) = op_err;
+ while (SCTP_BUF_NEXT(m_last) != NULL) {
+ m_last = SCTP_BUF_NEXT(m_last);
}
- while (SCTP_BUF_NEXT(m_at) != NULL) {
- m_at = SCTP_BUF_NEXT(m_at);
- }
- SCTP_BUF_NEXT(m_at) = op_err;
- while (SCTP_BUF_NEXT(m_at) != NULL) {
- m_at = SCTP_BUF_NEXT(m_at);
- }
+ chunk_len += parameter_len;
}
- /* pre-calulate the size and update pkt header and chunk header */
- p_len = 0;
- for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
- p_len += SCTP_BUF_LEN(m_tmp);
- if (SCTP_BUF_NEXT(m_tmp) == NULL) {
- /* m_tmp should now point to last one */
- break;
+ if (padding_len > 0) {
+ m_last = sctp_add_pad_tombuf(m_last, padding_len);
+ if (m_last == NULL) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
}
+ chunk_len += padding_len;
+ padding_len = 0;
}
-
/* Now we must build a cookie */
m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature);
if (m_cookie == NULL) {
@@ -5999,21 +6089,22 @@
return;
}
/* Now append the cookie to the end and update the space/size */
- SCTP_BUF_NEXT(m_tmp) = m_cookie;
-
- for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
- p_len += SCTP_BUF_LEN(m_tmp);
+ SCTP_BUF_NEXT(m_last) = m_cookie;
+ parameter_len = 0;
+ for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ parameter_len += SCTP_BUF_LEN(m_tmp);
if (SCTP_BUF_NEXT(m_tmp) == NULL) {
- /* m_tmp should now point to last one */
- mp_last = m_tmp;
- break;
+ m_last = m_tmp;
}
}
+ padding_len = SCTP_SIZE32(parameter_len) - parameter_len;
+ chunk_len += parameter_len;
+
/*
* Place in the size, but we don't include the last pad (if any) in
* the INIT-ACK.
*/
- initack->ch.chunk_length = htons(p_len);
+ initack->ch.chunk_length = htons(chunk_len);
/*
* Time to sign the cookie, we don't sign over the cookie signature
@@ -6027,17 +6118,14 @@
* We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return
* here since the timer will drive a retranmission.
*/
- padval = p_len % 4;
- if ((padval) && (mp_last)) {
- /* see my previous comments on mp_last */
- if (sctp_add_pad_tombuf(mp_last, (4 - padval))) {
- /* Houston we have a problem, no space */
+ if (padding_len > 0) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(m);
return;
}
}
if (stc.loopback_scope) {
- over_addr = &store1;
+ over_addr = (union sctp_sockstore *)dst;
} else {
over_addr = NULL;
}
@@ -6046,7 +6134,7 @@
0, 0,
inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag,
port, over_addr,
- use_mflowid, mflowid,
+ mflowtype, mflowid,
SCTP_SO_NOT_LOCKED);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
}
@@ -6062,7 +6150,7 @@
struct sctp_tmit_chunk *chk, *nchk;
SCTP_TCB_LOCK_ASSERT(stcb);
- if ((asoc->peer_supports_prsctp) &&
+ if ((asoc->prsctp_supported) &&
(asoc->sent_queue_cnt_removeable > 0)) {
TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
/*
@@ -6175,17 +6263,15 @@
static void
sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp)
{
- sp->pr_sctp_on = 0;
/*
* We assume that the user wants PR_SCTP_TTL if the user provides a
- * positive lifetime but does not specify any PR_SCTP policy. This
- * is a BAD assumption and causes problems at least with the
- * U-Vancovers MPI folks. I will change this to be no policy means
- * NO PR-SCTP.
+ * positive lifetime but does not specify any PR_SCTP policy.
*/
if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
- sp->pr_sctp_on = 1;
+ } else if (sp->timetolive > 0) {
+ sp->sinfo_flags |= SCTP_PR_SCTP_TTL;
+ sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
} else {
return;
}
@@ -6276,7 +6362,6 @@
sp->timetolive = srcv->sinfo_timetolive;
sp->ppid = srcv->sinfo_ppid;
sp->context = srcv->sinfo_context;
- sp->strseq = 0;
if (sp->sinfo_flags & SCTP_ADDR_OVER) {
sp->net = net;
atomic_add_int(&sp->net->ref_count, 1);
@@ -6317,10 +6402,6 @@
sctp_snd_sb_alloc(stcb, sp->length);
atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
- if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
- sp->strseq = strm->next_sequence_sent;
- strm->next_sequence_sent++;
- }
stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp, 1);
m = NULL;
if (hold_stcb_lock == 0) {
@@ -6366,7 +6447,7 @@
if (outchain == NULL) {
/* This is the general case */
new_mbuf:
- outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
if (outchain == NULL) {
goto error_out;
}
@@ -6420,7 +6501,7 @@
/* now we need another one */
sizeofcpy -= len;
}
- m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER);
if (m == NULL) {
/* We failed */
goto error_out;
@@ -6434,16 +6515,10 @@
return (outchain);
} else {
/* copy the old fashion way */
- appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT);
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = appendchain; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY);
}
#endif
}
@@ -6494,7 +6569,7 @@
}
}
-int
+static int
sctp_med_chunk_output(struct sctp_inpcb *inp,
struct sctp_tcb *stcb,
struct sctp_association *asoc,
@@ -6527,8 +6602,8 @@
/* TSNH */
return;
}
- if ((ca->m) && ca->sndlen) {
- m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+ if (ca->sndlen > 0) {
+ m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT);
if (m == NULL) {
/* can't copy so we are done */
ca->cnt_failed++;
@@ -6536,13 +6611,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_ICOPY);
}
#endif
} else {
@@ -6556,38 +6625,40 @@
}
if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
/* Abort this assoc with m as the user defined reason */
- if (m) {
+ if (m != NULL) {
+ SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_NOWAIT);
+ } else {
+ m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_NOWAIT, 1, MT_DATA);
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
+ }
+ if (m != NULL) {
struct sctp_paramhdr *ph;
- SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_DONTWAIT);
- if (m) {
- ph = mtod(m, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(ca->sndlen);
- }
- /*
- * We add one here to keep the assoc from
- * dis-appearing on us.
- */
- atomic_add_int(&stcb->asoc.refcnt, 1);
- sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
- /*
- * sctp_abort_an_association calls sctp_free_asoc()
- * free association will NOT free it since we
- * incremented the refcnt .. we do this to prevent
- * it being freed and things getting tricky since we
- * could end up (from free_asoc) calling inpcb_free
- * which would get a recursive lock call to the
- * iterator lock.. But as a consequence of that the
- * stcb will return to us un-locked.. since
- * free_asoc returns with either no TCB or the TCB
- * unlocked, we must relock.. to unlock in the
- * iterator timer :-0
- */
- SCTP_TCB_LOCK(stcb);
- atomic_add_int(&stcb->asoc.refcnt, -1);
- goto no_chunk_output;
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr) + ca->sndlen);
}
+ /*
+ * We add one here to keep the assoc from dis-appearing on
+ * us.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED);
+ /*
+ * sctp_abort_an_association calls sctp_free_asoc() free
+ * association will NOT free it since we incremented the
+ * refcnt .. we do this to prevent it being freed and things
+ * getting tricky since we could end up (from free_asoc)
+ * calling inpcb_free which would get a recursive lock call
+ * to the iterator lock.. But as a consequence of that the
+ * stcb will return to us un-locked.. since free_asoc
+ * returns with either no TCB or the TCB unlocked, we must
+ * relock.. to unlock in the iterator timer :-0
+ */
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
} else {
if (m) {
ret = sctp_msg_append(stcb, net, m,
@@ -6617,12 +6688,13 @@
* only send SHUTDOWN the first time
* through
*/
- sctp_send_shutdown(stcb, net);
if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb, net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
net);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
@@ -6661,10 +6733,17 @@
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
abort_anyway:
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
atomic_add_int(&stcb->asoc.refcnt, 1);
sctp_abort_an_association(stcb->sctp_ep, stcb,
- NULL, SCTP_SO_NOT_LOCKED);
+ op_err, SCTP_SO_NOT_LOCKED);
atomic_add_int(&stcb->asoc.refcnt, -1);
goto no_chunk_output;
}
@@ -6680,14 +6759,13 @@
if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
(stcb->asoc.total_flight > 0) &&
- (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
- ) {
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) {
do_chunk_output = 0;
}
if (do_chunk_output)
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
else if (added_control) {
- int num_out = 0, reason = 0, now_filled = 0;
+ int num_out, reason, now_filled = 0;
struct timeval now;
int frag_point;
@@ -6723,13 +6801,6 @@
SCTP_FREE(ca, SCTP_M_COPYAL);
}
-
-#define MC_ALIGN(m, len) do { \
- SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1)); \
-} while (0)
-
-
-
static struct mbuf *
sctp_copy_out_all(struct uio *uio, int len)
{
@@ -6736,7 +6807,7 @@
struct mbuf *ret, *at;
int left, willcpy, cancpy, error;
- ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+ ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA);
if (ret == NULL) {
/* TSNH */
return (NULL);
@@ -6759,7 +6830,7 @@
SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
left -= willcpy;
if (left > 0) {
- SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA);
if (SCTP_BUF_NEXT(at) == NULL) {
goto err_out_now;
}
@@ -6810,13 +6881,10 @@
/* Gather the length of the send */
struct mbuf *mat;
- mat = m;
ca->sndlen = 0;
- while (m) {
- ca->sndlen += SCTP_BUF_LEN(m);
- m = SCTP_BUF_NEXT(m);
+ for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
+ ca->sndlen += SCTP_BUF_LEN(mat);
}
- ca->m = mat;
}
ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
@@ -7152,6 +7220,11 @@
}
atomic_subtract_int(&asoc->stream_queue_cnt, 1);
TAILQ_REMOVE(&strq->outqueue, sp, next);
+ if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+ (strq->chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&strq->outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
if (sp->net) {
sctp_free_remote_addr(sp->net);
@@ -7196,7 +7269,7 @@
}
/* Whack down the size */
atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length);
- if ((stcb->sctp_socket != NULL) && \
+ if ((stcb->sctp_socket != NULL) &&
((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length);
@@ -7215,9 +7288,6 @@
}
}
some_taken = sp->some_taken;
- if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
- sp->msg_is_complete = 1;
- }
re_look:
length = sp->length;
if (sp->msg_is_complete) {
@@ -7323,7 +7393,7 @@
struct mbuf *m;
dont_do_it:
- chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+ chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT);
chk->last_mbuf = NULL;
if (chk->data == NULL) {
sp->some_taken = some_taken;
@@ -7334,13 +7404,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = chk->data; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY);
}
#endif
/* Pull off the data */
@@ -7402,7 +7466,7 @@
/* Not enough room for a chunk header, get some */
struct mbuf *m;
- m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+ m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 0, MT_DATA);
if (m == NULL) {
/*
* we're in trouble here. _PREPEND below will free
@@ -7413,7 +7477,7 @@
SCTP_TCB_SEND_LOCK(stcb);
send_lock_up = 1;
}
- if (chk->data == NULL) {
+ if (sp->data == NULL) {
/* unsteal the data */
sp->data = chk->data;
sp->tail_mbuf = chk->last_mbuf;
@@ -7439,7 +7503,7 @@
M_ALIGN(chk->data, 4);
}
}
- SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_NOWAIT);
if (chk->data == NULL) {
/* HELP, TSNH since we assured it would not above? */
#ifdef INVARIANTS
@@ -7461,7 +7525,11 @@
chk->asoc = &stcb->asoc;
chk->pad_inplace = 0;
chk->no_fr_allowed = 0;
- chk->rec.data.stream_seq = sp->strseq;
+ chk->rec.data.stream_seq = strq->next_sequence_send;
+ if ((rcv_flags & SCTP_DATA_LAST_FRAG) &&
+ !(rcv_flags & SCTP_DATA_UNORDERED)) {
+ strq->next_sequence_send++;
+ }
chk->rec.data.stream_number = sp->stream;
chk->rec.data.payloadtype = sp->ppid;
chk->rec.data.context = sp->context;
@@ -7525,22 +7593,15 @@
int pads;
pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
- if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
+ lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf);
+ if (lm != NULL) {
+ chk->last_mbuf = lm;
chk->pad_inplace = 1;
}
- if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
- /* pad added an mbuf */
- chk->last_mbuf = lm;
- }
chk->send_size += pads;
}
- /* We only re-set the policy if it is on */
- if (sp->pr_sctp_on) {
- sctp_set_prsctp_policy(sp);
+ if (PR_SCTP_ENABLED(chk->flags)) {
asoc->pr_sctp_cnt++;
- chk->pr_sctp_on = 1;
- } else {
- chk->pr_sctp_on = 0;
}
if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
/* All done pull and kill the message */
@@ -7559,6 +7620,11 @@
send_lock_up = 1;
}
TAILQ_REMOVE(&strq->outqueue, sp, next);
+ if ((strq->state == SCTP_STREAM_RESET_PENDING) &&
+ (strq->chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&strq->outqueue)) {
+ stcb->asoc.trigger_reset = 1;
+ }
stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up);
if (sp->net) {
sctp_free_remote_addr(sp->net);
@@ -7578,6 +7644,7 @@
*locked = 1;
}
asoc->chunks_on_out_queue++;
+ strq->chunks_on_queues++;
TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
asoc->send_queue_cnt++;
out_of:
@@ -7729,7 +7796,7 @@
#endif
)
{
- /*
+ /**
* Ok this is the generic chunk service queue. we must do the
* following: - Service the stream queue that is next, moving any
* message (note I must get a complete message i.e. FIRST/MIDDLE and
@@ -7765,8 +7832,8 @@
int quit_now = 0;
*num_out = 0;
+ *reason_code = 0;
auth_keyid = stcb->asoc.authinfo.active_keyid;
-
if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
(asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
@@ -7785,7 +7852,7 @@
#endif
SCTP_TCB_LOCK_ASSERT(stcb);
hbflag = 0;
- if ((control_only) || (asoc->stream_reset_outstanding))
+ if (control_only)
no_data_chunks = 1;
else
no_data_chunks = 0;
@@ -7972,12 +8039,12 @@
switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
#ifdef INET
case AF_INET:
- mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
break;
#endif
#ifdef INET6
case AF_INET6:
- mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ mtu = net->mtu - SCTP_MIN_OVERHEAD;
break;
#endif
default:
@@ -7999,6 +8066,7 @@
} else {
r_mtu = mtu;
}
+ error = 0;
/************************/
/* ASCONF transmission */
/************************/
@@ -8122,6 +8190,12 @@
* it is used to do appropriate
* source address selection.
*/
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ hbflag = 0;
if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
outchain, auth_offset, auth,
@@ -8132,21 +8206,18 @@
net->port, NULL,
0, 0,
so_locked))) {
+ /*
+ * error, we could not
+ * output
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
if (error == ENOBUFS) {
asoc->ifp_had_enobuf = 1;
SCTP_STAT_INCR(sctps_lowlevelerr);
}
- if (from_where == 0) {
- SCTP_STAT_INCR(sctps_lowlevelerrusr);
- }
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
/* error, could not output */
if (error == EHOSTUNREACH) {
/*
@@ -8157,17 +8228,10 @@
sctp_move_chunks_from_net(stcb, net);
}
*reason_code = 7;
- continue;
- } else
+ break;
+ } else {
asoc->ifp_had_enobuf = 0;
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
}
- hbflag = 0;
/*
* increase the number we sent, if a
* cookie is sent we don't tell them
@@ -8200,6 +8264,10 @@
}
}
}
+ if (error != 0) {
+ /* try next net */
+ continue;
+ }
/************************/
/* Control transmission */
/************************/
@@ -8338,7 +8406,8 @@
/* turn off the timer */
if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+ inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
}
}
ctl_cnt++;
@@ -8395,6 +8464,15 @@
sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
cookie = 0;
}
+ /* Only HB or ASCONF advances time */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ hbflag = 0;
+ }
if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
outchain,
@@ -8406,24 +8484,18 @@
net->port, NULL,
0, 0,
so_locked))) {
+ /*
+ * error, we could not
+ * output
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
if (error == ENOBUFS) {
asoc->ifp_had_enobuf = 1;
SCTP_STAT_INCR(sctps_lowlevelerr);
}
- if (from_where == 0) {
- SCTP_STAT_INCR(sctps_lowlevelerrusr);
- }
- /* error, could not output */
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
- }
if (error == EHOSTUNREACH) {
/*
* Destination went
@@ -8433,19 +8505,9 @@
sctp_move_chunks_from_net(stcb, net);
}
*reason_code = 7;
- continue;
- } else
+ break;
+ } else {
asoc->ifp_had_enobuf = 0;
- /* Only HB or ASCONF advances time */
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
}
/*
* increase the number we sent, if a
@@ -8479,6 +8541,10 @@
}
}
}
+ if (error != 0) {
+ /* try next net */
+ continue;
+ }
/* JRI: if dest is in PF state, do not send data to it */
if ((asoc->sctp_cmt_on_off > 0) &&
(net != stcb->asoc.alternate) &&
@@ -8523,8 +8589,8 @@
switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
#ifdef INET
case AF_INET:
- if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
- omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ if (net->mtu > SCTP_MIN_V4_OVERHEAD)
+ omtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
else
omtu = 0;
break;
@@ -8531,8 +8597,8 @@
#endif
#ifdef INET6
case AF_INET6:
- if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
- omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ if (net->mtu > SCTP_MIN_OVERHEAD)
+ omtu = net->mtu - SCTP_MIN_OVERHEAD;
else
omtu = 0;
break;
@@ -8542,7 +8608,8 @@
omtu = 0;
break;
}
- if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) &&
+ if ((((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) &&
(skip_data_for_this_net == 0)) ||
(cookie)) {
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
@@ -8732,6 +8799,14 @@
*/
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
}
+ if (bundle_at || hbflag) {
+ /* For data/asconf and hb set time */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ *now_filled = 1;
+ }
+ net->last_sent_time = *now;
+ }
/* Now send it, if there is anything to send :> */
if ((error = sctp_lowlevel_chunk_output(inp,
stcb,
@@ -8750,24 +8825,14 @@
0, 0,
so_locked))) {
/* error, we could not output */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
if (error == ENOBUFS) {
SCTP_STAT_INCR(sctps_lowlevelerr);
asoc->ifp_had_enobuf = 1;
}
- if (from_where == 0) {
- SCTP_STAT_INCR(sctps_lowlevelerrusr);
- }
- SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
- if (hbflag) {
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- hbflag = 0;
- }
if (error == EHOSTUNREACH) {
/*
* Destination went unreachable
@@ -8791,16 +8856,6 @@
endoutchain = NULL;
auth = NULL;
auth_offset = 0;
- if (bundle_at || hbflag) {
- /* For data/asconf and hb set time */
- if (*now_filled == 0) {
- (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
- *now_filled = 1;
- *now = net->last_sent_time;
- } else {
- net->last_sent_time = *now;
- }
- }
if (!no_out_cnt) {
*num_out += (ctl_cnt + bundle_at);
}
@@ -8861,9 +8916,37 @@
*/
struct sctp_chunkhdr *hdr;
struct sctp_tmit_chunk *chk;
- struct mbuf *mat;
+ struct mbuf *mat, *last_mbuf;
+ uint32_t chunk_length;
+ uint16_t padding_length;
SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT);
+ if (op_err == NULL) {
+ return;
+ }
+ last_mbuf = NULL;
+ chunk_length = 0;
+ for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) {
+ chunk_length += SCTP_BUF_LEN(mat);
+ if (SCTP_BUF_NEXT(mat) == NULL) {
+ last_mbuf = mat;
+ }
+ }
+ if (chunk_length > SCTP_MAX_CHUNK_LENGTH) {
+ sctp_m_freem(op_err);
+ return;
+ }
+ padding_length = chunk_length % 4;
+ if (padding_length != 0) {
+ padding_length = 4 - padding_length;
+ }
+ if (padding_length != 0) {
+ if (sctp_add_pad_tombuf(last_mbuf, padding_length) == NULL) {
+ sctp_m_freem(op_err);
+ return;
+ }
+ }
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
/* no memory */
@@ -8871,22 +8954,9 @@
return;
}
chk->copy_by_ref = 0;
- SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
- if (op_err == NULL) {
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- return;
- }
- chk->send_size = 0;
- mat = op_err;
- while (mat != NULL) {
- chk->send_size += SCTP_BUF_LEN(mat);
- mat = SCTP_BUF_NEXT(mat);
- }
- chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
- chk->rec.chunk_id.can_take_data = 1;
+ chk->send_size = (uint16_t) chunk_length;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = op_err;
chk->whoTo = NULL;
@@ -8894,9 +8964,7 @@
hdr->chunk_type = SCTP_OPERATION_ERROR;
hdr->chunk_flags = 0;
hdr->chunk_length = htons(chk->send_size);
- TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
- chk,
- sctp_next);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
chk->asoc->ctrl_queue_cnt++;
}
@@ -8917,12 +8985,11 @@
struct sctp_tmit_chunk *chk;
uint16_t ptype, plen;
+ SCTP_TCB_LOCK_ASSERT(stcb);
/* First find the cookie in the param area */
cookie = NULL;
at = offset + sizeof(struct sctp_init_chunk);
-
- SCTP_TCB_LOCK_ASSERT(stcb);
- do {
+ for (;;) {
phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
if (phdr == NULL) {
return (-3);
@@ -8936,7 +9003,7 @@
if ((pad = (plen % 4))) {
plen += 4 - pad;
}
- cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+ cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT);
if (cookie == NULL) {
/* No memory */
return (-2);
@@ -8943,25 +9010,14 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = cookie; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(cookie, SCTP_MBUF_ICOPY);
}
#endif
break;
}
at += SCTP_SIZE32(plen);
- } while (phdr);
- if (cookie == NULL) {
- /* Did not find the cookie */
- return (-3);
}
/* ok, we got the cookie lets change it into a cookie echo chunk */
-
/* first the change from param to cookie */
hdr = mtod(cookie, struct sctp_chunkhdr *);
hdr->chunk_type = SCTP_COOKIE_ECHO;
@@ -8974,12 +9030,12 @@
return (-5);
}
chk->copy_by_ref = 0;
- chk->send_size = plen;
chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->send_size = plen;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->asoc = &stcb->asoc;
chk->data = cookie;
chk->whoTo = net;
@@ -9008,7 +9064,7 @@
/* must have a net pointer */
return;
- outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+ outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT);
if (outchain == NULL) {
/* gak out of memory */
return;
@@ -9015,13 +9071,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = outchain; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(outchain, SCTP_MBUF_ICOPY);
}
#endif
chdr = mtod(outchain, struct sctp_chunkhdr *);
@@ -9042,12 +9092,12 @@
return;
}
chk->copy_by_ref = 0;
- chk->send_size = chk_length;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = chk_length;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = outchain;
chk->whoTo = net;
@@ -9064,10 +9114,9 @@
struct sctp_chunkhdr *hdr;
struct sctp_tmit_chunk *chk;
- cookie_ack = NULL;
SCTP_TCB_LOCK_ASSERT(stcb);
- cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
if (cookie_ack == NULL) {
/* no mbuf's */
return;
@@ -9080,12 +9129,12 @@
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_chunkhdr);
chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->data = cookie_ack;
if (chk->asoc->last_control_chunk_from != NULL) {
@@ -9113,7 +9162,7 @@
struct sctp_shutdown_ack_chunk *ack_cp;
struct sctp_tmit_chunk *chk;
- m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown_ack == NULL) {
/* no mbuf's */
return;
@@ -9126,9 +9175,10 @@
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_chunkhdr);
chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->flags = 0;
@@ -9156,7 +9206,7 @@
struct sctp_shutdown_chunk *shutdown_cp;
struct sctp_tmit_chunk *chk;
- m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown == NULL) {
/* no mbuf's */
return;
@@ -9169,9 +9219,10 @@
return;
}
chk->copy_by_ref = 0;
- chk->send_size = sizeof(struct sctp_shutdown_chunk);
chk->rec.chunk_id.id = SCTP_SHUTDOWN;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ chk->send_size = sizeof(struct sctp_shutdown_chunk);
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
chk->flags = 0;
@@ -9222,13 +9273,13 @@
return;
}
chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_ASCONF;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->data = m_asconf;
chk->send_size = len;
- chk->rec.chunk_id.id = SCTP_ASCONF;
- chk->rec.chunk_id.can_take_data = 0;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->asoc = &stcb->asoc;
chk->whoTo = net;
if (chk->whoTo) {
@@ -9292,7 +9343,7 @@
continue;
}
/* copy the asconf_ack */
- m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT);
if (m_ack == NULL) {
/* couldn't copy it */
return;
@@ -9299,13 +9350,7 @@
}
#ifdef SCTP_MBUF_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m_ack; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_ICOPY);
- }
- }
+ sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY);
}
#endif
@@ -9317,20 +9362,17 @@
return;
}
chk->copy_by_ref = 0;
-
+ chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
chk->whoTo = net;
if (chk->whoTo) {
atomic_add_int(&chk->whoTo->ref_count, 1);
}
chk->data = m_ack;
- chk->send_size = 0;
- /* Get size */
chk->send_size = ack->len;
- chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
- chk->rec.chunk_id.can_take_data = 1;
chk->sent = SCTP_DATAGRAM_UNSENT;
chk->snd_count = 0;
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */
chk->asoc = &stcb->asoc;
TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
@@ -9503,12 +9545,16 @@
}
if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) &&
(chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) {
- /* Gak, we have exceeded max unlucky retran, abort! */
- SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
- chk->snd_count,
- SCTP_BASE_SYSCTL(sctp_max_retran_chunk));
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
+ snprintf(msg, sizeof(msg), "TSN %8.8x retransmitted %d times, giving up",
+ chk->rec.data.TSN_seq, chk->snd_count);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
atomic_add_int(&stcb->asoc.refcnt, 1);
- sctp_abort_an_association(stcb->sctp_ep, stcb, NULL, so_locked);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, op_err,
+ so_locked);
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
return (SCTP_RETRAN_EXIT);
@@ -9822,7 +9868,7 @@
* t3-expiring.
*/
sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
- SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
}
}
@@ -9902,7 +9948,7 @@
*/
struct sctp_association *asoc;
struct sctp_nets *net;
- int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0;
+ int error = 0, num_out, tot_out = 0, ret = 0, reason_code;
unsigned int burst_cnt = 0;
struct timeval now;
int now_filled = 0;
@@ -9913,6 +9959,7 @@
unsigned int tot_frs = 0;
asoc = &stcb->asoc;
+do_it_again:
/* The Nagle algorithm is only applied when handling a send call. */
if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
@@ -9930,7 +9977,8 @@
if ((un_sent <= 0) &&
(TAILQ_EMPTY(&asoc->control_send_queue)) &&
(TAILQ_EMPTY(&asoc->asconf_send_queue)) &&
- (asoc->sent_queue_retran_cnt == 0)) {
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (asoc->trigger_reset == 0)) {
/* Nothing to do unless there is something to be sent left */
return;
}
@@ -10149,6 +10197,12 @@
*/
if (stcb->asoc.ecn_echo_cnt_onq)
sctp_fix_ecn_echo(asoc);
+
+ if (stcb->asoc.trigger_reset) {
+ if (sctp_send_stream_reset_out_if_possible(stcb, so_locked) == 0) {
+ goto do_it_again;
+ }
+ }
return;
}
@@ -10210,9 +10264,10 @@
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = asoc;
chk->whoTo = NULL;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -10236,7 +10291,8 @@
unsigned int cnt_of_skipped = 0;
TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
- if (at->sent != SCTP_FORWARD_TSN_SKIP) {
+ if ((at->sent != SCTP_FORWARD_TSN_SKIP) &&
+ (at->sent != SCTP_DATAGRAM_NR_ACKED)) {
/* no more to look at */
break;
}
@@ -10390,8 +10446,7 @@
uint8_t type;
uint8_t tsn_map;
- if ((stcb->asoc.sctp_nr_sack_on_off == 1) &&
- (stcb->asoc.peer_supports_nr_sack == 1)) {
+ if (stcb->asoc.nrsack_supported == 1) {
type = SCTP_NR_SELECTIVE_ACK;
} else {
type = SCTP_SELECTIVE_ACK;
@@ -10428,7 +10483,8 @@
/* No memory so we drop the idea, and set a timer */
if (stcb->asoc.delayed_ack) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
sctp_timer_start(SCTP_TIMER_TYPE_RECV,
stcb->sctp_ep, stcb, NULL);
} else {
@@ -10443,6 +10499,7 @@
/* Clear our pkt counts */
asoc->data_pkts_seen = 0;
+ a_chk->flags = 0;
a_chk->asoc = asoc;
a_chk->snd_count = 0;
a_chk->send_size = 0; /* fill in later */
@@ -10449,32 +10506,17 @@
a_chk->sent = SCTP_DATAGRAM_UNSENT;
a_chk->whoTo = NULL;
- if ((asoc->numduptsns) ||
- (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE))) {
+ if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) {
/*-
- * Ok, we have some duplicates or the destination for the
- * sack is unreachable, lets see if we can select an
- * alternate than asoc->last_data_chunk_from
+ * Ok, the destination for the SACK is unreachable, lets see if
+ * we can select an alternate to asoc->last_data_chunk_from
*/
- if ((asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE) &&
- (asoc->used_alt_onsack > asoc->numnets)) {
- /* We used an alt last time, don't this time */
- a_chk->whoTo = NULL;
- } else {
- asoc->used_alt_onsack++;
- a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
- }
+ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
if (a_chk->whoTo == NULL) {
/* Nope, no alternate */
a_chk->whoTo = asoc->last_data_chunk_from;
- asoc->used_alt_onsack = 0;
}
} else {
- /*
- * No duplicates so we use the last place we received data
- * from.
- */
- asoc->used_alt_onsack = 0;
a_chk->whoTo = asoc->last_data_chunk_from;
}
if (a_chk->whoTo) {
@@ -10497,7 +10539,7 @@
space_req = MCLBYTES;
}
/* Ok now lets formulate a MBUF with our sack */
- a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA);
if ((a_chk->data == NULL) ||
(a_chk->whoTo == NULL)) {
/* rats, no mbuf memory */
@@ -10510,7 +10552,8 @@
/* sa_ignore NO_NULL_CHK */
if (stcb->asoc.delayed_ack) {
sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
- stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+ stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
sctp_timer_start(SCTP_TIMER_TYPE_RECV,
stcb->sctp_ep, stcb, NULL);
} else {
@@ -10589,7 +10632,7 @@
* Clear all bits corresponding to TSNs
* smaller or equal to the cumulative TSN.
*/
- tsn_map &= (~0 << (1 - offset));
+ tsn_map &= (~0U << (1 - offset));
}
selector = &sack_array[tsn_map];
if (mergeable && selector->right_edge) {
@@ -10664,7 +10707,7 @@
* TSNs smaller or equal to the
* cumulative TSN.
*/
- tsn_map &= (~0 << (1 - offset));
+ tsn_map &= (~0U << (1 - offset));
}
selector = &sack_array[tsn_map];
if (mergeable && selector->right_edge) {
@@ -10775,14 +10818,16 @@
#endif
)
{
- struct mbuf *m_abort;
- struct mbuf *m_out = NULL, *m_end = NULL;
- struct sctp_abort_chunk *abort = NULL;
- int sz;
- uint32_t auth_offset = 0;
+ struct mbuf *m_abort, *m, *m_last;
+ struct mbuf *m_out, *m_end = NULL;
+ struct sctp_abort_chunk *abort;
struct sctp_auth_chunk *auth = NULL;
struct sctp_nets *net;
+ uint32_t vtag;
+ uint32_t auth_offset = 0;
+ uint16_t cause_len, chunk_len, padding_len;
+ SCTP_TCB_LOCK_ASSERT(stcb);
/*-
* Add an AUTH chunk, if chunk requires it and save the offset into
* the chain for AUTH
@@ -10789,31 +10834,35 @@
*/
if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
stcb->asoc.peer_auth_chunks)) {
- m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset,
+ m_out = sctp_add_auth_chunk(NULL, &m_end, &auth, &auth_offset,
stcb, SCTP_ABORT_ASSOCIATION);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ } else {
+ m_out = NULL;
}
- SCTP_TCB_LOCK_ASSERT(stcb);
- m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_NOWAIT, 1, MT_HEADER);
if (m_abort == NULL) {
- /* no mbuf's */
- if (m_out)
+ if (m_out) {
sctp_m_freem(m_out);
+ }
+ if (operr) {
+ sctp_m_freem(operr);
+ }
return;
}
/* link in any error */
SCTP_BUF_NEXT(m_abort) = operr;
- sz = 0;
- if (operr) {
- struct mbuf *n;
-
- n = operr;
- while (n) {
- sz += SCTP_BUF_LEN(n);
- n = SCTP_BUF_NEXT(n);
+ cause_len = 0;
+ m_last = NULL;
+ for (m = operr; m; m = SCTP_BUF_NEXT(m)) {
+ cause_len += (uint16_t) SCTP_BUF_LEN(m);
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ m_last = m;
}
}
- SCTP_BUF_LEN(m_abort) = sizeof(*abort);
+ SCTP_BUF_LEN(m_abort) = sizeof(struct sctp_abort_chunk);
+ chunk_len = (uint16_t) sizeof(struct sctp_abort_chunk) + cause_len;
+ padding_len = SCTP_SIZE32(chunk_len) - chunk_len;
if (m_out == NULL) {
/* NO Auth chunk prepended, so reserve space in front */
SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD);
@@ -10827,16 +10876,30 @@
} else {
net = stcb->asoc.primary_destination;
}
- /* fill in the ABORT chunk */
+ /* Fill in the ABORT chunk header. */
abort = mtod(m_abort, struct sctp_abort_chunk *);
abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
- abort->ch.chunk_flags = 0;
- abort->ch.chunk_length = htons(sizeof(*abort) + sz);
-
+ if (stcb->asoc.peer_vtag == 0) {
+ /* This happens iff the assoc is in COOKIE-WAIT state. */
+ vtag = stcb->asoc.my_vtag;
+ abort->ch.chunk_flags = SCTP_HAD_NO_TCB;
+ } else {
+ vtag = stcb->asoc.peer_vtag;
+ abort->ch.chunk_flags = 0;
+ }
+ abort->ch.chunk_length = htons(chunk_len);
+ /* Add padding, if necessary. */
+ if (padding_len > 0) {
+ if ((m_last == NULL) ||
+ (sctp_add_pad_tombuf(m_last, padding_len) == NULL)) {
+ sctp_m_freem(m_out);
+ return;
+ }
+ }
(void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
(struct sockaddr *)&net->ro._l_addr,
m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0,
- stcb->sctp_ep->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag),
+ stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag),
stcb->asoc.primary_destination->port, NULL,
0, 0,
so_locked);
@@ -10854,7 +10917,7 @@
uint32_t vtag;
uint8_t flags;
- m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER);
if (m_shutdown_comp == NULL) {
/* no mbuf's */
return;
@@ -10884,9 +10947,10 @@
}
static void
-sctp_send_resp_msg(struct mbuf *m, struct sctphdr *sh, uint32_t vtag,
+sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag,
uint8_t type, struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct mbuf *o_pak;
@@ -10893,16 +10957,22 @@
struct mbuf *mout;
struct sctphdr *shout;
struct sctp_chunkhdr *ch;
- struct ip *iph;
+
+#if defined(INET) || defined(INET6)
struct udphdr *udp;
- int len, cause_len, padding_len, ret;
+ int ret;
+#endif
+ int len, cause_len, padding_len;
+
#ifdef INET
- struct ip *iph_out;
+ struct sockaddr_in *src_sin, *dst_sin;
+ struct ip *ip;
#endif
#ifdef INET6
- struct ip6_hdr *ip6, *ip6_out;
+ struct sockaddr_in6 *src_sin6, *dst_sin6;
+ struct ip6_hdr *ip6;
#endif
@@ -10921,7 +10991,7 @@
padding_len = 4 - padding_len;
}
if (padding_len != 0) {
- if (sctp_add_pad_tombuf(m_last, padding_len)) {
+ if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) {
sctp_m_freem(cause);
return;
}
@@ -10931,15 +11001,14 @@
}
/* Get an mbuf for the header. */
len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
+ switch (dst->sa_family) {
#ifdef INET
- case IPVERSION:
+ case AF_INET:
len += sizeof(struct ip);
break;
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
+ case AF_INET6:
len += sizeof(struct ip6_hdr);
break;
#endif
@@ -10946,10 +11015,12 @@
default:
break;
}
+#if defined(INET) || defined(INET6)
if (port) {
len += sizeof(struct udphdr);
}
- mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_DONTWAIT, 1, MT_DATA);
+#endif
+ mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA);
if (mout == NULL) {
if (cause) {
sctp_m_freem(cause);
@@ -10959,56 +11030,58 @@
SCTP_BUF_RESV_UF(mout, max_linkhdr);
SCTP_BUF_LEN(mout) = len;
SCTP_BUF_NEXT(mout) = cause;
- if (use_mflowid != 0) {
- mout->m_pkthdr.flowid = mflowid;
- mout->m_flags |= M_FLOWID;
- }
+ M_SETFIB(mout, fibnum);
+ mout->m_pkthdr.flowid = mflowid;
+ M_HASHTYPE_SET(mout, mflowtype);
#ifdef INET
- iph_out = NULL;
+ ip = NULL;
#endif
#ifdef INET6
- ip6_out = NULL;
+ ip6 = NULL;
#endif
- switch (iph->ip_v) {
+ switch (dst->sa_family) {
#ifdef INET
- case IPVERSION:
- iph_out = mtod(mout, struct ip *);
- iph_out->ip_v = IPVERSION;
- iph_out->ip_hl = (sizeof(struct ip) >> 2);
- iph_out->ip_tos = 0;
- iph_out->ip_id = ip_newid();
- iph_out->ip_off = 0;
- iph_out->ip_ttl = MODULE_GLOBAL(ip_defttl);
+ case AF_INET:
+ src_sin = (struct sockaddr_in *)src;
+ dst_sin = (struct sockaddr_in *)dst;
+ ip = mtod(mout, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+ ip->ip_tos = 0;
+ ip->ip_id = ip_newid();
+ ip->ip_off = 0;
+ ip->ip_ttl = MODULE_GLOBAL(ip_defttl);
if (port) {
- iph_out->ip_p = IPPROTO_UDP;
+ ip->ip_p = IPPROTO_UDP;
} else {
- iph_out->ip_p = IPPROTO_SCTP;
+ ip->ip_p = IPPROTO_SCTP;
}
- iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
- iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
- iph_out->ip_sum = 0;
+ ip->ip_src.s_addr = dst_sin->sin_addr.s_addr;
+ ip->ip_dst.s_addr = src_sin->sin_addr.s_addr;
+ ip->ip_sum = 0;
len = sizeof(struct ip);
- shout = (struct sctphdr *)((caddr_t)iph_out + len);
+ shout = (struct sctphdr *)((caddr_t)ip + len);
break;
#endif
#ifdef INET6
- case IPV6_VERSION >> 4:
- ip6 = (struct ip6_hdr *)iph;
- ip6_out = mtod(mout, struct ip6_hdr *);
- ip6_out->ip6_flow = htonl(0x60000000);
+ case AF_INET6:
+ src_sin6 = (struct sockaddr_in6 *)src;
+ dst_sin6 = (struct sockaddr_in6 *)dst;
+ ip6 = mtod(mout, struct ip6_hdr *);
+ ip6->ip6_flow = htonl(0x60000000);
if (V_ip6_auto_flowlabel) {
- ip6_out->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+ ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
}
- ip6_out->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
+ ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim);
if (port) {
- ip6_out->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_nxt = IPPROTO_UDP;
} else {
- ip6_out->ip6_nxt = IPPROTO_SCTP;
+ ip6->ip6_nxt = IPPROTO_SCTP;
}
- ip6_out->ip6_src = ip6->ip6_dst;
- ip6_out->ip6_dst = ip6->ip6_src;
+ ip6->ip6_src = dst_sin6->sin6_addr;
+ ip6->ip6_dst = src_sin6->sin6_addr;
len = sizeof(struct ip6_hdr);
- shout = (struct sctphdr *)((caddr_t)ip6_out + len);
+ shout = (struct sctphdr *)((caddr_t)ip6 + len);
break;
#endif
default:
@@ -11016,6 +11089,7 @@
shout = mtod(mout, struct sctphdr *);
break;
}
+#if defined(INET) || defined(INET6)
if (port) {
if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) {
sctp_m_freem(mout);
@@ -11034,6 +11108,7 @@
} else {
udp = NULL;
}
+#endif
shout->src_port = sh->dest_port;
shout->dest_port = sh->src_port;
shout->checksum = 0;
@@ -11059,21 +11134,17 @@
return;
}
SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ switch (dst->sa_family) {
#ifdef INET
- if (iph_out != NULL) {
+ case AF_INET:
if (port) {
if (V_udp_cksum) {
- udp->uh_sum = in_pseudo(iph_out->ip_src.s_addr, iph_out->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP));
} else {
udp->uh_sum = 0;
}
}
- iph_out->ip_len = len;
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
- sctp_packet_log(mout, len);
- }
-#endif
+ ip->ip_len = htons(len);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11089,21 +11160,21 @@
SCTP_STAT_INCR(sctps_sendnocrc);
#else
mout->m_pkthdr.csum_flags = CSUM_SCTP;
- mout->m_pkthdr.csum_data = 0;
+ mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(o_pak);
+ }
+#endif
SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id);
- }
+ break;
#endif
#ifdef INET6
- if (ip6_out != NULL) {
- ip6_out->ip6_plen = len - sizeof(struct ip6_hdr);
-#ifdef SCTP_PACKET_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
- sctp_packet_log(mout, len);
- }
-#endif
+ case AF_INET6:
+ ip6->ip6_plen = len - sizeof(struct ip6_hdr);
if (port) {
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_sendnocrc);
@@ -11119,13 +11190,25 @@
SCTP_STAT_INCR(sctps_sendnocrc);
#else
mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6;
- mout->m_pkthdr.csum_data = 0;
+ mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum);
SCTP_STAT_INCR(sctps_sendhwcrc);
#endif
}
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(o_pak);
+ }
+#endif
SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id);
+ break;
+#endif
+ default:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+ dst->sa_family);
+ sctp_m_freem(mout);
+ SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return;
}
-#endif
SCTP_STAT_INCR(sctps_sendpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
@@ -11133,12 +11216,13 @@
}
void
-sctp_send_shutdown_complete2(struct mbuf *m, struct sctphdr *sh,
- uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
- sctp_send_resp_msg(m, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
- use_mflowid, mflowid,
+ sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
}
@@ -11178,10 +11262,11 @@
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_heartbeat_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, so_locked);
return;
@@ -11205,7 +11290,7 @@
hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
/* Did our user request this one, put it in */
- hb->heartbeat.hb_info.addr_family = net->ro._l_addr.sa.sa_family;
+ hb->heartbeat.hb_info.addr_family = (uint8_t) net->ro._l_addr.sa.sa_family;
hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len;
if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
/*
@@ -11234,6 +11319,11 @@
break;
#endif
default:
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk, so_locked);
return;
break;
}
@@ -11279,13 +11369,14 @@
if (chk == NULL) {
return;
}
+ SCTP_STAT_INCR(sctps_queue_upd_ecne);
chk->copy_by_ref = 0;
- SCTP_STAT_INCR(sctps_queue_upd_ecne);
chk->rec.chunk_id.id = SCTP_ECN_ECHO;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_ecne_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -11310,21 +11401,14 @@
void
sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
- struct mbuf *m, int iphlen, int bad_crc)
+ struct mbuf *m, int len, int iphlen, int bad_crc)
{
struct sctp_association *asoc;
struct sctp_pktdrop_chunk *drp;
struct sctp_tmit_chunk *chk;
uint8_t *datap;
- int len;
int was_trunc = 0;
- struct ip *iph;
-
-#ifdef INET6
- struct ip6_hdr *ip6h;
-
-#endif
- int fullsz = 0, extra = 0;
+ int fullsz = 0;
long spc;
int offset;
struct sctp_chunkhdr *ch, chunk_buf;
@@ -11335,7 +11419,7 @@
}
asoc = &stcb->asoc;
SCTP_TCB_LOCK_ASSERT(stcb);
- if (asoc->peer_supports_pktdrop == 0) {
+ if (asoc->pktdrop_supported == 0) {
/*-
* peer must declare support before I send one.
*/
@@ -11349,28 +11433,11 @@
return;
}
chk->copy_by_ref = 0;
- iph = mtod(m, struct ip *);
- if (iph == NULL) {
- sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
- return;
- }
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- /* IPv4 */
- len = chk->send_size = iph->ip_len;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- /* IPv6 */
- ip6h = mtod(m, struct ip6_hdr *);
- len = chk->send_size = htons(ip6h->ip6_plen);
- break;
-#endif
- default:
- return;
- }
+ chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
+ len -= iphlen;
+ chk->send_size = len;
/* Validate that we do not have an ABORT in here. */
offset = iphlen + sizeof(struct sctphdr);
ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
@@ -11406,12 +11473,12 @@
/*
* only send 1 mtu worth, trim off the excess on the end.
*/
- fullsz = len - extra;
+ fullsz = len;
len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
was_trunc = 1;
}
chk->asoc = &stcb->asoc;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
jump_out:
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
@@ -11455,8 +11522,6 @@
} else {
chk->whoTo = NULL;
}
- chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
- chk->rec.chunk_id.can_take_data = 1;
drp->ch.chunk_type = SCTP_PACKET_DROPPED;
drp->ch.chunk_length = htons(chk->send_size);
spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
@@ -11490,11 +11555,11 @@
struct sctp_cwr_chunk *cwr;
struct sctp_tmit_chunk *chk;
- asoc = &stcb->asoc;
SCTP_TCB_LOCK_ASSERT(stcb);
if (net == NULL) {
return;
}
+ asoc = &stcb->asoc;
TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
if ((chk->rec.chunk_id.id == SCTP_ECN_CWR) && (net == chk->whoTo)) {
/*
@@ -11522,9 +11587,10 @@
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_ECN_CWR;
chk->rec.chunk_id.can_take_data = 1;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->send_size = sizeof(struct sctp_cwr_chunk);
- chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
return;
@@ -11544,23 +11610,37 @@
asoc->ctrl_queue_cnt++;
}
-void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
- int number_entries, uint16_t * list,
+static int
+sctp_add_stream_reset_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
{
- int len, old_len, i;
+ uint16_t len, old_len, i;
struct sctp_stream_reset_out_request *req_out;
struct sctp_chunkhdr *ch;
+ int at;
+ int number_entries = 0;
ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
-
/* get to new offset for the param. */
req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
/* now how long will this param be? */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+ (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ number_entries++;
+ }
+ }
+ if (number_entries == 0) {
+ return (0);
+ }
+ if (number_entries == stcb->asoc.streamoutcnt) {
+ number_entries = 0;
+ }
+ if (number_entries > SCTP_MAX_STREAMS_AT_ONCE_RESET) {
+ number_entries = SCTP_MAX_STREAMS_AT_ONCE_RESET;
+ }
len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
req_out->ph.param_length = htons(len);
@@ -11567,10 +11647,24 @@
req_out->request_seq = htonl(seq);
req_out->response_seq = htonl(resp_seq);
req_out->send_reset_at_tsn = htonl(last_sent);
+ at = 0;
if (number_entries) {
- for (i = 0; i < number_entries; i++) {
- req_out->list_of_streams[i] = htons(list[i]);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) &&
+ (stcb->asoc.strmout[i].chunks_on_queues == 0) &&
+ TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ req_out->list_of_streams[at] = htons(i);
+ at++;
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
+ if (at >= number_entries) {
+ break;
+ }
+ }
}
+ } else {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT;
+ }
}
if (SCTP_SIZE32(len) > len) {
/*-
@@ -11586,22 +11680,19 @@
chk->book_size_scale = 0;
chk->send_size = SCTP_SIZE32(chk->book_size);
SCTP_BUF_LEN(chk->data) = chk->send_size;
- return;
+ return (1);
}
-
-void
+static void
sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
int number_entries, uint16_t * list,
uint32_t seq)
{
- int len, old_len, i;
+ uint16_t len, old_len, i;
struct sctp_stream_reset_in_request *req_in;
struct sctp_chunkhdr *ch;
ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
/* get to new offset for the param. */
@@ -11633,18 +11724,15 @@
return;
}
-
-void
+static void
sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
uint32_t seq)
{
- int len, old_len;
+ uint16_t len, old_len;
struct sctp_stream_reset_tsn_request *req_tsn;
struct sctp_chunkhdr *ch;
ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
/* get to new offset for the param. */
@@ -11668,13 +11756,11 @@
sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
uint32_t resp_seq, uint32_t result)
{
- int len, old_len;
+ uint16_t len, old_len;
struct sctp_stream_reset_response *resp;
struct sctp_chunkhdr *ch;
ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
/* get to new offset for the param. */
@@ -11693,22 +11779,80 @@
chk->send_size = SCTP_SIZE32(chk->book_size);
SCTP_BUF_LEN(chk->data) = chk->send_size;
return;
+}
+void
+sctp_send_deferred_reset_response(struct sctp_tcb *stcb,
+ struct sctp_stream_reset_list *ent,
+ int response)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+
+ asoc = &stcb->asoc;
+
+ /*
+ * Reset our last reset action to the new one IP -> response
+ * (PERFORMED probably). This assures that if we fail to send, a
+ * retran from the peer will get the new response.
+ */
+ asoc->last_reset_action[0] = response;
+ if (asoc->stream_reset_outstanding) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ sctp_add_stream_reset_result(chk, ent->seq, response);
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (stcb->asoc.alternate) {
+ chk->whoTo = stcb->asoc.alternate;
+ } else {
+ chk->whoTo = stcb->asoc.primary_destination;
+ }
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
}
-
void
sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
uint32_t resp_seq, uint32_t result,
uint32_t send_una, uint32_t recv_next)
{
- int len, old_len;
+ uint16_t len, old_len;
struct sctp_stream_reset_response_tsn *resp;
struct sctp_chunkhdr *ch;
ch = mtod(chk->data, struct sctp_chunkhdr *);
-
-
old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
/* get to new offset for the param. */
@@ -11736,7 +11880,7 @@
uint32_t seq,
uint16_t adding)
{
- int len, old_len;
+ uint16_t len, old_len;
struct sctp_chunkhdr *ch;
struct sctp_stream_reset_add_strm *addstr;
@@ -11769,7 +11913,7 @@
uint32_t seq,
uint16_t adding)
{
- int len, old_len;
+ uint16_t len, old_len;
struct sctp_chunkhdr *ch;
struct sctp_stream_reset_add_strm *addstr;
@@ -11796,23 +11940,91 @@
return;
}
+int
+sctp_send_stream_reset_out_if_possible(struct sctp_tcb *stcb, int so_locked)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ uint32_t seq;
+ asoc = &stcb->asoc;
+ asoc->trigger_reset = 0;
+ if (asoc->stream_reset_outstanding) {
+ return (EALREADY);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk, so_locked);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (stcb->asoc.alternate) {
+ chk->whoTo = stcb->asoc.alternate;
+ } else {
+ chk->whoTo = stcb->asoc.primary_destination;
+ }
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ seq = stcb->asoc.str_reset_seq_out;
+ if (sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1))) {
+ seq++;
+ asoc->stream_reset_outstanding++;
+ } else {
+ m_freem(chk->data);
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk, so_locked);
+ return (ENOENT);
+ }
+ asoc->str_reset = chk;
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+
+ if (stcb->asoc.send_sack) {
+ sctp_send_sack(stcb, so_locked);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ return (0);
+}
+
int
sctp_send_str_reset_req(struct sctp_tcb *stcb,
- int number_entries, uint16_t * list,
- uint8_t send_out_req,
+ uint16_t number_entries, uint16_t * list,
uint8_t send_in_req,
uint8_t send_tsn_req,
uint8_t add_stream,
uint16_t adding_o,
- uint16_t adding_i, uint8_t peer_asked
-)
+ uint16_t adding_i, uint8_t peer_asked)
{
-
struct sctp_association *asoc;
struct sctp_tmit_chunk *chk;
struct sctp_chunkhdr *ch;
+ int can_send_out_req = 0;
uint32_t seq;
asoc = &stcb->asoc;
@@ -11823,17 +12035,27 @@
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
return (EBUSY);
}
- if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0) &&
+ if ((send_in_req == 0) && (send_tsn_req == 0) &&
(add_stream == 0)) {
/* nothing to do */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
}
- if (send_tsn_req && (send_out_req || send_in_req)) {
+ if (send_tsn_req && send_in_req) {
/* error, can't do that */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
return (EINVAL);
+ } else if (send_in_req) {
+ can_send_out_req = 1;
}
+ if (number_entries > (MCLBYTES -
+ SCTP_MIN_OVERHEAD -
+ sizeof(struct sctp_chunkhdr) -
+ sizeof(struct sctp_stream_reset_out_request)) /
+ sizeof(uint16_t)) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -11842,12 +12064,13 @@
chk->copy_by_ref = 0;
chk->rec.chunk_id.id = SCTP_STREAM_RESET;
chk->rec.chunk_id.can_take_data = 0;
+ chk->flags = 0;
chk->asoc = &stcb->asoc;
chk->book_size = sizeof(struct sctp_chunkhdr);
chk->send_size = SCTP_SIZE32(chk->book_size);
chk->book_size_scale = 0;
- chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (chk->data == NULL) {
sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED);
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -11871,12 +12094,14 @@
SCTP_BUF_LEN(chk->data) = chk->send_size;
seq = stcb->asoc.str_reset_seq_out;
- if (send_out_req) {
- sctp_add_stream_reset_out(chk, number_entries, list,
- seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
- asoc->stream_reset_out_is_outstanding = 1;
- seq++;
- asoc->stream_reset_outstanding++;
+ if (can_send_out_req) {
+ int ret;
+
+ ret = sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1));
+ if (ret) {
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
}
if ((add_stream & 1) &&
((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) {
@@ -11885,10 +12110,15 @@
struct sctp_stream_queue_pending *sp, *nsp;
int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
oldstream = stcb->asoc.strmout;
/* get some more */
SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *,
- ((stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out)),
+ (stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out),
SCTP_M_STRMO);
if (stcb->asoc.strmout == NULL) {
uint8_t x;
@@ -11907,9 +12137,11 @@
stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 0, 1);
for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
- stcb->asoc.strmout[i].next_sequence_sent = oldstream[i].next_sequence_sent;
+ stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues;
+ stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send;
stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete;
stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].state = oldstream[i].state;
stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], &oldstream[i]);
/* now anything on those queues? */
TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) {
@@ -11927,11 +12159,22 @@
/* now the new streams */
stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) {
- stcb->asoc.strmout[i].next_sequence_sent = 0x0;
TAILQ_INIT(&stcb->asoc.strmout[i].outqueue);
+ stcb->asoc.strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ stcb->asoc.strmout[i].abandoned_sent[j] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ stcb->asoc.strmout[i].abandoned_sent[0] = 0;
+ stcb->asoc.strmout[i].abandoned_unsent[0] = 0;
+#endif
+ stcb->asoc.strmout[i].next_sequence_send = 0x0;
stcb->asoc.strmout[i].stream_no = i;
stcb->asoc.strmout[i].last_msg_incomplete = 0;
stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL);
+ stcb->asoc.strmout[i].state = SCTP_STREAM_CLOSED;
}
stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o;
SCTP_FREE(oldstream, SCTP_M_STRMO);
@@ -11965,14 +12208,17 @@
chk,
sctp_next);
asoc->ctrl_queue_cnt++;
+ if (stcb->asoc.send_sack) {
+ sctp_send_sack(stcb, SCTP_SO_LOCKED);
+ }
sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
return (0);
}
void
-sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
- struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
/* Don't respond to an ABORT with an ABORT. */
@@ -11981,20 +12227,20 @@
sctp_m_freem(cause);
return;
}
- sctp_send_resp_msg(m, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
- use_mflowid, mflowid,
+ sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
}
void
-sctp_send_operr_to(struct mbuf *m, struct sctphdr *sh, uint32_t vtag,
- struct mbuf *cause,
- uint8_t use_mflowid, uint32_t mflowid,
+sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, uint32_t vtag, struct mbuf *cause,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
- sctp_send_resp_msg(m, sh, vtag, SCTP_OPERATION_ERROR, cause,
- use_mflowid, mflowid,
+ sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
}
@@ -12012,8 +12258,8 @@
m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0,
(M_PKTHDR | (user_marks_eor ? M_EOR : 0)));
if (m == NULL) {
- SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
- *error = ENOMEM;
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ *error = ENOBUFS;
} else {
*sndout = m_length(m, NULL);
*new_tail = m_last(m);
@@ -12026,14 +12272,11 @@
struct uio *uio,
int resv_upfront)
{
- int left;
-
- left = sp->length;
sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
resv_upfront, 0);
if (sp->data == NULL) {
- SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
- return (ENOMEM);
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ return (ENOBUFS);
}
sp->tail_mbuf = m_last(sp->data);
return (0);
@@ -12084,7 +12327,6 @@
sp->timetolive = srcv->sinfo_timetolive;
sp->ppid = srcv->sinfo_ppid;
sp->context = srcv->sinfo_context;
- sp->strseq = 0;
(void)SCTP_GETTIME_TIMEVAL(&sp->ts);
sp->stream = srcv->sinfo_stream;
@@ -12253,7 +12495,7 @@
sndlen = SCTP_HEADER_LEN(i_pak);
}
SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
- addr,
+ (void *)addr,
sndlen);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_socket->so_qlimit)) {
@@ -12423,6 +12665,7 @@
}
#endif
stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
p
);
if (stcb == NULL) {
@@ -12458,7 +12701,8 @@
if (control) {
if (sctp_process_cmsgs_for_init(stcb, control, &error)) {
- sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7);
+ sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
hold_tcblock = 0;
stcb = NULL;
goto out_unlocked;
@@ -12544,12 +12788,24 @@
SCTP_ASOC_CREATE_UNLOCK(inp);
create_lock_applied = 0;
}
- if (asoc->stream_reset_outstanding) {
+ /* Is the stream no. valid? */
+ if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if ((asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPEN) &&
+ (asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPENING)) {
/*
* Can't queue any data while stream reset is underway.
*/
- SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
- error = EAGAIN;
+ if (asoc->strmout[srcv->sinfo_stream].state > SCTP_STREAM_OPEN) {
+ error = EAGAIN;
+ } else {
+ error = EINVAL;
+ }
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error);
goto out_unlocked;
}
if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
@@ -12598,7 +12854,7 @@
if (top) {
struct mbuf *cntm = NULL;
- mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAIT, 1, MT_DATA);
+ mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAITOK, 1, MT_DATA);
if (sndlen != 0) {
for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) {
tot_out += SCTP_BUF_LEN(cntm);
@@ -12614,7 +12870,7 @@
error = EMSGSIZE;
goto out;
}
- mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+ mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA);
}
if (mm == NULL) {
SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
@@ -12632,7 +12888,7 @@
/* now move forward the data pointer */
ph = mtod(mm, struct sctp_paramhdr *);
ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out));
+ ph->param_length = htons(sizeof(struct sctp_paramhdr) + tot_out);
ph++;
SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
if (top == NULL) {
@@ -12688,13 +12944,6 @@
SCTP_TCB_UNLOCK(stcb);
hold_tcblock = 0;
}
- /* Is the stream no. valid? */
- if (srcv->sinfo_stream >= asoc->streamoutcnt) {
- /* Invalid stream number */
- SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
- error = EINVAL;
- goto out_unlocked;
- }
if (asoc->strmout == NULL) {
/* huh? software error */
SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
@@ -12837,15 +13086,7 @@
}
sctp_snd_sb_alloc(stcb, sp->length);
atomic_add_int(&asoc->stream_queue_cnt, 1);
- if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
- sp->strseq = strm->next_sequence_sent;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_SCTP) {
- sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN,
- (uintptr_t) stcb, sp->length,
- (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0);
- }
- strm->next_sequence_sent++;
- } else {
+ if (srcv->sinfo_flags & SCTP_UNORDERED) {
SCTP_STAT_INCR(sctps_sends_with_unord);
}
TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
@@ -12936,7 +13177,7 @@
continue;
}
/* PR-SCTP? */
- if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+ if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) {
/*
* This is ugly but we must assure locking
* order
@@ -12998,7 +13239,7 @@
/*-
* Ok, Nagle is set on and we have data outstanding.
* Don't send anything and let SACKs drive out the
- * data unless wen have a "full" segment to send.
+ * data unless we have a "full" segment to send.
*/
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) {
sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
@@ -13157,18 +13398,19 @@
(SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
+ /* only send SHUTDOWN the first time through */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
} else {
netp = stcb->asoc.primary_destination;
}
- /* only send SHUTDOWN the first time through */
sctp_send_shutdown(stcb, netp);
- if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
- SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- }
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
@@ -13206,13 +13448,20 @@
if (TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
abort_anyway:
if (free_cnt_applied) {
atomic_add_int(&stcb->asoc.refcnt, -1);
free_cnt_applied = 0;
}
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb->sctp_ep, stcb,
- NULL, SCTP_SO_LOCKED);
+ op_err, SCTP_SO_LOCKED);
/*
* now relock the stcb so everything
* is sane
@@ -13352,13 +13601,6 @@
}
}
#endif
-#ifdef INVARIANTS
- if (inp) {
- sctp_validate_no_locks(inp);
- } else {
- SCTP_PRINTF("Warning - inp is NULL so cant validate locks\n");
- }
-#endif
if (top) {
sctp_m_freem(top);
}
@@ -13386,19 +13628,14 @@
(stcb == NULL))
return (m);
- /* sysctl disabled auth? */
- if (SCTP_BASE_SYSCTL(sctp_auth_disable))
+ if (stcb->asoc.auth_supported == 0) {
return (m);
-
- /* peer doesn't do auth... */
- if (!stcb->asoc.peer_supports_auth) {
- return (m);
}
/* does the requested chunk require auth? */
if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
return (m);
}
- m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+ m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_NOWAIT, 1, MT_HEADER);
if (m_auth == NULL) {
/* no mbuf's */
return (m);
@@ -13497,7 +13734,7 @@
}
ifa = (struct ifaddr *)sifa->ifa;
mask = (struct sockaddr_in *)(ifa->ifa_netmask);
- sin = (struct sockaddr_in *)&sifa->address.sin;
+ sin = &sifa->address.sin;
srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
Modified: trunk/sys/netinet/sctp_output.h
===================================================================
--- trunk/sys/netinet/sctp_output.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_output.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_output.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_output.h 295208 2016-02-03 14:04:07Z tuexen $");
#ifndef _NETINET_SCTP_OUTPUT_H_
#define _NETINET_SCTP_OUTPUT_H_
@@ -47,7 +47,8 @@
struct sctp_tcb *stcb,
struct sctp_scoping *scope,
struct mbuf *m_at,
- int cnt_inits_to);
+ int cnt_inits_to,
+ uint16_t * padding_len, uint16_t * chunk_len);
int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
@@ -55,13 +56,9 @@
int
sctp_is_address_in_scope(struct sctp_ifa *ifa,
- int ipv4_addr_legal,
- int ipv6_addr_legal,
- int loopback_scope,
- int ipv4_local_scope,
- int local_scope,
- int site_scope,
+ struct sctp_scoping *scope,
int do_update);
+
int
sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa);
@@ -84,8 +81,11 @@
);
void
-sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
- int, int, struct sctphdr *, struct sctp_init_chunk *,
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, struct mbuf *,
+ int, int,
+ struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct sctp_init_chunk *,
uint8_t, uint32_t,
uint32_t, uint16_t, int);
@@ -117,8 +117,9 @@
void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *, int);
void
-sctp_send_shutdown_complete2(struct mbuf *, struct sctphdr *,
- uint8_t, uint32_t,
+sctp_send_shutdown_complete2(struct sockaddr *, struct sockaddr *,
+ struct sctphdr *,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
@@ -163,7 +164,7 @@
void
sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
- int, int);
+ int, int, int);
@@ -171,48 +172,33 @@
void
-sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
- int number_entries, uint16_t * list,
- uint32_t seq, uint32_t resp_seq, uint32_t last_sent);
+ sctp_add_stream_reset_result(struct sctp_tmit_chunk *, uint32_t, uint32_t);
void
-sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
- int number_entries, uint16_t * list,
- uint32_t seq);
+sctp_send_deferred_reset_response(struct sctp_tcb *,
+ struct sctp_stream_reset_list *,
+ int);
void
-sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
- uint32_t seq);
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *,
+ uint32_t, uint32_t, uint32_t, uint32_t);
+int
+ sctp_send_stream_reset_out_if_possible(struct sctp_tcb *, int);
-void
-sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
- uint32_t resp_seq, uint32_t result);
-
-void
-sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
- uint32_t resp_seq, uint32_t result,
- uint32_t send_una, uint32_t recv_next);
-
int
-sctp_send_str_reset_req(struct sctp_tcb *stcb,
- int number_entries, uint16_t * list,
- uint8_t send_out_req,
- uint8_t send_in_req,
- uint8_t send_tsn_req,
- uint8_t add_stream,
- uint16_t adding_o,
- uint16_t adding_i, uint8_t from_peer);
+sctp_send_str_reset_req(struct sctp_tcb *, uint16_t, uint16_t *,
+ uint8_t, uint8_t, uint8_t, uint16_t, uint16_t, uint8_t);
void
-sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
- struct mbuf *,
- uint8_t, uint32_t,
+sctp_send_abort(struct mbuf *, int, struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, uint32_t, struct mbuf *,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
void
-sctp_send_operr_to(struct mbuf *, struct sctphdr *, uint32_t,
- struct mbuf *,
- uint8_t, uint32_t,
+sctp_send_operr_to(struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, uint32_t, struct mbuf *,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
#endif /* _KERNEL || __Userspace__ */
Modified: trunk/sys/netinet/sctp_pcb.c
===================================================================
--- trunk/sys/netinet/sctp_pcb.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_pcb.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_pcb.c 239448 2012-08-20 16:45:46Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_pcb.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
@@ -47,7 +47,9 @@
#include <netinet/sctp_timer.h>
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_dtrace_define.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
@@ -95,11 +97,10 @@
spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq);
spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq);
spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks);
-
SCTP_INP_INFO_RUNLOCK();
}
-/*
+/*-
* Addresses are added to VRF's (Virtual Router's). For BSD we
* have only the default VRF 0. We maintain a hash list of
* VRF's. Each VRF has its own list of sctp_ifn's. Each of
@@ -215,7 +216,6 @@
}
-
struct sctp_vrf *
sctp_find_vrf(uint32_t vrf_id)
{
@@ -231,6 +231,7 @@
return (NULL);
}
+
void
sctp_free_vrf(struct sctp_vrf *vrf)
{
@@ -246,6 +247,7 @@
}
}
+
void
sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
{
@@ -259,6 +261,7 @@
}
}
+
void
sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
{
@@ -284,6 +287,7 @@
}
}
+
static void
sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
{
@@ -306,12 +310,13 @@
sctp_free_ifn(sctp_ifnp);
}
+
void
sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
const char *if_name, uint32_t ifn_index)
{
struct sctp_vrf *vrf;
- struct sctp_ifa *sctp_ifap = NULL;
+ struct sctp_ifa *sctp_ifap;
SCTP_IPI_ADDR_RLOCK();
vrf = sctp_find_vrf(vrf_id);
@@ -349,12 +354,13 @@
SCTP_IPI_ADDR_RUNLOCK();
}
+
void
sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
const char *if_name, uint32_t ifn_index)
{
struct sctp_vrf *vrf;
- struct sctp_ifa *sctp_ifap = NULL;
+ struct sctp_ifa *sctp_ifap;
SCTP_IPI_ADDR_RLOCK();
vrf = sctp_find_vrf(vrf_id);
@@ -392,6 +398,7 @@
SCTP_IPI_ADDR_RUNLOCK();
}
+
/*-
* Add an ifa to an ifn.
* Register the interface as necessary.
@@ -429,6 +436,7 @@
}
}
+
/*-
* Remove an ifa from its ifn.
* If no more addresses exist, remove the ifn too. Otherwise, re-register
@@ -480,6 +488,7 @@
}
}
+
struct sctp_ifa *
sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
uint32_t ifn_type, const char *if_name, void *ifa,
@@ -554,7 +563,7 @@
(sctp_ifap->ifn_p->ifn_index == ifn_index)) {
SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
sctp_ifap->ifn_p->ifn_name, ifn_index,
- sctp_ifap);
+ (void *)sctp_ifap);
if (new_ifn_af) {
/* Remove the created one that we don't want */
sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
@@ -576,7 +585,7 @@
* old one
*/
SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
- sctp_ifap, sctp_ifap->ifn_p->ifn_name,
+ (void *)sctp_ifap, sctp_ifap->ifn_p->ifn_name,
sctp_ifap->ifn_p->ifn_index, if_name,
ifn_index);
/* remove the address from the old ifn */
@@ -588,7 +597,7 @@
/* repair ifnp which was NULL ? */
sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
- sctp_ifnp, sctp_ifap);
+ (void *)sctp_ifnp, (void *)sctp_ifap);
sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
}
goto exit_stage_left;
@@ -617,7 +626,7 @@
{
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+ sin = &sctp_ifap->address.sin;
if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
(IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
sctp_ifap->src_is_loop = 1;
@@ -637,7 +646,7 @@
/* ok to use deprecated addresses? */
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+ sin6 = &sctp_ifap->address.sin6;
if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
(IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
sctp_ifap->src_is_loop = 1;
@@ -764,9 +773,16 @@
return;
}
}
- SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
+ SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", (void *)sctp_ifap);
sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
- sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+ /*
+ * We don't set the flag. This means that the structure will
+ * hang around in EP's that have bound specific to it until
+ * they close. This gives us TCP like behavior if someone
+ * removes an address (or for that matter adds it right
+ * back).
+ */
+ /* sctp_ifap->localifa_flags |= SCTP_BEING_DELETED; */
vrf->total_ifa_count--;
LIST_REMOVE(sctp_ifap, next_bucket);
sctp_remove_ifa_from_ifn(sctp_ifap);
@@ -818,6 +834,183 @@
}
+static int
+sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
+{
+ int loopback_scope;
+
+#if defined(INET)
+ int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined(INET6)
+ int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+ ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+ ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+ local_scope = stcb->asoc.scope.local_scope;
+ site_scope = stcb->asoc.scope.site_scope;
+ ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ /* no vrf, no addresses */
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
+ (!sctp_is_addr_pending(stcb, sctp_ifa))) {
+ /*
+ * We allow pending addresses, where
+ * we have sent an asconf-add to be
+ * considered valid.
+ */
+ continue;
+ }
+ if (sctp_ifa->address.sa.sa_family != to->sa_family) {
+ continue;
+ }
+ switch (sctp_ifa->address.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ if (ipv4_addr_legal) {
+ struct sockaddr_in *sin,
+ *rsin;
+
+ sin = &sctp_ifa->address.sin;
+ rsin = (struct sockaddr_in *)to;
+ if ((ipv4_local_scope == 0) &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ continue;
+ }
+ if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
+ if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6,
+ *rsin6;
+
+ sin6 = &sctp_ifa->address.sin6;
+ rsin6 = (struct sockaddr_in6 *)to;
+ if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
+ continue;
+ }
+ if (sctp_is_addr_restricted(stcb, laddr->ifa) &&
+ (!sctp_is_addr_pending(stcb, laddr->ifa))) {
+ /*
+ * We allow pending addresses, where we have
+ * sent an asconf-add to be considered
+ * valid.
+ */
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family != to->sa_family) {
+ continue;
+ }
+ switch (to->sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = &laddr->ifa->address.sin;
+ rsin = (struct sockaddr_in *)to;
+ if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = &laddr->ifa->address.sin6;
+ rsin6 = (struct sockaddr_in6 *)to;
+ if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (1);
+ }
+ break;
+ }
+
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+}
+
+
static struct sctp_tcb *
sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
@@ -878,6 +1071,39 @@
SCTP_INP_RUNLOCK(inp);
continue;
}
+ switch (to->sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)to;
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)to;
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ break;
+ }
+#endif
+ default:
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
if (inp->def_vrf_id != vrf_id) {
SCTP_INP_RUNLOCK(inp);
continue;
@@ -890,7 +1116,7 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
- SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __func__);
continue;
}
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -900,7 +1126,6 @@
if (laddr->ifa->address.sa.sa_family ==
to->sa_family) {
/* see if it matches */
-
#ifdef INET
if (from->sa_family == AF_INET) {
struct sockaddr_in *intf_addr,
@@ -943,7 +1168,7 @@
* Ok if we hit here the ep has the address, does it hold
* the tcb?
*/
-
+ /* XXX: Why don't we TAILQ_FOREACH through sctp_asoc_list? */
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
SCTP_INP_RUNLOCK(inp);
@@ -950,6 +1175,11 @@
continue;
}
SCTP_TCB_LOCK(stcb);
+ if (!sctp_does_stcb_own_this_addr(stcb, to)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
if (stcb->rport != rport) {
/* remote port does not match. */
SCTP_TCB_UNLOCK(stcb);
@@ -961,6 +1191,11 @@
SCTP_INP_RUNLOCK(inp);
continue;
}
+ if (!sctp_does_stcb_own_this_addr(stcb, to)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
/* Does this TCB have a matching address? */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
@@ -1028,162 +1263,7 @@
return (NULL);
}
-static int
-sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to)
-{
- int loopback_scope, ipv4_local_scope, local_scope, site_scope;
- int ipv4_addr_legal, ipv6_addr_legal;
- struct sctp_vrf *vrf;
- struct sctp_ifn *sctp_ifn;
- struct sctp_ifa *sctp_ifa;
- loopback_scope = stcb->asoc.loopback_scope;
- ipv4_local_scope = stcb->asoc.ipv4_local_scope;
- local_scope = stcb->asoc.local_scope;
- site_scope = stcb->asoc.site_scope;
- ipv4_addr_legal = ipv6_addr_legal = 0;
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ipv6_addr_legal = 1;
- if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
- ipv4_addr_legal = 1;
- }
- } else {
- ipv4_addr_legal = 1;
- }
-
- SCTP_IPI_ADDR_RLOCK();
- vrf = sctp_find_vrf(stcb->asoc.vrf_id);
- if (vrf == NULL) {
- /* no vrf, no addresses */
- SCTP_IPI_ADDR_RUNLOCK();
- return (0);
- }
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
- LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
- if ((loopback_scope == 0) &&
- SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
- continue;
- }
- LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
- if (sctp_is_addr_restricted(stcb, sctp_ifa) &&
- (!sctp_is_addr_pending(stcb, sctp_ifa))) {
- /*
- * We allow pending addresses, where
- * we have sent an asconf-add to be
- * considered valid.
- */
- continue;
- }
- switch (sctp_ifa->address.sa.sa_family) {
-#ifdef INET
- case AF_INET:
- if (ipv4_addr_legal) {
- struct sockaddr_in *sin,
- *rsin;
-
- sin = &sctp_ifa->address.sin;
- rsin = (struct sockaddr_in *)to;
- if ((ipv4_local_scope == 0) &&
- IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
- continue;
- }
- if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
- SCTP_IPI_ADDR_RUNLOCK();
- return (1);
- }
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (ipv6_addr_legal) {
- struct sockaddr_in6 *sin6,
- *rsin6;
-
- sin6 = &sctp_ifa->address.sin6;
- rsin6 = (struct sockaddr_in6 *)to;
- if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
- if (local_scope == 0)
- continue;
- if (sin6->sin6_scope_id == 0) {
- if (sa6_recoverscope(sin6) != 0)
- continue;
- }
- }
- if ((site_scope == 0) &&
- (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
- continue;
- }
- if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
- SCTP_IPI_ADDR_RUNLOCK();
- return (1);
- }
- }
- break;
-#endif
- default:
- /* TSNH */
- break;
- }
- }
- }
- } else {
- struct sctp_laddr *laddr;
-
- LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
- if (sctp_is_addr_restricted(stcb, laddr->ifa) &&
- (!sctp_is_addr_pending(stcb, laddr->ifa))) {
- /*
- * We allow pending addresses, where we have
- * sent an asconf-add to be considered
- * valid.
- */
- continue;
- }
- if (laddr->ifa->address.sa.sa_family != to->sa_family) {
- continue;
- }
- switch (to->sa_family) {
-#ifdef INET
- case AF_INET:
- {
- struct sockaddr_in *sin, *rsin;
-
- sin = (struct sockaddr_in *)&laddr->ifa->address.sin;
- rsin = (struct sockaddr_in *)to;
- if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) {
- SCTP_IPI_ADDR_RUNLOCK();
- return (1);
- }
- break;
- }
-#endif
-#ifdef INET6
- case AF_INET6:
- {
- struct sockaddr_in6 *sin6, *rsin6;
-
- sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
- rsin6 = (struct sockaddr_in6 *)to;
- if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) {
- SCTP_IPI_ADDR_RUNLOCK();
- return (1);
- }
- break;
- }
-
-#endif
- default:
- /* TSNH */
- break;
- }
-
- }
- }
- SCTP_IPI_ADDR_RUNLOCK();
- return (0);
-}
-
/*
* rules for use
*
@@ -1204,11 +1284,18 @@
uint16_t rport;
inp = *inp_p;
- if (remote->sa_family == AF_INET) {
+ switch (remote->sa_family) {
+#ifdef INET
+ case AF_INET:
rport = (((struct sockaddr_in *)remote)->sin_port);
- } else if (remote->sa_family == AF_INET6) {
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
rport = (((struct sockaddr_in6 *)remote)->sin6_port);
- } else {
+ break;
+#endif
+ default:
return (NULL);
}
if (locked_tcb) {
@@ -1220,7 +1307,8 @@
SCTP_TCB_UNLOCK(locked_tcb);
}
SCTP_INP_INFO_RLOCK();
- if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
/*-
* Now either this guy is our listener or it's the
* connector. If it is the one that issued the connect, then
@@ -1354,9 +1442,6 @@
}
head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
inp->sctp_hashmark)];
- if (head == NULL) {
- goto null_return;
- }
LIST_FOREACH(stcb, head, sctp_tcbhash) {
if (stcb->rport != rport) {
/* remote port does not match */
@@ -1464,11 +1549,11 @@
return (NULL);
}
+
/*
* Find an association for a specific endpoint using the association id given
* out in the COMM_UP notification
*/
-
struct sctp_tcb *
sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
{
@@ -1529,6 +1614,9 @@
}
+/*
+ * Endpoint probe expects that the INP_INFO is locked.
+ */
static struct sctp_inpcb *
sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
uint16_t lport, uint32_t vrf_id)
@@ -1545,12 +1633,8 @@
struct sockaddr_in6 *intf_addr6;
#endif
-
int fnd;
- /*
- * Endpoint probe expects that the INP_INFO is locked.
- */
#ifdef INET
sin = NULL;
#endif
@@ -1585,23 +1669,45 @@
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
(inp->sctp_lport == lport)) {
/* got it */
+ switch (nam->sa_family) {
#ifdef INET
- if ((nam->sa_family == AF_INET) &&
- (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp)) {
- /* IPv4 on a IPv6 socket with ONLY IPv6 set */
- SCTP_INP_RUNLOCK(inp);
- continue;
- }
+ case AF_INET:
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /*
+ * IPv4 on a IPv6 socket with ONLY
+ * IPv6 set
+ */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ break;
#endif
#ifdef INET6
- /* A V6 address and the endpoint is NOT bound V6 */
- if (nam->sa_family == AF_INET6 &&
- (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
- SCTP_INP_RUNLOCK(inp);
- continue;
+ case AF_INET6:
+ /*
+ * A V6 address and the endpoint is NOT
+ * bound V6
+ */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ break;
+#endif
+ default:
+ break;
}
-#endif
/* does a VRF id match? */
fnd = 0;
if (inp->def_vrf_id == vrf_id)
@@ -1668,11 +1774,11 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
- laddr->ifa);
+ (void *)laddr->ifa);
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
continue;
@@ -1762,7 +1868,7 @@
{
/* For 1-2-1 with port reuse */
struct sctppcbhead *head;
- struct sctp_inpcb *tinp;
+ struct sctp_inpcb *tinp, *ninp;
if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
/* only works with port reuse on */
@@ -1772,10 +1878,11 @@
return (0);
}
SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_WLOCK();
head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport,
SCTP_BASE_INFO(hashmark))];
/* Kick out all non-listeners to the TCP hash */
- LIST_FOREACH(tinp, head, sctp_hash) {
+ LIST_FOREACH_SAFE(tinp, head, sctp_hash, ninp) {
if (tinp->sctp_lport != inp->sctp_lport) {
continue;
}
@@ -1803,6 +1910,7 @@
LIST_INSERT_HEAD(head, inp, sctp_hash);
SCTP_INP_WUNLOCK(inp);
SCTP_INP_RLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
return (0);
}
@@ -1886,6 +1994,7 @@
return (inp);
}
+
/*
* Find an association for an endpoint with the pointer to whom you want to
* send to and the endpoint pointer. The address can be IPv4 or IPv6. We may
@@ -1892,25 +2001,25 @@
* need to change the *to to some other struct like a mbuf...
*/
struct sctp_tcb *
-sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+sctp_findassociation_addr_sa(struct sockaddr *from, struct sockaddr *to,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
uint32_t vrf_id)
{
struct sctp_inpcb *inp = NULL;
- struct sctp_tcb *retval;
+ struct sctp_tcb *stcb;
SCTP_INP_INFO_RLOCK();
if (find_tcp_pool) {
if (inp_p != NULL) {
- retval = sctp_tcb_special_locate(inp_p, from, to, netp,
+ stcb = sctp_tcb_special_locate(inp_p, from, to, netp,
vrf_id);
} else {
- retval = sctp_tcb_special_locate(&inp, from, to, netp,
+ stcb = sctp_tcb_special_locate(&inp, from, to, netp,
vrf_id);
}
- if (retval != NULL) {
+ if (stcb != NULL) {
SCTP_INP_INFO_RUNLOCK();
- return (retval);
+ return (stcb);
}
}
inp = sctp_pcb_findep(to, 0, 1, vrf_id);
@@ -1918,7 +2027,6 @@
*inp_p = inp;
}
SCTP_INP_INFO_RUNLOCK();
-
if (inp == NULL) {
return (NULL);
}
@@ -1929,13 +2037,13 @@
* inbound packet side.
*/
if (inp_p != NULL) {
- retval = sctp_findassociation_ep_addr(inp_p, from, netp, to,
+ stcb = sctp_findassociation_ep_addr(inp_p, from, netp, to,
NULL);
} else {
- retval = sctp_findassociation_ep_addr(&inp, from, netp, to,
+ stcb = sctp_findassociation_ep_addr(&inp, from, netp, to,
NULL);
}
- return retval;
+ return (stcb);
}
@@ -1947,12 +2055,17 @@
static struct sctp_tcb *
sctp_findassociation_special_addr(struct mbuf *m, int offset,
struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
- struct sockaddr *dest)
+ struct sockaddr *dst)
{
struct sctp_paramhdr *phdr, parm_buf;
- struct sctp_tcb *retval;
- uint32_t ptype, plen;
+#if defined(INET) || defined(INET6)
+ struct sctp_tcb *stcb;
+ uint16_t ptype;
+
+#endif
+ uint16_t plen;
+
#ifdef INET
struct sockaddr_in sin4;
@@ -1975,13 +2088,14 @@
sin6.sin6_port = sh->src_port;
#endif
- retval = NULL;
offset += sizeof(struct sctp_init_chunk);
phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
while (phdr != NULL) {
/* now we must see if we want the parameter */
+#if defined(INET) || defined(INET6)
ptype = ntohs(phdr->param_type);
+#endif
plen = ntohs(phdr->param_length);
if (plen == 0) {
break;
@@ -2000,10 +2114,10 @@
p4 = (struct sctp_ipv4addr_param *)phdr;
memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
/* look it up */
- retval = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&sin4, netp, dest, NULL);
- if (retval != NULL) {
- return (retval);
+ stcb = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin4, netp, dst, NULL);
+ if (stcb != NULL) {
+ return (stcb);
}
}
#endif
@@ -2021,10 +2135,10 @@
p6 = (struct sctp_ipv6addr_param *)phdr;
memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
/* look it up */
- retval = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&sin6, netp, dest, NULL);
- if (retval != NULL) {
- return (retval);
+ stcb = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin6, netp, dst, NULL);
+ if (stcb != NULL) {
+ return (stcb);
}
}
#endif
@@ -2052,11 +2166,6 @@
SCTP_INP_INFO_RLOCK();
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag,
SCTP_BASE_INFO(hashasocmark))];
- if (head == NULL) {
- /* invalid vtag */
- SCTP_INP_INFO_RUNLOCK();
- return (NULL);
- }
LIST_FOREACH(stcb, head, sctp_asocs) {
SCTP_INP_RLOCK(stcb->sctp_ep);
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
@@ -2137,6 +2246,7 @@
return (NULL);
}
+
/*
* Find an association with the pointer to the inbound IP packet. This can be
* a IPv4 or IPv6 packet.
@@ -2143,124 +2253,31 @@
*/
struct sctp_tcb *
sctp_findassociation_addr(struct mbuf *m, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_chunkhdr *ch,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
- int find_tcp_pool;
- struct ip *iph;
- struct sctp_tcb *retval;
- struct sockaddr_storage to_store, from_store;
- struct sockaddr *to = (struct sockaddr *)&to_store;
- struct sockaddr *from = (struct sockaddr *)&from_store;
+ struct sctp_tcb *stcb;
struct sctp_inpcb *inp;
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *from4;
-
- from4 = (struct sockaddr_in *)&from_store;
- bzero(from4, sizeof(*from4));
- from4->sin_family = AF_INET;
- from4->sin_len = sizeof(struct sockaddr_in);
- from4->sin_addr.s_addr = iph->ip_src.s_addr;
- from4->sin_port = sh->src_port;
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *from6;
-
- ip6 = mtod(m, struct ip6_hdr *);
- from6 = (struct sockaddr_in6 *)&from_store;
- bzero(from6, sizeof(*from6));
- from6->sin6_family = AF_INET6;
- from6->sin6_len = sizeof(struct sockaddr_in6);
- from6->sin6_addr = ip6->ip6_src;
- from6->sin6_port = sh->src_port;
- /* Get the scopes in properly to the sin6 addr's */
- sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone));
- break;
- }
-#endif
- default:
- /* Currently not supported. */
- return (NULL);
- }
-
-
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *to4;
-
- to4 = (struct sockaddr_in *)&to_store;
- bzero(to4, sizeof(*to4));
- to4->sin_family = AF_INET;
- to4->sin_len = sizeof(struct sockaddr_in);
- to4->sin_addr.s_addr = iph->ip_dst.s_addr;
- to4->sin_port = sh->dest_port;
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *to6;
-
- ip6 = mtod(m, struct ip6_hdr *);
- to6 = (struct sockaddr_in6 *)&to_store;
- bzero(to6, sizeof(*to6));
- to6->sin6_family = AF_INET6;
- to6->sin6_len = sizeof(struct sockaddr_in6);
- to6->sin6_addr = ip6->ip6_dst;
- to6->sin6_port = sh->dest_port;
- /* Get the scopes in properly to the sin6 addr's */
- sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone));
- break;
- }
-#endif
- default:
- /* TSNH */
- break;
- }
if (sh->v_tag) {
/* we only go down this path if vtag is non-zero */
- retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag),
+ stcb = sctp_findassoc_by_vtag(src, dst, ntohl(sh->v_tag),
inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0);
- if (retval) {
- return (retval);
+ if (stcb) {
+ return (stcb);
}
}
- find_tcp_pool = 0;
- if ((ch->chunk_type != SCTP_INITIATION) &&
- (ch->chunk_type != SCTP_INITIATION_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ECHO)) {
- /* Other chunk types go to the tcp pool. */
- find_tcp_pool = 1;
- }
if (inp_p) {
- retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
- find_tcp_pool, vrf_id);
+ stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
+ 1, vrf_id);
inp = *inp_p;
} else {
- retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
- find_tcp_pool, vrf_id);
+ stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp,
+ 1, vrf_id);
}
- SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
- if (retval == NULL && inp) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp);
+ if (stcb == NULL && inp) {
/* Found a EP but not this address */
if ((ch->chunk_type == SCTP_INITIATION) ||
(ch->chunk_type == SCTP_INITIATION_ACK)) {
@@ -2278,15 +2295,15 @@
}
return (NULL);
}
- retval = sctp_findassociation_special_addr(m,
- offset, sh, &inp, netp, to);
+ stcb = sctp_findassociation_special_addr(m,
+ offset, sh, &inp, netp, dst);
if (inp_p != NULL) {
*inp_p = inp;
}
}
}
- SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
- return (retval);
+ SCTPDBG(SCTP_DEBUG_PCB1, "stcb is %p\n", (void *)stcb);
+ return (stcb);
}
/*
@@ -2295,12 +2312,11 @@
*/
struct sctp_tcb *
sctp_findassociation_ep_asconf(struct mbuf *m, int offset,
- struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+ struct sockaddr *dst, struct sctphdr *sh,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
struct sctp_tcb *stcb;
- struct sockaddr_storage local_store, remote_store;
- struct sockaddr *to;
- struct ip *iph;
+ union sctp_sockstore remote_store;
struct sctp_paramhdr parm_buf, *phdr;
int ptype;
int zero_address = 0;
@@ -2310,47 +2326,16 @@
#endif
#ifdef INET6
- struct ip6_hdr *ip6;
struct sockaddr_in6 *sin6;
#endif
- memset(&local_store, 0, sizeof(local_store));
memset(&remote_store, 0, sizeof(remote_store));
- to = (struct sockaddr *)&local_store;
- /* First get the destination address setup too. */
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- /* its IPv4 */
- sin = (struct sockaddr_in *)&local_store;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_port = sh->dest_port;
- sin->sin_addr.s_addr = iph->ip_dst.s_addr;
- break;
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- /* its IPv6 */
- ip6 = mtod(m, struct ip6_hdr *);
- sin6 = (struct sockaddr_in6 *)&local_store;
- sin6->sin6_family = AF_INET6;
- sin6->sin6_len = sizeof(*sin6);
- sin6->sin6_port = sh->dest_port;
- sin6->sin6_addr = ip6->ip6_dst;
- break;
-#endif
- default:
- return NULL;
- }
-
phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
&parm_buf, sizeof(struct sctp_paramhdr));
if (phdr == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
- __FUNCTION__);
+ __func__);
return NULL;
}
ptype = (int)((uint32_t) ntohs(phdr->param_type));
@@ -2370,10 +2355,10 @@
&p6_buf.ph, sizeof(*p6));
if (p6 == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
- __FUNCTION__);
+ __func__);
return (NULL);
}
- sin6 = (struct sockaddr_in6 *)&remote_store;
+ sin6 = &remote_store.sin6;
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
sin6->sin6_port = sh->src_port;
@@ -2397,10 +2382,10 @@
&p4_buf.ph, sizeof(*p4));
if (p4 == NULL) {
SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
- __FUNCTION__);
+ __func__);
return (NULL);
}
- sin = (struct sockaddr_in *)&remote_store;
+ sin = &remote_store.sin;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_port = sh->src_port;
@@ -2416,7 +2401,7 @@
}
if (zero_address) {
- stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p,
+ stcb = sctp_findassoc_by_vtag(NULL, dst, ntohl(sh->v_tag), inp_p,
netp, sh->src_port, sh->dest_port, 1, vrf_id, 0);
if (stcb != NULL) {
SCTP_INP_DECR_REF(*inp_p);
@@ -2423,8 +2408,8 @@
}
} else {
stcb = sctp_findassociation_ep_addr(inp_p,
- (struct sockaddr *)&remote_store, netp,
- to, NULL);
+ &remote_store.sa, netp,
+ dst, NULL);
}
return (stcb);
}
@@ -2469,33 +2454,46 @@
/* setup socket pointers */
inp->sctp_socket = so;
inp->ip_inp.inp.inp_socket = so;
+ inp->ip_inp.inp.inp_cred = crhold(so->so_cred);
#ifdef INET6
- if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
- inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+ if (INP_SOCKAF(so) == AF_INET6) {
+ if (MODULE_GLOBAL(ip6_auto_flowlabel)) {
+ inp->ip_inp.inp.inp_flags |= IN6P_AUTOFLOWLABEL;
+ }
+ if (MODULE_GLOBAL(ip6_v6only)) {
+ inp->ip_inp.inp.inp_flags |= IN6P_IPV6_V6ONLY;
+ }
}
#endif
inp->sctp_associd_counter = 1;
inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ inp->max_cwnd = 0;
inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off);
- inp->sctp_ecn_enable = SCTP_BASE_SYSCTL(sctp_ecn_enable);
+ inp->ecn_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_ecn_enable);
+ inp->prsctp_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pr_enable);
+ if (SCTP_BASE_SYSCTL(sctp_auth_disable)) {
+ inp->auth_supported = 0;
+ } else {
+ inp->auth_supported = 1;
+ }
+ inp->asconf_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_asconf_enable);
+ inp->reconfig_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_reconfig_enable);
+ inp->nrsack_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_nrsack_enable);
+ inp->pktdrop_supported = (uint8_t) SCTP_BASE_SYSCTL(sctp_pktdrop_enable);
+ inp->fibnum = so->so_fibnum;
/* init the small hash table we use to track asocid <-> tcb */
inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark);
if (inp->sctp_asocidhash == NULL) {
+ crfree(inp->ip_inp.inp.inp_cred);
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
SCTP_INP_INFO_WUNLOCK();
return (ENOBUFS);
}
#ifdef IPSEC
- {
- struct inpcbpolicy *pcb_sp = NULL;
-
- error = ipsec_init_policy(so, &pcb_sp);
- /* Arrange to share the policy */
- inp->ip_inp.inp.inp_sp = pcb_sp;
- ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
- }
+ error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp);
if (error != 0) {
+ crfree(inp->ip_inp.inp.inp_cred);
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
SCTP_INP_INFO_WUNLOCK();
return error;
@@ -2526,6 +2524,10 @@
*/
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
so->so_pcb = NULL;
+ crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+ ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
return (EOPNOTSUPP);
}
@@ -2545,6 +2547,10 @@
SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
so->so_pcb = NULL;
+ crfree(inp->ip_inp.inp.inp_cred);
+#ifdef IPSEC
+ ipsec_delete_pcbpolicy(&inp->ip_inp.inp);
+#endif
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp);
return (ENOBUFS);
}
@@ -2592,9 +2598,6 @@
m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default);
m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default);
m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default);
-
- m->max_open_streams_intome = MAX_SCTP_STREAMS;
-
m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default);
m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default);
m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default);
@@ -2606,11 +2609,13 @@
m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module);
m->sctp_default_ss_module = SCTP_BASE_SYSCTL(sctp_default_ss_module);
+ m->max_open_streams_intome = SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default);
/* number of streams to pre-open on a association */
m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default);
/* Add adaptation cookie */
- m->adaptation_layer_indicator = 0x504C5253;
+ m->adaptation_layer_indicator = 0;
+ m->adaptation_layer_indicator_provided = 0;
/* seed random number generator */
m->random_counter = 1;
@@ -2639,12 +2644,15 @@
*/
m->local_hmacs = sctp_default_supported_hmaclist();
m->local_auth_chunks = sctp_alloc_chunklist();
+ if (inp->asconf_supported) {
+ sctp_auth_add_chunk(SCTP_ASCONF, m->local_auth_chunks);
+ sctp_auth_add_chunk(SCTP_ASCONF_ACK, m->local_auth_chunks);
+ }
m->default_dscp = 0;
#ifdef INET6
m->default_flowlabel = 0;
#endif
m->port = 0; /* encapsulation disabled by default */
- sctp_auth_set_default_chunks(m->local_auth_chunks);
LIST_INIT(&m->shared_keys);
/* add default NULL key as key id 0 */
null_key = sctp_alloc_sharedkey();
@@ -2778,9 +2786,48 @@
SCTP_INP_WUNLOCK(old_inp);
}
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+static int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+ struct sctp_laddr *laddr;
+ laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
+ if (laddr == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = ifa;
+ laddr->action = act;
+ atomic_add_int(&ifa->refcount, 1);
+ /* insert it */
+ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
+ return (0);
+}
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+static void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+ /* remove from the list */
+ LIST_REMOVE(laddr, sctp_nxt_addr);
+ sctp_free_ifa(laddr->ifa);
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
+ SCTP_DECR_LADDR_COUNT();
+}
+
+
+
/* sctp_ifap is used to bypass normal local address validation checks */
int
sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
@@ -2797,15 +2844,14 @@
uint32_t vrf_id;
lport = 0;
- error = 0;
bindall = 1;
inp = (struct sctp_inpcb *)so->so_pcb;
ip_inp = (struct inpcb *)so->so_pcb;
#ifdef SCTP_DEBUG
if (addr) {
- SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
+ SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port: %d\n",
ntohs(((struct sockaddr_in *)addr)->sin_port));
- SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
+ SCTPDBG(SCTP_DEBUG_PCB1, "Addr: ");
SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
}
#endif
@@ -2867,7 +2913,6 @@
return (EINVAL);
}
lport = sin6->sin6_port;
-
/*
* For LOOPBACK the prison_local_ip6() call
* will transmute the ipv6 address to the
@@ -2905,7 +2950,7 @@
SCTP_INP_INCR_REF(inp);
if (lport) {
/*
- * Did the caller specify a port? if so we must see if a ep
+ * Did the caller specify a port? if so we must see if an ep
* already has this one bound.
*/
/* got to be root to get at low ports */
@@ -2919,13 +2964,6 @@
return (error);
}
}
- if (p == NULL) {
- SCTP_INP_DECR_REF(inp);
- SCTP_INP_WUNLOCK(inp);
- SCTP_INP_INFO_WUNLOCK();
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
- return (error);
- }
SCTP_INP_WUNLOCK(inp);
if (bindall) {
vrf_id = inp->def_vrf_id;
@@ -2986,8 +3024,7 @@
if (bindall) {
/* verify that no lport is not used by a singleton */
if ((port_reuse_active == 0) &&
- (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))
- ) {
+ (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id))) {
/* Sorry someone already has this one bound */
if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
(sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) {
@@ -3119,31 +3156,21 @@
* too (before adding).
*/
struct sctp_ifa *ifa;
- struct sockaddr_storage store_sa;
+ union sctp_sockstore store;
- memset(&store_sa, 0, sizeof(store_sa));
+ memset(&store, 0, sizeof(store));
switch (addr->sa_family) {
#ifdef INET
case AF_INET:
- {
- struct sockaddr_in *sin;
-
- sin = (struct sockaddr_in *)&store_sa;
- memcpy(sin, addr, sizeof(struct sockaddr_in));
- sin->sin_port = 0;
- break;
- }
+ memcpy(&store.sin, addr, sizeof(struct sockaddr_in));
+ store.sin.sin_port = 0;
+ break;
#endif
#ifdef INET6
case AF_INET6:
- {
- struct sockaddr_in6 *sin6;
-
- sin6 = (struct sockaddr_in6 *)&store_sa;
- memcpy(sin6, addr, sizeof(struct sockaddr_in6));
- sin6->sin6_port = 0;
- break;
- }
+ memcpy(&store.sin6, addr, sizeof(struct sockaddr_in6));
+ store.sin6.sin6_port = 0;
+ break;
#endif
default:
break;
@@ -3153,15 +3180,15 @@
* zero out the port to find the address! yuck! can't do
* this earlier since need port for sctp_pcb_findep()
*/
- if (sctp_ifap != NULL)
+ if (sctp_ifap != NULL) {
ifa = sctp_ifap;
- else {
+ } else {
/*
* Note for BSD we hit here always other O/S's will
* pass things in via the sctp_ifap argument
* (Panda).
*/
- ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+ ifa = sctp_find_ifa_by_addr(&store.sa,
vrf_id, SCTP_ADDR_NOT_LOCKED);
}
if (ifa == NULL) {
@@ -3210,7 +3237,7 @@
/* put it in the bucket */
LIST_INSERT_HEAD(head, inp, sctp_hash);
SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n",
- head, ntohs(lport), port_reuse_active);
+ (void *)head, ntohs(lport), port_reuse_active);
/* set in the port */
inp->sctp_lport = lport;
@@ -3404,23 +3431,7 @@
/* Left with Data unread */
struct mbuf *op_err;
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err) {
- /* Fill in the user initiated abort */
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(op_err) =
- sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
- ph = mtod(op_err,
- struct sctp_paramhdr *);
- ph->param_type = htons(
- SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
- }
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3435,8 +3446,7 @@
continue;
} else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
- (asoc->asoc.stream_queue_cnt == 0)
- ) {
+ (asoc->asoc.stream_queue_cnt == 0)) {
if (asoc->asoc.locked_on_sending) {
goto abort_anyway;
}
@@ -3444,16 +3454,10 @@
(SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
struct sctp_nets *netp;
- if (asoc->asoc.alternate) {
- netp = asoc->asoc.alternate;
- } else {
- netp = asoc->asoc.primary_destination;
- }
/*
* there is nothing queued to send,
* so I send shutdown
*/
- sctp_send_shutdown(asoc, netp);
if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
(SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
@@ -3460,6 +3464,13 @@
}
SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(asoc);
+ if (asoc->asoc.alternate) {
+ netp = asoc->asoc.alternate;
+ } else {
+ netp = asoc->asoc.primary_destination;
+ }
+ sctp_send_shutdown(asoc, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
@@ -3478,7 +3489,7 @@
sctp_streamhead);
if (sp == NULL) {
SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
- asoc->asoc.locked_on_sending,
+ (void *)asoc->asoc.locked_on_sending,
asoc->asoc.locked_on_sending->stream_no);
} else {
if ((sp->length == 0) && (sp->msg_is_complete == 0))
@@ -3491,27 +3502,7 @@
struct mbuf *op_err;
abort_anyway:
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err) {
- /*
- * Fill in the user
- * initiated abort
- */
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(op_err) =
- (sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t));
- ph = mtod(op_err,
- struct sctp_paramhdr *);
- ph->param_type = htons(
- SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
- }
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3574,24 +3565,8 @@
if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
struct mbuf *op_err;
- uint32_t *ippp;
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err) {
- /* Fill in the user initiated abort */
- struct sctp_paramhdr *ph;
-
- SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t));
- ph = mtod(op_err, struct sctp_paramhdr *);
- ph->param_type = htons(
- SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
-
- }
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -3604,7 +3579,8 @@
(SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
cnt++;
}
}
@@ -3698,13 +3674,9 @@
* macro here since le_next will get freed as part of the
* sctp_free_assoc() call.
*/
- if (so) {
#ifdef IPSEC
- ipsec_delete_pcbpolicy(ip_pcb);
-#endif /* IPSEC */
-
- /* Unlocks not needed since the socket is gone now */
- }
+ ipsec_delete_pcbpolicy(ip_pcb);
+#endif
if (ip_pcb->inp_options) {
(void)sctp_m_free(ip_pcb->inp_options);
ip_pcb->inp_options = 0;
@@ -3754,6 +3726,7 @@
inp->sctp_tcbhash = NULL;
}
/* Now we must put the ep memory back into the zone pool */
+ crfree(inp->ip_inp.inp.inp_cred);
INP_LOCK_DESTROY(&inp->ip_inp.inp);
SCTP_INP_LOCK_DESTROY(inp);
SCTP_INP_READ_DESTROY(inp);
@@ -3850,17 +3823,13 @@
/* assure len is set */
sin->sin_len = sizeof(struct sockaddr_in);
if (set_scope) {
-#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
- stcb->ipv4_local_scope = 1;
-#else
if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
- stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.scope.ipv4_local_scope = 1;
}
-#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
} else {
/* Validate the address is in scope */
if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
- (stcb->asoc.ipv4_local_scope == 0)) {
+ (stcb->asoc.scope.ipv4_local_scope == 0)) {
addr_inscope = 0;
}
}
@@ -3881,10 +3850,10 @@
sin6->sin6_len = sizeof(struct sockaddr_in6);
if (set_scope) {
if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
- stcb->asoc.loopback_scope = 1;
- stcb->asoc.local_scope = 0;
- stcb->asoc.ipv4_local_scope = 1;
- stcb->asoc.site_scope = 1;
+ stcb->asoc.scope.loopback_scope = 1;
+ stcb->asoc.scope.local_scope = 0;
+ stcb->asoc.scope.ipv4_local_scope = 1;
+ stcb->asoc.scope.site_scope = 1;
} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
/*
* If the new destination is a
@@ -3896,8 +3865,8 @@
* also be on our private network
* for v4 too.
*/
- stcb->asoc.ipv4_local_scope = 1;
- stcb->asoc.site_scope = 1;
+ stcb->asoc.scope.ipv4_local_scope = 1;
+ stcb->asoc.scope.site_scope = 1;
} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
/*
* If the new destination is
@@ -3904,18 +3873,18 @@
* SITE_LOCAL then we must have site
* scope in common.
*/
- stcb->asoc.site_scope = 1;
+ stcb->asoc.scope.site_scope = 1;
}
} else {
/* Validate the address is in scope */
if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
- (stcb->asoc.loopback_scope == 0)) {
+ (stcb->asoc.scope.loopback_scope == 0)) {
addr_inscope = 0;
} else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
- (stcb->asoc.local_scope == 0)) {
+ (stcb->asoc.scope.local_scope == 0)) {
addr_inscope = 0;
} else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
- (stcb->asoc.site_scope == 0)) {
+ (stcb->asoc.scope.site_scope == 0)) {
addr_inscope = 0;
}
}
@@ -3950,10 +3919,10 @@
}
net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
- stcb->asoc.loopback_scope = 1;
- stcb->asoc.ipv4_local_scope = 1;
- stcb->asoc.local_scope = 0;
- stcb->asoc.site_scope = 1;
+ stcb->asoc.scope.loopback_scope = 1;
+ stcb->asoc.scope.ipv4_local_scope = 1;
+ stcb->asoc.scope.local_scope = 0;
+ stcb->asoc.scope.site_scope = 1;
addr_inscope = 1;
}
net->failure_threshold = stcb->asoc.def_net_failure;
@@ -4011,7 +3980,9 @@
sin6->sin6_scope_id = 0;
}
#endif
- SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+ SCTP_RTALLOC((sctp_route_t *) & net->ro,
+ stcb->asoc.vrf_id,
+ stcb->sctp_ep->fibnum);
if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
/* Get source address */
@@ -4021,9 +3992,14 @@
net,
0,
stcb->asoc.vrf_id);
- /* Now get the interface MTU */
- if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
- net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ if (net->ro._s_addr != NULL) {
+ net->src_addr_selected = 1;
+ /* Now get the interface MTU */
+ if (net->ro._s_addr->ifn_p != NULL) {
+ net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ }
+ } else {
+ net->src_addr_selected = 0;
}
if (net->mtu > 0) {
uint32_t rmtu;
@@ -4045,6 +4021,8 @@
net->mtu = rmtu;
}
}
+ } else {
+ net->src_addr_selected = 0;
}
if (net->mtu == 0) {
switch (newaddr->sa_family) {
@@ -4062,9 +4040,11 @@
break;
}
}
+#if defined(INET) || defined(INET6)
if (net->port) {
net->mtu -= (uint32_t) sizeof(struct udphdr);
}
+#endif
if (from == SCTP_ALLOC_ASOC) {
stcb->asoc.smallest_mtu = net->mtu;
}
@@ -4090,14 +4070,11 @@
*/
net->find_pseudo_cumack = 1;
net->find_rtx_pseudo_cumack = 1;
- net->src_addr_selected = 0;
/* Choose an initial flowid. */
net->flowid = stcb->asoc.my_vtag ^
ntohs(stcb->rport) ^
ntohs(stcb->sctp_ep->sctp_lport);
-#ifdef INVARIANTS
- net->flowidset = 1;
-#endif
+ net->flowtype = M_HASHTYPE_OPAQUE;
if (netp) {
*netp = net;
}
@@ -4218,6 +4195,7 @@
struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
int *error, uint32_t override_tag, uint32_t vrf_id,
+ uint16_t o_streams,
struct thread *p
)
{
@@ -4376,7 +4354,7 @@
/* setup back pointer's */
stcb->sctp_ep = inp;
stcb->sctp_socket = inp->sctp_socket;
- if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) {
+ if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id, o_streams))) {
/* failed */
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
@@ -4425,6 +4403,7 @@
asoc->nr_mapping_array = NULL;
}
SCTP_DECR_ASOC_COUNT();
+ SCTP_TCB_UNLOCK(stcb);
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
LIST_REMOVE(stcb, sctp_tcbasocidhash);
@@ -4451,7 +4430,7 @@
LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
}
SCTP_INP_WUNLOCK(inp);
- SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", (void *)stcb);
return (stcb);
}
@@ -4560,23 +4539,21 @@
int i;
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
- if (!LIST_EMPTY(chain)) {
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- found = 1;
- break;
- }
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ found = 1;
+ break;
}
- if (found)
- break;
}
+ if (found)
+ break;
}
}
@@ -4590,19 +4567,17 @@
SCTP_INP_INFO_WLOCK();
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
- if (!LIST_EMPTY(chain)) {
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- found = 1;
- break;
- }
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ found = 1;
+ break;
}
- if (found)
- break;
}
+ if (found)
+ break;
}
SCTP_INP_INFO_WUNLOCK();
return (found);
@@ -4624,43 +4599,41 @@
(void)SCTP_GETTIME_TIMEVAL(&now);
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
set = 0;
- if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
/* Block(s) present, lets find space, and expire on the fly */
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if ((twait_block->vtag_block[i].v_tag == 0) &&
- !set) {
- twait_block->vtag_block[i].tv_sec_at_expire =
- now.tv_sec + time;
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == 0) &&
+ !set) {
+ twait_block->vtag_block[i].tv_sec_at_expire =
+ now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ twait_block->vtag_block[i].lport = lport;
+ twait_block->vtag_block[i].rport = rport;
+ set = 1;
+ } else if ((twait_block->vtag_block[i].v_tag) &&
+ ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ if (set == 0) {
+ /* Reuse it for my new tag */
+ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
twait_block->vtag_block[i].v_tag = tag;
twait_block->vtag_block[i].lport = lport;
twait_block->vtag_block[i].rport = rport;
set = 1;
- } else if ((twait_block->vtag_block[i].v_tag) &&
- ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
- /* Audit expires this guy */
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- if (set == 0) {
- /* Reuse it for my new tag */
- twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
- twait_block->vtag_block[i].v_tag = tag;
- twait_block->vtag_block[i].lport = lport;
- twait_block->vtag_block[i].rport = rport;
- set = 1;
- }
}
}
- if (set) {
- /*
- * We only do up to the block where we can
- * place our tag for audits
- */
- break;
- }
}
+ if (set) {
+ /*
+ * We only do up to the block where we can place our
+ * tag for audits
+ */
+ break;
+ }
}
/* Need to add a new block to chain */
if (!set) {
@@ -5038,6 +5011,13 @@
}
/* pending send queue SHOULD be empty */
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+ }
TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
if (chk->data) {
if (so) {
@@ -5062,6 +5042,15 @@
}
/* sent queue SHOULD be empty */
TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
+ if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+ }
+ }
TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
if (chk->data) {
if (so) {
@@ -5081,6 +5070,13 @@
SCTP_DECR_CHK_COUNT();
/* sa_ignore FREED_MEMORY */
}
+#ifdef INVARIANTS
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (stcb->asoc.strmout[i].chunks_on_queues > 0) {
+ panic("%u chunks left for stream %u.", stcb->asoc.strmout[i].chunks_on_queues, i);
+ }
+ }
+#endif
/* control queue MAY not be empty */
TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) {
TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
@@ -5210,6 +5206,7 @@
/* Insert new items here :> */
/* Get rid of LOCK */
+ SCTP_TCB_UNLOCK(stcb);
SCTP_TCB_LOCK_DESTROY(stcb);
SCTP_TCB_SEND_LOCK_DESTROY(stcb);
if (from_inpcbfree == SCTP_NORMAL_PROC) {
@@ -5334,7 +5331,7 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
@@ -5440,7 +5437,7 @@
/*
- * Delete the address from the endpoint local address list There is nothing
+ * Delete the address from the endpoint local address list. There is nothing
* to be done if we are bound to all addresses
*/
void
@@ -5491,8 +5488,7 @@
* to laddr
*/
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (net->ro._s_addr &&
- (net->ro._s_addr->ifa == laddr->ifa)) {
+ if (net->ro._s_addr == laddr->ifa) {
/* Yep, purge src address selected */
sctp_rtentry_t *rt;
@@ -5556,46 +5552,6 @@
}
/*
- * insert an laddr entry with the given ifa for the desired list
- */
-int
-sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
-{
- struct sctp_laddr *laddr;
-
- laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr);
- if (laddr == NULL) {
- /* out of memory? */
- SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
- return (EINVAL);
- }
- SCTP_INCR_LADDR_COUNT();
- bzero(laddr, sizeof(*laddr));
- (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
- laddr->ifa = ifa;
- laddr->action = act;
- atomic_add_int(&ifa->refcount, 1);
- /* insert it */
- LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
-
- return (0);
-}
-
-/*
- * Remove an laddr entry from the local address list (on an assoc)
- */
-void
-sctp_remove_laddr(struct sctp_laddr *laddr)
-{
-
- /* remove from the list */
- LIST_REMOVE(laddr, sctp_nxt_addr);
- sctp_free_ifa(laddr->ifa);
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr);
- SCTP_DECR_LADDR_COUNT();
-}
-
-/*
* Remove a local address from the TCB local address restricted list
*/
void
@@ -5935,7 +5891,6 @@
for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) {
LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]);
}
-
sctp_startup_iterator();
#if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
@@ -5964,40 +5919,59 @@
struct sctp_tagblock *twait_block, *prev_twait_block;
struct sctp_laddr *wi, *nwi;
int i;
+ struct sctp_iterator *it, *nit;
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_PRINTF("%s: race condition on teardown.\n", __func__);
+ return;
+ }
+ SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
/*
- * Free BSD the it thread never exits but we do clean up. The only
- * way freebsd reaches here if we have VRF's but we still add the
- * ifdef to make it compile on old versions.
+ * In FreeBSD the iterator thread never exits but we do clean up.
+ * The only way FreeBSD reaches here is if we have VRF's but we
+ * still add the ifdef to make it compile on old versions.
*/
- {
- struct sctp_iterator *it, *nit;
-
- SCTP_IPI_ITERATOR_WQ_LOCK();
- TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
- if (it->vn != curvnet) {
- continue;
- }
- TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
- if (it->function_atend != NULL) {
- (*it->function_atend) (it->pointer, it->val);
- }
- SCTP_FREE(it, SCTP_M_ITER);
- }
+retry:
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * sctp_iterator_worker() might be working on an it entry without
+ * holding the lock. We won't find it on the list either and
+ * continue and free/destroy it. While holding the lock, spin, to
+ * avoid the race condition as sctp_iterator_worker() will have to
+ * wait to re-aquire the lock.
+ */
+ if (sctp_it_ctl.iterator_running != 0 || sctp_it_ctl.cur_it != NULL) {
SCTP_IPI_ITERATOR_WQ_UNLOCK();
- SCTP_ITERATOR_LOCK();
- if ((sctp_it_ctl.cur_it) &&
- (sctp_it_ctl.cur_it->vn == curvnet)) {
- sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+ SCTP_PRINTF("%s: Iterator running while we held the lock. Retry. "
+ "cur_it=%p\n", __func__, sctp_it_ctl.cur_it);
+ DELAY(10);
+ goto retry;
+ }
+ TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
+ if (it->vn != curvnet) {
+ continue;
}
- SCTP_ITERATOR_UNLOCK();
+ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
}
-
- SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer));
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ SCTP_ITERATOR_LOCK();
+ if ((sctp_it_ctl.cur_it) &&
+ (sctp_it_ctl.cur_it->vn == curvnet)) {
+ sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT;
+ }
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_OS_TIMER_STOP_DRAIN(&SCTP_BASE_INFO(addr_wq_timer.timer));
SCTP_WQ_ADDR_LOCK();
LIST_FOREACH_SAFE(wi, &SCTP_BASE_INFO(addr_wq), sctp_nxt_addr, nwi) {
LIST_REMOVE(wi, sctp_nxt_addr);
SCTP_DECR_LADDR_COUNT();
+ if (wi->action == SCTP_DEL_IP_ADDRESS) {
+ SCTP_FREE(wi->ifa, SCTP_M_IFA);
+ }
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi);
}
SCTP_WQ_ADDR_UNLOCK();
@@ -6057,6 +6031,14 @@
SCTP_WQ_ADDR_DESTROY();
+ /* Get rid of other stuff too. */
+ if (SCTP_BASE_INFO(sctp_asochash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
+ if (SCTP_BASE_INFO(sctp_ephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
+ if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
+ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
+
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr));
@@ -6066,13 +6048,6 @@
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf));
SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack));
- /* Get rid of other stuff to */
- if (SCTP_BASE_INFO(sctp_asochash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark));
- if (SCTP_BASE_INFO(sctp_ephash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark));
- if (SCTP_BASE_INFO(sctp_tcpephash) != NULL)
- SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark));
#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
SCTP_FREE(SCTP_BASE_STATS, SCTP_M_MCORE);
#endif
@@ -6081,7 +6056,8 @@
int
sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
- int offset, int limit, struct sctphdr *sh,
+ int offset, int limit,
+ struct sockaddr *src, struct sockaddr *dst,
struct sockaddr *altsa)
{
/*
@@ -6093,13 +6069,10 @@
*/
struct sctp_inpcb *inp;
struct sctp_nets *net, *nnet, *net_tmp;
- struct ip *iph;
struct sctp_paramhdr *phdr, parm_buf;
struct sctp_tcb *stcb_tmp;
uint16_t ptype, plen;
struct sockaddr *sa;
- struct sockaddr_storage dest_store;
- struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
struct sctp_auth_random *p_random = NULL;
uint16_t random_len = 0;
@@ -6114,7 +6087,14 @@
sctp_key_t *new_key;
uint32_t keylen;
int got_random = 0, got_hmacs = 0, got_chklist = 0;
- uint8_t ecn_allowed;
+ uint8_t peer_supports_ecn;
+ uint8_t peer_supports_prsctp;
+ uint8_t peer_supports_auth;
+ uint8_t peer_supports_asconf;
+ uint8_t peer_supports_asconf_ack;
+ uint8_t peer_supports_reconfig;
+ uint8_t peer_supports_nrsack;
+ uint8_t peer_supports_pktdrop;
#ifdef INET
struct sockaddr_in sin;
@@ -6138,68 +6118,18 @@
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_port = stcb->rport;
#endif
- iph = mtod(m, struct ip *);
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- /* its IPv4 */
- struct sockaddr_in *sin_2;
-
- sin_2 = (struct sockaddr_in *)(local_sa);
- memset(sin_2, 0, sizeof(sin));
- sin_2->sin_family = AF_INET;
- sin_2->sin_len = sizeof(sin);
- sin_2->sin_port = sh->dest_port;
- sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
- if (altsa) {
- /*
- * For cookies we use the src address NOT
- * from the packet but from the original
- * INIT.
- */
- sa = altsa;
- } else {
- sin.sin_addr = iph->ip_src;
- sa = (struct sockaddr *)&sin;
- }
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- /* its IPv6 */
- struct ip6_hdr *ip6;
- struct sockaddr_in6 *sin6_2;
-
- ip6 = mtod(m, struct ip6_hdr *);
- sin6_2 = (struct sockaddr_in6 *)(local_sa);
- memset(sin6_2, 0, sizeof(sin6));
- sin6_2->sin6_family = AF_INET6;
- sin6_2->sin6_len = sizeof(struct sockaddr_in6);
- sin6_2->sin6_port = sh->dest_port;
- sin6_2->sin6_addr = ip6->ip6_dst;
- if (altsa) {
- /*
- * For cookies we use the src address NOT
- * from the packet but from the original
- * INIT.
- */
- sa = altsa;
- } else {
- sin6.sin6_addr = ip6->ip6_src;
- sa = (struct sockaddr *)&sin6;
- }
- break;
- }
-#endif
- default:
- return (-1);
- break;
+ if (altsa) {
+ sa = altsa;
+ } else {
+ sa = src;
}
- /* Turn off ECN until we get through all params */
- ecn_allowed = 0;
+ peer_supports_ecn = 0;
+ peer_supports_prsctp = 0;
+ peer_supports_auth = 0;
+ peer_supports_asconf = 0;
+ peer_supports_reconfig = 0;
+ peer_supports_nrsack = 0;
+ peer_supports_pktdrop = 0;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* mark all addresses that we have currently on the list */
net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
@@ -6207,7 +6137,7 @@
/* does the source address already exist? if so skip it */
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
- stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
@@ -6216,7 +6146,7 @@
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
- if (stcb->asoc.ipv4_addr_legal) {
+ if (stcb->asoc.scope.ipv4_addr_legal) {
if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
return (-1);
}
@@ -6225,7 +6155,7 @@
#endif
#ifdef INET6
case AF_INET6:
- if (stcb->asoc.ipv6_addr_legal) {
+ if (stcb->asoc.scope.ipv6_addr_legal) {
if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
return (-2);
}
@@ -6249,12 +6179,6 @@
/* the assoc was freed? */
return (-4);
}
- /*
- * peer must explicitly turn this on. This may have been initialized
- * to be "on" in order to allow local addr changes while INIT's are
- * in flight.
- */
- stcb->asoc.peer_supports_asconf = 0;
/* now we must go through each of the params. */
phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
while (phdr) {
@@ -6272,7 +6196,7 @@
}
#ifdef INET
if (ptype == SCTP_IPV4_ADDRESS) {
- if (stcb->asoc.ipv4_addr_legal) {
+ if (stcb->asoc.scope.ipv4_addr_legal) {
struct sctp_ipv4addr_param *p4, p4_buf;
/* ok get the v4 address and check/add */
@@ -6297,7 +6221,7 @@
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
- local_sa, stcb);
+ dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
@@ -6337,12 +6261,20 @@
*/
if (stcb_tmp) {
if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
/*
* in setup state we
* abort this guy
*/
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+ stcb_tmp, op_err,
+ SCTP_SO_NOT_LOCKED);
goto add_it_now;
}
SCTP_TCB_UNLOCK(stcb_tmp);
@@ -6358,7 +6290,7 @@
#endif
#ifdef INET6
if (ptype == SCTP_IPV6_ADDRESS) {
- if (stcb->asoc.ipv6_addr_legal) {
+ if (stcb->asoc.scope.ipv6_addr_legal) {
/* ok get the v6 address and check/add */
struct sctp_ipv6addr_param *p6, p6_buf;
@@ -6387,7 +6319,7 @@
inp = stcb->sctp_ep;
atomic_add_int(&stcb->asoc.refcnt, 1);
stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
- local_sa, stcb);
+ dst, stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
if (stcb_tmp == NULL &&
(inp == stcb->sctp_ep || inp == NULL)) {
@@ -6426,18 +6358,26 @@
* strange, address is in another
* assoc? straighten out locks.
*/
- if (stcb_tmp)
+ if (stcb_tmp) {
if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) {
+ struct mbuf *op_err;
+ char msg[SCTP_DIAG_INFO_LEN];
+
/*
* in setup state we
* abort this guy
*/
+ snprintf(msg, sizeof(msg),
+ "%s:%d at %s", __FILE__, __LINE__, __func__);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ msg);
sctp_abort_an_association(stcb_tmp->sctp_ep,
- stcb_tmp, NULL, SCTP_SO_NOT_LOCKED);
+ stcb_tmp, op_err,
+ SCTP_SO_NOT_LOCKED);
goto add_it_now6;
}
- SCTP_TCB_UNLOCK(stcb_tmp);
-
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ }
if (stcb->asoc.state == 0) {
/* the assoc was freed? */
return (-21);
@@ -6448,7 +6388,7 @@
} else
#endif
if (ptype == SCTP_ECN_CAPABLE) {
- ecn_allowed = 1;
+ peer_supports_ecn = 1;
} else if (ptype == SCTP_ULP_ADAPTATION) {
if (stcb->asoc.state != SCTP_STATE_OPEN) {
struct sctp_adaptation_layer_indication ai,
@@ -6472,7 +6412,9 @@
#endif
- stcb->asoc.peer_supports_asconf = 1;
+ if (stcb->asoc.asconf_supported == 0) {
+ return (-100);
+ }
if (plen > sizeof(lstore)) {
return (-23);
}
@@ -6524,7 +6466,7 @@
stcb->asoc.peer_supports_nat = 1;
} else if (ptype == SCTP_PRSCTP_SUPPORTED) {
/* Peer supports pr-sctp */
- stcb->asoc.peer_supports_prsctp = 1;
+ peer_supports_prsctp = 1;
} else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
/* A supported extension chunk */
struct sctp_supported_chunk_types_param *pr_supported;
@@ -6536,34 +6478,30 @@
if (phdr == NULL) {
return (-25);
}
- stcb->asoc.peer_supports_asconf = 0;
- stcb->asoc.peer_supports_prsctp = 0;
- stcb->asoc.peer_supports_pktdrop = 0;
- stcb->asoc.peer_supports_strreset = 0;
- stcb->asoc.peer_supports_nr_sack = 0;
- stcb->asoc.peer_supports_auth = 0;
pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
num_ent = plen - sizeof(struct sctp_paramhdr);
for (i = 0; i < num_ent; i++) {
switch (pr_supported->chunk_types[i]) {
case SCTP_ASCONF:
+ peer_supports_asconf = 1;
+ break;
case SCTP_ASCONF_ACK:
- stcb->asoc.peer_supports_asconf = 1;
+ peer_supports_asconf_ack = 1;
break;
case SCTP_FORWARD_CUM_TSN:
- stcb->asoc.peer_supports_prsctp = 1;
+ peer_supports_prsctp = 1;
break;
case SCTP_PACKET_DROPPED:
- stcb->asoc.peer_supports_pktdrop = 1;
+ peer_supports_pktdrop = 1;
break;
case SCTP_NR_SELECTIVE_ACK:
- stcb->asoc.peer_supports_nr_sack = 1;
+ peer_supports_nrsack = 1;
break;
case SCTP_STREAM_RESET:
- stcb->asoc.peer_supports_strreset = 1;
+ peer_supports_reconfig = 1;
break;
case SCTP_AUTHENTICATION:
- stcb->asoc.peer_supports_auth = 1;
+ peer_supports_auth = 1;
break;
default:
/* one I have not learned yet */
@@ -6592,8 +6530,8 @@
}
got_random = 1;
} else if (ptype == SCTP_HMAC_LIST) {
- int num_hmacs;
- int i;
+ uint16_t num_hmacs;
+ uint16_t i;
if (plen > sizeof(hmacs_store))
break;
@@ -6700,24 +6638,47 @@
}
}
}
- if (ecn_allowed == 0) {
- stcb->asoc.ecn_allowed = 0;
+ if ((stcb->asoc.ecn_supported == 1) &&
+ (peer_supports_ecn == 0)) {
+ stcb->asoc.ecn_supported = 0;
}
+ if ((stcb->asoc.prsctp_supported == 1) &&
+ (peer_supports_prsctp == 0)) {
+ stcb->asoc.prsctp_supported = 0;
+ }
+ if ((stcb->asoc.auth_supported == 1) &&
+ ((peer_supports_auth == 0) ||
+ (got_random == 0) || (got_hmacs == 0))) {
+ stcb->asoc.auth_supported = 0;
+ }
+ if ((stcb->asoc.asconf_supported == 1) &&
+ ((peer_supports_asconf == 0) || (peer_supports_asconf_ack == 0) ||
+ (stcb->asoc.auth_supported == 0) ||
+ (saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ stcb->asoc.asconf_supported = 0;
+ }
+ if ((stcb->asoc.reconfig_supported == 1) &&
+ (peer_supports_reconfig == 0)) {
+ stcb->asoc.reconfig_supported = 0;
+ }
+ if ((stcb->asoc.nrsack_supported == 1) &&
+ (peer_supports_nrsack == 0)) {
+ stcb->asoc.nrsack_supported = 0;
+ }
+ if ((stcb->asoc.pktdrop_supported == 1) &&
+ (peer_supports_pktdrop == 0)) {
+ stcb->asoc.pktdrop_supported = 0;
+ }
/* validate authentication required parameters */
- if (got_random && got_hmacs) {
- stcb->asoc.peer_supports_auth = 1;
- } else {
- stcb->asoc.peer_supports_auth = 0;
- }
- if (!stcb->asoc.peer_supports_auth && got_chklist) {
+ if ((peer_supports_auth == 0) && (got_chklist == 1)) {
/* peer does not support auth but sent a chunks list? */
return (-31);
}
- if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf &&
- !stcb->asoc.peer_supports_auth) {
+ if ((peer_supports_asconf == 1) && (peer_supports_auth == 0)) {
/* peer supports asconf but not auth? */
return (-32);
- } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+ } else if ((peer_supports_asconf == 1) &&
+ (peer_supports_auth == 1) &&
((saw_asconf == 0) || (saw_asconf_ack == 0))) {
return (-33);
}
@@ -6812,10 +6773,6 @@
SCTP_INP_INFO_RLOCK();
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
SCTP_BASE_INFO(hashasocmark))];
- if (head == NULL) {
- /* invalid vtag */
- goto skip_vtag_check;
- }
LIST_FOREACH(stcb, head, sctp_asocs) {
/*
* We choose not to lock anything here. TCB's can't be
@@ -6839,34 +6796,30 @@
return (0);
}
}
-skip_vtag_check:
-
chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
/* Now what about timed wait ? */
- if (!LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
/*
* Block(s) are present, lets see if we have this tag in the
* list
*/
- LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
- for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
- if (twait_block->vtag_block[i].v_tag == 0) {
- /* not used */
- continue;
- } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
- now->tv_sec) {
- /* Audit expires this guy */
- twait_block->vtag_block[i].tv_sec_at_expire = 0;
- twait_block->vtag_block[i].v_tag = 0;
- twait_block->vtag_block[i].lport = 0;
- twait_block->vtag_block[i].rport = 0;
- } else if ((twait_block->vtag_block[i].v_tag == tag) &&
- (twait_block->vtag_block[i].lport == lport) &&
- (twait_block->vtag_block[i].rport == rport)) {
- /* Bad tag, sorry :< */
- SCTP_INP_INFO_RUNLOCK();
- return (0);
- }
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == 0) {
+ /* not used */
+ continue;
+ } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+ now->tv_sec) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ twait_block->vtag_block[i].lport = 0;
+ twait_block->vtag_block[i].rport = 0;
+ } else if ((twait_block->vtag_block[i].v_tag == tag) &&
+ (twait_block->vtag_block[i].lport == lport) &&
+ (twait_block->vtag_block[i].rport == rport)) {
+ /* Bad tag, sorry :< */
+ SCTP_INP_INFO_RUNLOCK();
+ return (0);
}
}
}
@@ -7058,6 +7011,11 @@
if (af == NULL) {
return (-1);
}
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_PRINTF("%s: abort on initialize being %d\n", __func__,
+ SCTP_BASE_VAR(sctp_pcb_initialized));
+ return (-1);
+ }
SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
SCTP_M_ITER);
if (it == NULL) {
@@ -7096,7 +7054,13 @@
}
SCTP_IPI_ITERATOR_WQ_LOCK();
-
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ SCTP_PRINTF("%s: rollback on initialize being %d it=%p\n", __func__,
+ SCTP_BASE_VAR(sctp_pcb_initialized), it);
+ SCTP_FREE(it, SCTP_M_ITER);
+ return (-1);
+ }
TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
if (sctp_it_ctl.iterator_running == 0) {
sctp_wakeup_iterator();
Modified: trunk/sys/netinet/sctp_pcb.h
===================================================================
--- trunk/sys/netinet/sctp_pcb.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_pcb.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_pcb.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_pcb.h 296052 2016-02-25 18:46:06Z tuexen $");
#ifndef _NETINET_SCTP_PCB_H_
#define _NETINET_SCTP_PCB_H_
@@ -145,7 +145,12 @@
struct sctp_epinfo {
- struct socket *udp_tun_socket;
+#ifdef INET
+ struct socket *udp4_tun_socket;
+#endif
+#ifdef INET6
+ struct socket *udp6_tun_socket;
+#endif
struct sctpasochead *sctp_asochash;
u_long hashasocmark;
@@ -319,6 +324,7 @@
int auto_close_time;
uint32_t initial_sequence_debug;
uint32_t adaptation_layer_indicator;
+ uint8_t adaptation_layer_indicator_provided;
uint32_t store_at;
uint32_t max_burst;
uint32_t fr_max_burst;
@@ -383,8 +389,8 @@
/* back pointer to our socket */
struct socket *sctp_socket;
+ uint64_t sctp_features; /* Feature flags */
uint32_t sctp_flags; /* INP state flag set */
- uint32_t sctp_features; /* Feature flags */
uint32_t sctp_mobility_features; /* Mobility Feature flags */
struct sctp_pcb sctp_ep;/* SCTP ep data */
/* head of the hash of all associations */
@@ -399,9 +405,16 @@
uint32_t sctp_frag_point;
uint32_t partial_delivery_point;
uint32_t sctp_context;
+ uint32_t max_cwnd;
uint8_t local_strreset_support;
uint32_t sctp_cmt_on_off;
- uint32_t sctp_ecn_enable;
+ uint8_t ecn_supported;
+ uint8_t prsctp_supported;
+ uint8_t auth_supported;
+ uint8_t asconf_supported;
+ uint8_t reconfig_supported;
+ uint8_t nrsack_supported;
+ uint8_t pktdrop_supported;
struct sctp_nonpad_sndrcvinfo def_send;
/*-
* These three are here for the sosend_dgram
@@ -418,6 +431,7 @@
struct mtx inp_rdata_mtx;
int32_t refcount;
uint32_t def_vrf_id;
+ uint16_t fibnum;
uint32_t total_sends;
uint32_t total_recvs;
uint32_t last_abort_code;
@@ -529,6 +543,7 @@
struct sctp_tcb *
sctp_findassociation_addr(struct mbuf *, int,
+ struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
struct sctp_nets **, uint32_t vrf_id);
@@ -559,7 +574,7 @@
sctp_assoc_t, int);
struct sctp_tcb *
-sctp_findassociation_ep_asconf(struct mbuf *, int,
+sctp_findassociation_ep_asconf(struct mbuf *, int, struct sockaddr *,
struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **, uint32_t vrf_id);
int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
@@ -570,7 +585,7 @@
struct sctp_tcb *
sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
- int *, uint32_t, uint32_t, struct thread *);
+ int *, uint32_t, uint32_t, uint16_t, struct thread *);
int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
@@ -584,10 +599,6 @@
void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
-int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
-
-void sctp_remove_laddr(struct sctp_laddr *);
-
void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, struct sctp_nets **, int, int);
@@ -604,8 +615,8 @@
void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
int
-sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int,
- int, struct sctphdr *, struct sockaddr *);
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sockaddr *, struct sockaddr *, struct sockaddr *);
int
sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
@@ -640,11 +651,5 @@
#endif
-#ifdef INVARIANTS
-void
- sctp_validate_no_locks(struct sctp_inpcb *inp);
-
-#endif
-
#endif /* _KERNEL */
#endif /* !__sctp_pcb_h__ */
Modified: trunk/sys/netinet/sctp_peeloff.c
===================================================================
--- trunk/sys/netinet/sctp_peeloff.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_peeloff.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_peeloff.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_peeloff.c 283724 2015-05-29 12:54:30Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
@@ -75,9 +75,7 @@
}
state = SCTP_GET_STATE((&stcb->asoc));
if ((state == SCTP_STATE_EMPTY) ||
- (state == SCTP_STATE_INUSE) ||
- (state == SCTP_STATE_COOKIE_WAIT) ||
- (state == SCTP_STATE_COOKIE_ECHOED)) {
+ (state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
@@ -106,9 +104,7 @@
}
state = SCTP_GET_STATE((&stcb->asoc));
if ((state == SCTP_STATE_EMPTY) ||
- (state == SCTP_STATE_INUSE) ||
- (state == SCTP_STATE_COOKIE_WAIT) ||
- (state == SCTP_STATE_COOKIE_ECHOED)) {
+ (state == SCTP_STATE_INUSE)) {
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
return (ENOTCONN);
@@ -123,9 +119,16 @@
n_inp->sctp_mobility_features = inp->sctp_mobility_features;
n_inp->sctp_frag_point = inp->sctp_frag_point;
n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
- n_inp->sctp_ecn_enable = inp->sctp_ecn_enable;
+ n_inp->ecn_supported = inp->ecn_supported;
+ n_inp->prsctp_supported = inp->prsctp_supported;
+ n_inp->auth_supported = inp->auth_supported;
+ n_inp->asconf_supported = inp->asconf_supported;
+ n_inp->reconfig_supported = inp->reconfig_supported;
+ n_inp->nrsack_supported = inp->nrsack_supported;
+ n_inp->pktdrop_supported = inp->pktdrop_supported;
n_inp->partial_delivery_point = inp->partial_delivery_point;
n_inp->sctp_context = inp->sctp_context;
+ n_inp->max_cwnd = inp->max_cwnd;
n_inp->local_strreset_support = inp->local_strreset_support;
n_inp->inp_starting_point_for_iterator = NULL;
/* copy in the authentication parameters from the original endpoint */
@@ -152,103 +155,3 @@
return (0);
}
-
-
-struct socket *
-sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
-{
- struct socket *newso;
- struct sctp_inpcb *inp, *n_inp;
- struct sctp_tcb *stcb;
-
- SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
- inp = (struct sctp_inpcb *)head->so_pcb;
- if (inp == NULL) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
- *error = EFAULT;
- return (NULL);
- }
- stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
- if (stcb == NULL) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
- *error = ENOTCONN;
- return (NULL);
- }
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- CURVNET_SET(head->so_vnet);
- newso = sonewconn(head, SS_ISCONNECTED
- );
- CURVNET_RESTORE();
- if (newso == NULL) {
- SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
- SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
- *error = ENOMEM;
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- return (NULL);
-
- }
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- n_inp = (struct sctp_inpcb *)newso->so_pcb;
- SOCK_LOCK(head);
- n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
- SCTP_PCB_FLAGS_CONNECTED |
- SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
- (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
- n_inp->sctp_features = inp->sctp_features;
- n_inp->sctp_frag_point = inp->sctp_frag_point;
- n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
- n_inp->sctp_ecn_enable = inp->sctp_ecn_enable;
- n_inp->partial_delivery_point = inp->partial_delivery_point;
- n_inp->sctp_context = inp->sctp_context;
- n_inp->local_strreset_support = inp->local_strreset_support;
- n_inp->inp_starting_point_for_iterator = NULL;
-
- /* copy in the authentication parameters from the original endpoint */
- if (n_inp->sctp_ep.local_hmacs)
- sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
- n_inp->sctp_ep.local_hmacs =
- sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
- if (n_inp->sctp_ep.local_auth_chunks)
- sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
- n_inp->sctp_ep.local_auth_chunks =
- sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
- (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
- &n_inp->sctp_ep.shared_keys);
-
- n_inp->sctp_socket = newso;
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
- sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
- n_inp->sctp_ep.auto_close_time = 0;
- sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
- SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
- }
- /* Turn off any non-blocking semantic. */
- SCTP_CLEAR_SO_NBIO(newso);
- newso->so_state |= SS_ISCONNECTED;
- /* We remove it right away */
-
-#ifdef SCTP_LOCK_LOGGING
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
- sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
- }
-#endif
- TAILQ_REMOVE(&head->so_comp, newso, so_list);
- head->so_qlen--;
- SOCK_UNLOCK(head);
- /*
- * Now we must move it from one hash table to another and get the
- * stcb in the right place.
- */
- sctp_move_pcb_and_assoc(inp, n_inp, stcb);
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- /*
- * And now the final hack. We move data in the pending side i.e.
- * head to the new socket buffer. Let the GRUBBING begin :-0
- */
- sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
- return (newso);
-}
Modified: trunk/sys/netinet/sctp_peeloff.h
===================================================================
--- trunk/sys/netinet/sctp_peeloff.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_peeloff.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,22 +32,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_peeloff.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_peeloff.h 243516 2012-11-25 14:25:08Z tuexen $");
#ifndef _NETINET_SCTP_PEELOFF_H_
#define _NETINET_SCTP_PEELOFF_H_
-
-
-
-
#if defined(_KERNEL)
-
int sctp_can_peel_off(struct socket *, sctp_assoc_t);
int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t);
-struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *);
-
-
#endif /* _KERNEL */
-
-#endif
+#endif /* _NETINET_SCTP_PEELOFF_H_ */
Modified: trunk/sys/netinet/sctp_ss_functions.c
===================================================================
--- trunk/sys/netinet/sctp_ss_functions.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_ss_functions.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_ss_functions.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_ss_functions.c 235828 2012-05-23 11:26:28Z tuexen $");
#include <netinet/sctp_pcb.h>
Modified: trunk/sys/netinet/sctp_structs.h
===================================================================
--- trunk/sys/netinet/sctp_structs.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_structs.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_structs.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_structs.h 294140 2016-01-16 12:15:07Z tuexen $");
#ifndef _NETINET_SCTP_STRUCTS_H_
#define _NETINET_SCTP_STRUCTS_H_
@@ -77,9 +77,10 @@
struct sctp_stream_reset_list {
TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+ uint32_t seq;
uint32_t tsn;
- int number_entries;
- struct sctp_stream_reset_out_request req;
+ uint32_t number_entries;
+ uint16_t list_of_streams[];
};
TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
@@ -381,9 +382,7 @@
uint8_t lan_type;
uint8_t rto_needed;
uint32_t flowid;
-#ifdef INVARIANTS
- uint8_t flowidset;
-#endif
+ uint8_t flowtype;
};
@@ -419,8 +418,8 @@
#define CHUNK_FLAGS_FRAGMENT_OK 0x0100
struct chk_id {
- uint16_t id;
- uint16_t can_take_data;
+ uint8_t id;
+ uint8_t can_take_data;
};
@@ -447,7 +446,6 @@
uint8_t do_rtt;
uint8_t book_size_scale;
uint8_t no_fr_allowed;
- uint8_t pr_sctp_on;
uint8_t copy_by_ref;
uint8_t window_probe;
};
@@ -518,13 +516,11 @@
uint32_t context;
uint16_t sinfo_flags;
uint16_t stream;
- uint16_t strseq;
uint16_t act_flags;
uint16_t auth_keyid;
uint8_t holds_key_ref;
uint8_t msg_is_complete;
uint8_t some_taken;
- uint8_t pr_sctp_on;
uint8_t sender_all_done;
uint8_t put_last_out;
uint8_t discard_rest;
@@ -586,13 +582,32 @@
struct ss_fb fb;
};
+/* States for outgoing streams */
+#define SCTP_STREAM_CLOSED 0x00
+#define SCTP_STREAM_OPENING 0x01
+#define SCTP_STREAM_OPEN 0x02
+#define SCTP_STREAM_RESET_PENDING 0x03
+#define SCTP_STREAM_RESET_IN_FLIGHT 0x04
+
+#define SCTP_MAX_STREAMS_AT_ONCE_RESET 200
+
/* This struct is used to track the traffic on outbound streams */
struct sctp_stream_out {
struct sctp_streamhead outqueue;
union scheduling_parameters ss_params;
+ uint32_t chunks_on_queues; /* send queue and sent queue */
+#if defined(SCTP_DETAILED_STR_STATS)
+ uint32_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+ uint32_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
+#else
+ /* Only the aggregation */
+ uint32_t abandoned_unsent[1];
+ uint32_t abandoned_sent[1];
+#endif
uint16_t stream_no;
- uint16_t next_sequence_sent; /* next one I expect to send out */
+ uint16_t next_sequence_send; /* next one I expect to send out */
uint8_t last_msg_incomplete;
+ uint8_t state;
};
/* used to keep track of the addresses yet to try to add/delete */
@@ -1145,7 +1160,7 @@
uint8_t hb_random_idx;
uint8_t default_dscp;
uint8_t asconf_del_pending; /* asconf delete last addr pending */
-
+ uint8_t trigger_reset;
/*
* This value, plus all other ack'd but above cum-ack is added
* together to cross check against the bit that we have yet to
@@ -1153,44 +1168,24 @@
* sum is updated as well.
*/
- /* Flag to tell if ECN is allowed */
- uint8_t ecn_allowed;
+ /* Flags whether an extension is supported or not */
+ uint8_t ecn_supported;
+ uint8_t prsctp_supported;
+ uint8_t auth_supported;
+ uint8_t asconf_supported;
+ uint8_t reconfig_supported;
+ uint8_t nrsack_supported;
+ uint8_t pktdrop_supported;
/* Did the peer make the stream config (add out) request */
uint8_t peer_req_out;
- /* flag to indicate if peer can do asconf */
- uint8_t peer_supports_asconf;
- /* EY - flag to indicate if peer can do nr_sack */
- uint8_t peer_supports_nr_sack;
- /* pr-sctp support flag */
- uint8_t peer_supports_prsctp;
- /* peer authentication support flag */
- uint8_t peer_supports_auth;
- /* stream resets are supported by the peer */
- uint8_t peer_supports_strreset;
uint8_t local_strreset_support;
uint8_t peer_supports_nat;
- /*
- * packet drop's are supported by the peer, we don't really care
- * about this but we bookkeep it anyway.
- */
- uint8_t peer_supports_pktdrop;
- /* Do we allow V6/V4? */
- uint8_t ipv4_addr_legal;
- uint8_t ipv6_addr_legal;
- /* Address scoping flags */
- /* scope value for IPv4 */
- uint8_t ipv4_local_scope;
- /* scope values for IPv6 */
- uint8_t local_scope;
- uint8_t site_scope;
- /* loopback scope */
- uint8_t loopback_scope;
+ struct sctp_scoping scope;
/* flags to handle send alternate net tracking */
- uint8_t used_alt_onsack;
uint8_t used_alt_asconfack;
uint8_t fast_retran_loss_recovery;
uint8_t sat_t3_loss_recovery;
@@ -1211,12 +1206,11 @@
uint8_t sctp_cmt_on_off;
uint8_t iam_blocking;
uint8_t cookie_how[8];
- /* EY 05/05/08 - NR_SACK variable */
- uint8_t sctp_nr_sack_on_off;
/* JRS 5/21/07 - CMT PF variable */
uint8_t sctp_cmt_pf;
uint8_t use_precise_time;
- uint32_t sctp_features;
+ uint64_t sctp_features;
+ uint32_t max_cwnd;
uint16_t port; /* remote UDP encapsulation port */
/*
* The mapping array is used to track out of order sequences above
@@ -1235,6 +1229,8 @@
uint32_t timoshutdownack;
struct timeval start_time;
struct timeval discontinuity_time;
+ uint64_t abandoned_unsent[SCTP_PR_SCTP_MAX + 1];
+ uint64_t abandoned_sent[SCTP_PR_SCTP_MAX + 1];
};
#endif
Added: trunk/sys/netinet/sctp_syscalls.c
===================================================================
--- trunk/sys/netinet/sctp_syscalls.c (rev 0)
+++ trunk/sys/netinet/sctp_syscalls.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,597 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_syscalls.c 321021 2017-07-15 17:28:03Z dchagin $");
+
+#include "opt_capsicum.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_sctp.h"
+#include "opt_compat.h"
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/capsicum.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysproto.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/event.h>
+#include <sys/proc.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/jail.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/sf_buf.h>
+#include <sys/sysent.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+#ifdef COMPAT_FREEBSD32
+#include <compat/freebsd32/freebsd32_util.h>
+#endif
+
+#include <net/vnet.h>
+
+#include <security/audit/audit.h>
+#include <security/mac/mac_framework.h>
+
+#include <netinet/sctp.h>
+#include <netinet/sctp_peeloff.h>
+
+/*
+ * Table of the four SCTP system calls that sctp_syscalls_init() hands to
+ * syscall_helper_register(); SYSCALL_INIT_LAST terminates the list.
+ */
+static struct syscall_helper_data sctp_syscalls[] = {
+ SYSCALL_INIT_HELPER(sctp_peeloff),
+ SYSCALL_INIT_HELPER(sctp_generic_sendmsg),
+ SYSCALL_INIT_HELPER(sctp_generic_sendmsg_iov),
+ SYSCALL_INIT_HELPER(sctp_generic_recvmsg),
+ SYSCALL_INIT_LAST
+};
+
+/*
+ * SYSINIT hook (SI_SUB_SYSCALLS, SI_ORDER_ANY): register the table above
+ * and, when COMPAT_FREEBSD32 is defined, register it a second time through
+ * syscall32_helper_register().  The argument is unused.  A non-zero return
+ * from either call is only checked via KASSERT; it is not otherwise handled.
+ */
+static void
+sctp_syscalls_init(void *unused __unused)
+{
+ int error;
+
+ error = syscall_helper_register(sctp_syscalls);
+ KASSERT((error == 0),
+ ("%s: syscall_helper_register failed for sctp syscalls", __func__));
+#ifdef COMPAT_FREEBSD32
+ error = syscall32_helper_register(sctp_syscalls);
+ KASSERT((error == 0),
+ ("%s: syscall32_helper_register failed for sctp syscalls",
+ __func__));
+#endif
+}
+SYSINIT(sctp_syscalls, SI_SUB_SYSCALLS, SI_ORDER_ANY, sctp_syscalls_init, NULL);
+
+/*
+ * SCTP syscalls.
+ * Functionality only compiled in if SCTP is defined in the kernel Makefile,
+ * otherwise all return EOPNOTSUPP.
+ * XXX: We should make this loadable one day.
+ */
+/*
+ * sys_sctp_peeloff: move one association of the SCTP socket uap->sd onto a
+ * newly created socket and file descriptor.
+ *
+ * uap->sd descriptor of the existing socket (looked up with CAP_PEELOFF)
+ * uap->name association id; cast to sctp_assoc_t before use
+ *
+ * Returns 0 with the new descriptor in td->td_retval[0].  Otherwise returns
+ * an errno: EOPNOTSUPP if the socket's protocol is not IPPROTO_SCTP, ENOMEM
+ * if sonewconn() returns NULL, or whatever fgetsock(), sctp_can_peel_off(),
+ * falloc() or sctp_do_peeloff() report.  When the kernel is built without
+ * (INET || INET6) && SCTP the call always returns EOPNOTSUPP.
+ */
+int
+sys_sctp_peeloff(td, uap)
+ struct thread *td;
+ struct sctp_peeloff_args /* {
+ int sd;
+ caddr_t name;
+ } */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+ struct file *nfp = NULL;
+ struct socket *head, *so;
+ cap_rights_t rights;
+ u_int fflag;
+ int error, fd;
+
+ AUDIT_ARG_FD(uap->sd);
+ error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF),
+ &head, &fflag);
+ if (error != 0)
+ goto done2;
+ if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto done;
+ }
+ error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
+ if (error != 0)
+ goto done;
+ /*
+ * At this point we know we do have an assoc to pull, so
+ * we proceed to get the fd set up. This may block,
+ * but that is ok.
+ */
+
+ error = falloc(td, &nfp, &fd, 0);
+ if (error != 0)
+ goto done;
+ /* Publish the new fd now; it is closed again below if a later step fails. */
+ td->td_retval[0] = fd;
+
+ CURVNET_SET(head->so_vnet);
+ so = sonewconn(head, SS_ISCONNECTED);
+ if (so == NULL) {
+ error = ENOMEM;
+ goto noconnection;
+ }
+ /*
+ * Before changing the flags on the socket, we have to bump the
+ * reference count. Otherwise, if the protocol calls sofree(),
+ * the socket will be released due to a zero refcount.
+ */
+ SOCK_LOCK(so);
+ soref(so); /* file descriptor reference */
+ SOCK_UNLOCK(so);
+
+ ACCEPT_LOCK();
+
+ /*
+ * Take so off head's so_comp queue, clear its link back to head, and
+ * carry over head's SS_NBIO setting.
+ */
+ TAILQ_REMOVE(&head->so_comp, so, so_list);
+ head->so_qlen--;
+ so->so_state |= (head->so_state & SS_NBIO);
+ so->so_state &= ~SS_NOFDREF;
+ so->so_qstate &= ~SQ_COMP;
+ so->so_head = NULL;
+ ACCEPT_UNLOCK();
+ /* Attach so to the new file using the flags read from the original fd. */
+ finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
+ error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
+ if (error != 0)
+ goto noconnection;
+ /* Give the new socket the same SIGIO owner as head, if head has one. */
+ if (head->so_sigio != NULL)
+ fsetown(fgetown(&head->so_sigio), &so->so_sigio);
+
+noconnection:
+ /*
+ * close the new descriptor, assuming someone hasn't ripped it
+ * out from under us.
+ */
+ if (error != 0)
+ fdclose(td, nfp, fd);
+
+ /*
+ * Release explicitly held references before returning.
+ */
+ CURVNET_RESTORE();
+done:
+ /* nfp is still NULL when we get here before falloc() succeeded. */
+ if (nfp != NULL)
+ fdrop(nfp, td);
+ fputsock(head);
+done2:
+ return (error);
+#else /* SCTP */
+ return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+/*
+ * sys_sctp_generic_sendmsg: send a single user buffer on an SCTP socket.
+ *
+ * uap->sd		socket descriptor
+ * uap->msg, uap->mlen	user buffer and its length in bytes
+ * uap->to, uap->tolen	optional destination address; copied in with
+ *			getsockaddr() only when tolen is non-zero
+ * uap->sinfo		optional struct sctp_sndrcvinfo, copied in when
+ *			non-NULL
+ * uap->flags		handed to sctp_lower_sosend(); MSG_NOSIGNAL also
+ *			suppresses SIGPIPE here
+ *
+ * Returns 0 and stores the number of bytes taken from the buffer in
+ * td->td_retval[0].  Otherwise returns an errno: EOPNOTSUPP if the socket's
+ * protocol is not IPPROTO_SCTP, EINVAL for a negative uap->mlen, or whatever
+ * copyin(), getsockaddr(), getsock_cap(), mac_socket_check_send() or
+ * sctp_lower_sosend() report.  ERESTART, EINTR and EWOULDBLOCK are turned
+ * into success when part of the buffer was already consumed.  When the
+ * kernel is built without (INET || INET6) && SCTP the call always returns
+ * EOPNOTSUPP.
+ */
+int
+sys_sctp_generic_sendmsg (td, uap)
+	struct thread *td;
+	struct sctp_generic_sendmsg_args /* {
+		int sd,
+		caddr_t msg,
+		int mlen,
+		caddr_t to,
+		__socklen_t tolen,
+		struct sctp_sndrcvinfo *sinfo,
+		int flags
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
+	struct socket *so;
+	struct file *fp = NULL;
+	struct sockaddr *to = NULL;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+	struct uio auio;
+	struct iovec iov[1];
+	cap_rights_t rights;
+	int error = 0, len;
+
+	if (uap->sinfo != NULL) {
+		error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
+		if (error != 0)
+			return (error);
+		u_sinfo = &sinfo;
+	}
+
+	cap_rights_init(&rights, CAP_SEND);
+	if (uap->tolen != 0) {
+		error = getsockaddr(&to, uap->to, uap->tolen);
+		if (error != 0) {
+			to = NULL;
+			goto sctp_bad2;
+		}
+		/* An explicit destination additionally requires CAP_CONNECT. */
+		cap_rights_set(&rights, CAP_CONNECT);
+	}
+
+	AUDIT_ARG_FD(uap->sd);
+	error = getsock_cap(td, uap->sd, &rights, &fp, NULL);
+	if (error != 0)
+		goto sctp_bad;
+#ifdef KTRACE
+	if (to && (KTRPOINT(td, KTR_STRUCT)))
+		ktrsockaddr(to);
+#endif
+
+	so = (struct socket *)fp->f_data;
+	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto sctp_bad;
+	}
+#ifdef MAC
+	error = mac_socket_check_send(td->td_ucred, so);
+	if (error != 0)
+		goto sctp_bad;
+#endif /* MAC */
+
+	/*
+	 * A negative length would become a huge iov_len and a negative
+	 * uio_resid.  Reject it before building the uio, the same way
+	 * sys_sctp_generic_sendmsg_iov() rejects a negative total.
+	 */
+	if (uap->mlen < 0) {
+		error = EINVAL;
+		goto sctp_bad;
+	}
+	iov[0].iov_base = uap->msg;
+	iov[0].iov_len = uap->mlen;
+
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = 1;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_td = td;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+#ifdef KTRACE
+	/* The clone is taken before uio_resid is set; it is filled in below. */
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+	len = auio.uio_resid = uap->mlen;
+	CURVNET_SET(so->so_vnet);
+	error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL,
+	    (struct mbuf *)NULL, uap->flags, u_sinfo, td);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		/* A partial send that was merely interrupted counts as success. */
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		/* Generation of SIGPIPE can be controlled per socket. */
+		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
+		    !(uap->flags & MSG_NOSIGNAL)) {
+			PROC_LOCK(td->td_proc);
+			tdsignal(td, SIGPIPE);
+			PROC_UNLOCK(td->td_proc);
+		}
+	}
+	if (error == 0)
+		td->td_retval[0] = len - auio.uio_resid;
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = td->td_retval[0];
+		ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
+	}
+#endif /* KTRACE */
+sctp_bad:
+	/* fp is still NULL when getsock_cap() failed. */
+	if (fp != NULL)
+		fdrop(fp, td);
+sctp_bad2:
+	/* to is NULL when no destination was supplied or copied in. */
+	free(to, M_SONAME);
+	return (error);
+#else /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+/*
+ * sys_sctp_generic_sendmsg_iov: send a user iovec array on an SCTP socket.
+ *
+ * uap->sd socket descriptor
+ * uap->iov, uap->iovlen user iovec array and its element count; copied in
+ * with copyiniov(), or freebsd32_copyiniov() for an SV_ILP32 process when
+ * COMPAT_FREEBSD32 is defined
+ * uap->to, uap->tolen optional destination address; copied in with
+ * getsockaddr() only when tolen is non-zero
+ * uap->sinfo optional struct sctp_sndrcvinfo, copied in when non-NULL
+ * uap->flags handed to sctp_lower_sosend(); MSG_NOSIGNAL also suppresses
+ * SIGPIPE here
+ *
+ * Returns 0 and stores the number of bytes taken from the iovecs in
+ * td->td_retval[0].  Otherwise returns an errno: EOPNOTSUPP if the socket's
+ * protocol is not IPPROTO_SCTP, EINVAL if the summed iovec lengths go
+ * negative, or whatever the copy-in helpers, getsock_cap(),
+ * mac_socket_check_send() or sctp_lower_sosend() report.  ERESTART, EINTR
+ * and EWOULDBLOCK are turned into success when some data was already
+ * consumed.  When the kernel is built without (INET || INET6) && SCTP the
+ * call always returns EOPNOTSUPP.
+ */
+int
+sys_sctp_generic_sendmsg_iov(td, uap)
+ struct thread *td;
+ struct sctp_generic_sendmsg_iov_args /* {
+ int sd,
+ struct iovec *iov,
+ int iovlen,
+ caddr_t to,
+ __socklen_t tolen,
+ struct sctp_sndrcvinfo *sinfo,
+ int flags
+ } */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+ struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
+ struct socket *so;
+ struct file *fp = NULL;
+ struct sockaddr *to = NULL;
+#ifdef KTRACE
+ struct uio *ktruio = NULL;
+#endif
+ struct uio auio;
+ struct iovec *iov, *tiov;
+ cap_rights_t rights;
+ ssize_t len;
+ int error, i;
+
+ if (uap->sinfo != NULL) {
+ error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
+ if (error != 0)
+ return (error);
+ u_sinfo = &sinfo;
+ }
+ cap_rights_init(&rights, CAP_SEND);
+ if (uap->tolen != 0) {
+ error = getsockaddr(&to, uap->to, uap->tolen);
+ if (error != 0) {
+ to = NULL;
+ goto sctp_bad2;
+ }
+ /* An explicit destination additionally requires CAP_CONNECT. */
+ cap_rights_set(&rights, CAP_CONNECT);
+ }
+
+ AUDIT_ARG_FD(uap->sd);
+ error = getsock_cap(td, uap->sd, &rights, &fp, NULL);
+ if (error != 0)
+ goto sctp_bad1;
+
+#ifdef COMPAT_FREEBSD32
+ if (SV_CURPROC_FLAG(SV_ILP32))
+ error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
+ uap->iovlen, &iov, EMSGSIZE);
+ else
+#endif
+ error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
+ /* iov is only freed from sctp_bad, so a failed copy-in skips that label. */
+ if (error != 0)
+ goto sctp_bad1;
+#ifdef KTRACE
+ if (to && (KTRPOINT(td, KTR_STRUCT)))
+ ktrsockaddr(to);
+#endif
+
+ so = (struct socket *)fp->f_data;
+ if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto sctp_bad;
+ }
+#ifdef MAC
+ error = mac_socket_check_send(td->td_ucred, so);
+ if (error != 0)
+ goto sctp_bad;
+#endif /* MAC */
+
+ auio.uio_iov = iov;
+ auio.uio_iovcnt = uap->iovlen;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_td = td;
+ auio.uio_offset = 0; /* XXX */
+ auio.uio_resid = 0;
+ tiov = iov;
+ /* Sum the segment lengths; a total that goes negative is rejected. */
+ for (i = 0; i <uap->iovlen; i++, tiov++) {
+ if ((auio.uio_resid += tiov->iov_len) < 0) {
+ error = EINVAL;
+ goto sctp_bad;
+ }
+ }
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_GENIO))
+ ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+ /* Remember the requested total so the amount sent can be derived. */
+ len = auio.uio_resid;
+ CURVNET_SET(so->so_vnet);
+ error = sctp_lower_sosend(so, to, &auio,
+ (struct mbuf *)NULL, (struct mbuf *)NULL,
+ uap->flags, u_sinfo, td);
+ CURVNET_RESTORE();
+ if (error != 0) {
+ /* A partial send that was merely interrupted counts as success. */
+ if (auio.uio_resid != len && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ /* Generation of SIGPIPE can be controlled per socket */
+ if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
+ !(uap->flags & MSG_NOSIGNAL)) {
+ PROC_LOCK(td->td_proc);
+ tdsignal(td, SIGPIPE);
+ PROC_UNLOCK(td->td_proc);
+ }
+ }
+ if (error == 0)
+ td->td_retval[0] = len - auio.uio_resid;
+#ifdef KTRACE
+ if (ktruio != NULL) {
+ ktruio->uio_resid = td->td_retval[0];
+ ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
+ }
+#endif /* KTRACE */
+ /* Cleanup ladder: sctp_bad frees iov, sctp_bad1 drops fp, sctp_bad2 frees to. */
+sctp_bad:
+ free(iov, M_IOV);
+sctp_bad1:
+ if (fp != NULL)
+ fdrop(fp, td);
+sctp_bad2:
+ free(to, M_SONAME);
+ return (error);
+#else /* SCTP */
+ return (EOPNOTSUPP);
+#endif /* SCTP */
+}
+
+/*
+ * sys_sctp_generic_recvmsg: receive into a user iovec array from an SCTP
+ * socket.
+ *
+ * uap->sd		socket descriptor (looked up with CAP_RECV)
+ * uap->iov, uap->iovlen
+ *			user iovec array and its element count; copied in
+ *			with copyiniov(), or freebsd32_copyiniov() for an
+ *			SV_ILP32 process when COMPAT_FREEBSD32 is defined
+ * uap->from		optional user buffer that receives the source address
+ * uap->fromlenaddr	optional; on entry the size of uap->from, on return
+ *			the number of address bytes copied out (written only
+ *			when the size is non-zero and uap->from is non-NULL)
+ * uap->sinfo		optional; receives the struct sctp_sndrcvinfo when
+ *			sctp_sorecvmsg() returned success
+ * uap->msg_flags	optional in/out int passed to sctp_sorecvmsg()
+ *
+ * Returns 0 and stores the number of bytes received in td->td_retval[0].
+ * Otherwise returns an errno: EOPNOTSUPP if the socket's protocol is not
+ * IPPROTO_SCTP, EINVAL if the summed iovec lengths go negative, or whatever
+ * the copy-in/copy-out helpers, getsock_cap(), mac_socket_check_receive()
+ * or sctp_sorecvmsg() report.  ERESTART, EINTR and EWOULDBLOCK are turned
+ * into success when some data was already transferred.  When the kernel is
+ * built without (INET || INET6) && SCTP the call always returns EOPNOTSUPP.
+ */
+int
+sys_sctp_generic_recvmsg(td, uap)
+	struct thread *td;
+	struct sctp_generic_recvmsg_args /* {
+		int sd,
+		struct iovec *iov,
+		int iovlen,
+		struct sockaddr *from,
+		__socklen_t *fromlenaddr,
+		struct sctp_sndrcvinfo *sinfo,
+		int *msg_flags
+	} */ *uap;
+{
+#if (defined(INET) || defined(INET6)) && defined(SCTP)
+	uint8_t sockbufstore[256];
+	struct uio auio;
+	struct iovec *iov, *tiov;
+	struct sctp_sndrcvinfo sinfo;
+	struct socket *so;
+	struct file *fp = NULL;
+	struct sockaddr *fromsa;
+	cap_rights_t rights;
+#ifdef KTRACE
+	struct uio *ktruio = NULL;
+#endif
+	ssize_t len;
+	socklen_t salen;
+	int error, fromlen, i, msg_flags;
+
+	AUDIT_ARG_FD(uap->sd);
+	error = getsock_cap(td, uap->sd, cap_rights_init(&rights, CAP_RECV),
+	    &fp, NULL);
+	if (error != 0)
+		return (error);
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
+		    uap->iovlen, &iov, EMSGSIZE);
+	else
+#endif
+		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
+	/* iov is only freed from "out", so a failed copy-in skips that label. */
+	if (error != 0)
+		goto out1;
+
+	so = fp->f_data;
+	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+#ifdef MAC
+	error = mac_socket_check_receive(td->td_ucred, so);
+	if (error != 0)
+		goto out;
+#endif /* MAC */
+
+	if (uap->fromlenaddr != NULL) {
+		error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen));
+		if (error != 0)
+			goto out;
+	} else {
+		fromlen = 0;
+	}
+	if (uap->msg_flags) {
+		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
+		if (error != 0)
+			goto out;
+	} else {
+		msg_flags = 0;
+	}
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovlen;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_td = td;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+	tiov = iov;
+	/* Sum the segment lengths; a total that goes negative is rejected. */
+	for (i = 0; i < uap->iovlen; i++, tiov++) {
+		if ((auio.uio_resid += tiov->iov_len) < 0) {
+			error = EINVAL;
+			goto out;
+		}
+	}
+	len = auio.uio_resid;
+	/*
+	 * Zero the address buffer first.  fromsa->sa_len is read below by
+	 * ktrsockaddr() and MIN() even when sctp_sorecvmsg() may not have
+	 * stored an address (for example when fromlen is 0), and it must not
+	 * be stale stack contents in that case.
+	 */
+	memset(sockbufstore, 0, sizeof(sockbufstore));
+	fromsa = (struct sockaddr *)sockbufstore;
+
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_GENIO))
+		ktruio = cloneuio(&auio);
+#endif /* KTRACE */
+	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
+	CURVNET_SET(so->so_vnet);
+	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
+	    fromsa, fromlen, &msg_flags,
+	    (struct sctp_sndrcvinfo *)&sinfo, 1);
+	CURVNET_RESTORE();
+	if (error != 0) {
+		/* A partial receive that was merely interrupted is a success. */
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	} else {
+		if (uap->sinfo)
+			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
+	}
+#ifdef KTRACE
+	if (ktruio != NULL) {
+		ktruio->uio_resid = len - auio.uio_resid;
+		ktrgenio(uap->sd, UIO_READ, ktruio, error);
+	}
+#endif /* KTRACE */
+	if (error != 0)
+		goto out;
+	td->td_retval[0] = len - auio.uio_resid;
+
+	if (fromlen && uap->from) {
+		if (fromlen <= 0)
+			salen = 0;
+		else {
+			/* Never copy more than the address actually holds. */
+			salen = MIN((socklen_t)fromlen, fromsa->sa_len);
+			error = copyout(fromsa, uap->from, (size_t)salen);
+			if (error != 0)
+				goto out;
+		}
+		/*
+		 * Copy the length out of a real socklen_t.  Taking
+		 * sizeof(socklen_t) bytes from the start of a wider ssize_t
+		 * yields the high-order half on a big-endian machine.
+		 */
+		error = copyout(&salen, uap->fromlenaddr, sizeof (salen));
+		if (error != 0)
+			goto out;
+	}
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_STRUCT))
+		ktrsockaddr(fromsa);
+#endif
+	if (uap->msg_flags) {
+		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
+		if (error != 0)
+			goto out;
+	}
+out:
+	free(iov, M_IOV);
+out1:
+	if (fp != NULL)
+		fdrop(fp, td);
+
+	return (error);
+#else /* SCTP */
+	return (EOPNOTSUPP);
+#endif /* SCTP */
+}
Property changes on: trunk/sys/netinet/sctp_syscalls.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/sctp_sysctl.c
===================================================================
--- trunk/sys/netinet/sctp_sysctl.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_sysctl.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_sysctl.c 238249 2012-07-08 15:37:58Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_sysctl.c 294149 2016-01-16 14:41:44Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp.h>
@@ -42,7 +42,10 @@
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <sys/smp.h>
+#include <sys/sysctl.h>
+FEATURE(sctp, "Stream Control Transmission Protocol");
+
/*
* sysctl tunable variables
*/
@@ -55,6 +58,12 @@
SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT;
SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pr_enable) = SCTPCTL_PR_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_asconf_enable) = SCTPCTL_ASCONF_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_reconfig_enable) = SCTPCTL_RECONFIG_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nrsack_enable) = SCTPCTL_NRSACK_ENABLE_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_pktdrop_enable) = SCTPCTL_PKTDROP_ENABLE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
@@ -82,14 +91,11 @@
SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT;
SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT;
SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+ SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
- /* EY */
- SCTP_BASE_SYSCTL(sctp_nr_sack_on_off) = SCTPCTL_NR_SACK_ON_OFF_DEFAULT;
SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT;
SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
- SCTP_BASE_SYSCTL(sctp_auth_disable) = SCTPCTL_AUTH_DISABLE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT;
SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT;
SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
@@ -100,9 +106,7 @@
SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT;
SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT;
- /* JRS - Variable for default congestion control module */
SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
- /* RS - Variable for default stream scheduling module */
SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT;
@@ -116,7 +120,7 @@
SCTP_BASE_SYSCTL(sctp_steady_step) = SCTPCTL_RTTVAR_STEADYS_DEFAULT;
SCTP_BASE_SYSCTL(sctp_use_dccc_ecn) = SCTPCTL_RTTVAR_DCCCECN_DEFAULT;
SCTP_BASE_SYSCTL(sctp_blackhole) = SCTPCTL_BLACKHOLE_DEFAULT;
-
+ SCTP_BASE_SYSCTL(sctp_diag_info_code) = SCTPCTL_DIAG_INFO_CODE_DEFAULT;
#if defined(SCTP_LOCAL_TRACE_BUF)
memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
#endif
@@ -134,7 +138,7 @@
/* It returns an upper limit. No filtering is done here */
static unsigned int
-number_of_addresses(struct sctp_inpcb *inp)
+sctp_sysctl_number_of_addresses(struct sctp_inpcb *inp)
{
unsigned int cnt;
struct sctp_vrf *vrf;
@@ -184,7 +188,7 @@
}
static int
-copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
{
struct sctp_ifn *sctp_ifn;
struct sctp_ifa *sctp_ifa;
@@ -198,29 +202,29 @@
/* Turn on all the appropriate scope */
if (stcb) {
/* use association specific values */
- loopback_scope = stcb->asoc.loopback_scope;
- ipv4_local_scope = stcb->asoc.ipv4_local_scope;
- local_scope = stcb->asoc.local_scope;
- site_scope = stcb->asoc.site_scope;
+ loopback_scope = stcb->asoc.scope.loopback_scope;
+ ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+ local_scope = stcb->asoc.scope.local_scope;
+ site_scope = stcb->asoc.scope.site_scope;
+ ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+ ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
} else {
- /* use generic values for endpoints */
+ /* Use generic values for endpoints. */
loopback_scope = 1;
ipv4_local_scope = 1;
local_scope = 1;
site_scope = 1;
- }
-
- /* use only address families of interest */
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ipv6_addr_legal = 1;
- if (SCTP_IPV6_V6ONLY(inp)) {
- ipv4_addr_legal = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ ipv4_addr_legal = 0;
+ } else {
+ ipv4_addr_legal = 1;
+ }
} else {
+ ipv6_addr_legal = 0;
ipv4_addr_legal = 1;
}
- } else {
- ipv4_addr_legal = 1;
- ipv6_addr_legal = 0;
}
/* neither Mac OS X nor FreeBSD support mulitple routing functions */
@@ -249,9 +253,13 @@
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0)
continue;
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
continue;
} else {
@@ -264,9 +272,13 @@
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
continue;
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
if (local_scope == 0)
continue;
@@ -342,7 +354,7 @@
* sysctl functions
*/
static int
-sctp_assoclist(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
{
unsigned int number_of_endpoints;
unsigned int number_of_local_addresses;
@@ -364,14 +376,14 @@
number_of_remote_addresses = 0;
SCTP_INP_INFO_RLOCK();
- if (req->oldptr == USER_ADDR_NULL) {
+ if (req->oldptr == NULL) {
LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
SCTP_INP_RLOCK(inp);
number_of_endpoints++;
- number_of_local_addresses += number_of_addresses(inp);
+ number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
number_of_associations++;
- number_of_local_addresses += number_of_addresses(inp);
+ number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
number_of_remote_addresses++;
}
@@ -388,7 +400,7 @@
req->oldidx = (n + n / 8);
return (0);
}
- if (req->newptr != USER_ADDR_NULL) {
+ if (req->newptr != NULL) {
SCTP_INP_INFO_RUNLOCK();
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
return (EPERM);
@@ -407,6 +419,7 @@
xinpcb.total_recvs = inp->total_recvs;
xinpcb.total_nospaces = inp->total_nospaces;
xinpcb.fragmentation_point = inp->sctp_frag_point;
+ xinpcb.socket = inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
@@ -426,7 +439,7 @@
}
SCTP_INP_INFO_RLOCK();
SCTP_INP_RLOCK(inp);
- error = copy_out_local_addresses(inp, NULL, req);
+ error = sctp_sysctl_copy_out_local_addresses(inp, NULL, req);
if (error) {
SCTP_INP_DECR_REF(inp);
return (error);
@@ -441,7 +454,7 @@
if (stcb->asoc.primary_destination != NULL)
xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
- xstcb.state = SCTP_GET_STATE(&stcb->asoc); /* FIXME */
+ xstcb.state = (uint32_t) sctp_map_assoc_state(stcb->asoc.state);
/* 7.0 does not support these */
xstcb.assoc_id = sctp_get_associd(stcb);
xstcb.peers_rwnd = stcb->asoc.peers_rwnd;
@@ -477,7 +490,7 @@
}
SCTP_INP_INFO_RLOCK();
SCTP_INP_RLOCK(inp);
- error = copy_out_local_addresses(inp, stcb, req);
+ error = sctp_sysctl_copy_out_local_addresses(inp, stcb, req);
if (error) {
SCTP_INP_DECR_REF(inp);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
@@ -499,6 +512,7 @@
xraddr.mtu = net->mtu;
xraddr.rtt = net->rtt / 1000;
xraddr.heartbeat_interval = net->heart_beat_delay;
+ xraddr.ssthresh = net->ssthresh;
xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
SCTP_INP_RUNLOCK(inp);
@@ -545,151 +559,120 @@
return (error);
}
-
-#define RANGECHK(var, min, max) \
- if ((var) < (min)) { (var) = (min); } \
- else if ((var) > (max)) { (var) = (max); }
-
-/* XXX: Remove the #if after tunneling over IPv6 works also on FreeBSD. */
-#if !defined(__FreeBSD__) || defined(INET)
static int
-sysctl_sctp_udp_tunneling_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_udp_tunneling(SYSCTL_HANDLER_ARGS)
{
int error;
- uint32_t old_sctp_udp_tunneling_port;
+ uint32_t old, new;
SCTP_INP_INFO_RLOCK();
- old_sctp_udp_tunneling_port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
+ old = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
SCTP_INP_INFO_RUNLOCK();
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
- if (error == 0) {
- RANGECHK(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), SCTPCTL_UDP_TUNNELING_PORT_MIN, SCTPCTL_UDP_TUNNELING_PORT_MAX);
- if (old_sctp_udp_tunneling_port == SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
- error = 0;
- goto out;
- }
- SCTP_INP_INFO_WLOCK();
- if (old_sctp_udp_tunneling_port) {
- sctp_over_udp_stop();
- }
- if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) {
- if (sctp_over_udp_start()) {
- SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = 0;
+ new = old;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_UDP_TUNNELING_PORT_MIN == 0)
+ if (new > SCTPCTL_UDP_TUNNELING_PORT_MAX) {
+#else
+ if ((new < SCTPCTL_UDP_TUNNELING_PORT_MIN) ||
+ (new > SCTPCTL_UDP_TUNNELING_PORT_MAX)) {
+#endif
+ error = EINVAL;
+ } else {
+ SCTP_INP_INFO_WLOCK();
+ SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = new;
+ if (old != 0) {
+ sctp_over_udp_stop();
}
+ if (new != 0) {
+ error = sctp_over_udp_start();
+ }
+ SCTP_INP_INFO_WUNLOCK();
}
- SCTP_INP_INFO_WUNLOCK();
}
-out:
return (error);
}
-#endif
-
static int
-sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_auth(SYSCTL_HANDLER_ARGS)
{
int error;
+ uint32_t new;
-#ifdef VIMAGE
- error = vnet_sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ new = SCTP_BASE_SYSCTL(sctp_auth_disable);
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_AUTH_DISABLE_MIN ==0)
+ if ((new > SCTPCTL_AUTH_DISABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
#else
- error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if ((new < SCTPCTL_AUTH_DISABLE_MIN) ||
+ (new > SCTPCTL_AUTH_DISABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
#endif
- if (error == 0) {
- RANGECHK(SCTP_BASE_SYSCTL(sctp_sendspace), SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_recvspace), SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_auto_asconf), SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_ecn_enable), SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_sacks), SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_burst_default), SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), SCTPCTL_FRMAXBURST_MIN, SCTPCTL_FRMAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_hashtblsize), SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_pcbtblsize), SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_min_split_point), SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_chunkscale), SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_sack_freq_default), SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_max_default), SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_min_default), SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rto_initial_default), SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rto_max_default), SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_path_pf_threshold), SCTPCTL_PATH_PF_THRESHOLD_MIN, SCTPCTL_PATH_PF_THRESHOLD_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_on_off), SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
- /* EY */
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), SCTPCTL_NR_SACK_ON_OFF_MIN, SCTPCTL_NR_SACK_ON_OFF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_cmt_use_dac), SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_auth_disable), SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_nat_friendly), SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_L2_abc_variable), SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_do_drain), SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_hb_maxburst), SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_strict_data_order), SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_min_residual), SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_max_retran_chunk), SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_logging_level), SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_cc_module), SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_ss_module), SCTPCTL_DEFAULT_SS_MODULE_MIN, SCTPCTL_DEFAULT_SS_MODULE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_default_frag_interleave), SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_vtag_time_wait), SCTPCTL_TIME_WAIT_MIN, SCTPCTL_TIME_WAIT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_buffer_splitting), SCTPCTL_BUFFER_SPLITTING_MIN, SCTPCTL_BUFFER_SPLITTING_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_initial_cwnd), SCTPCTL_INITIAL_CWND_MIN, SCTPCTL_INITIAL_CWND_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_bw), SCTPCTL_RTTVAR_BW_MIN, SCTPCTL_RTTVAR_BW_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_rtt), SCTPCTL_RTTVAR_RTT_MIN, SCTPCTL_RTTVAR_RTT_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_rttvar_eqret), SCTPCTL_RTTVAR_EQRET_MIN, SCTPCTL_RTTVAR_EQRET_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_steady_step), SCTPCTL_RTTVAR_STEADYS_MIN, SCTPCTL_RTTVAR_STEADYS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), SCTPCTL_RTTVAR_DCCCECN_MIN, SCTPCTL_RTTVAR_DCCCECN_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_base), SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN, SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), SCTPCTL_NAT_FRIENDLY_INITS_MIN, SCTPCTL_NAT_FRIENDLY_INITS_MAX);
- RANGECHK(SCTP_BASE_SYSCTL(sctp_blackhole), SCTPCTL_BLACKHOLE_MIN, SCTPCTL_BLACKHOLE_MAX);
+ error = EINVAL;
+ } else {
+ SCTP_BASE_SYSCTL(sctp_auth_disable) = new;
+ }
+ }
+ return (error);
+}
-#ifdef SCTP_DEBUG
- RANGECHK(SCTP_BASE_SYSCTL(sctp_debug_on), SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
+static int
+sctp_sysctl_handle_asconf(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint32_t new;
+
+ new = SCTP_BASE_SYSCTL(sctp_asconf_enable);
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if ((error == 0) &&
+ (req->newptr != NULL)) {
+#if (SCTPCTL_ASCONF_ENABLE_MIN == 0)
+ if ((new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_disable) == 1))) {
+#else
+ if ((new < SCTPCTL_ASCONF_ENABLE_MIN) ||
+ (new > SCTPCTL_ASCONF_ENABLE_MAX) ||
+ ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_disable) == 1))) {
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- RANGECHK(SCTP_BASE_SYSCTL(sctp_output_unlocked), SCTPCTL_OUTPUT_UNLOCKED_MIN, SCTPCTL_OUTPUT_UNLOCKED_MAX);
-#endif
+ error = EINVAL;
+ } else {
+ SCTP_BASE_SYSCTL(sctp_asconf_enable) = new;
+ }
}
return (error);
}
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
static int
-sysctl_stat_get(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_stats(SYSCTL_HANDLER_ARGS)
{
- int cpu, error;
- struct sctpstat sb, *sarry, *cpin = NULL;
+ int error;
- if ((req->newptr) && (req->newlen == sizeof(struct sctpstat))) {
- /*
- * User wants us to clear or at least reset the counters to
- * the specified values.
- */
- cpin = (struct sctpstat *)req->newptr;
- } else if (req->newptr) {
- /* Must be a stat structure */
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
+ struct sctpstat *sarry;
+ struct sctpstat sb;
+ int cpu;
+
+#endif
+ struct sctpstat sb_temp;
+
+ if ((req->newptr != NULL) &&
+ (req->newlen != sizeof(struct sctpstat))) {
return (EINVAL);
}
+ memset(&sb_temp, 0, sizeof(struct sctpstat));
+
+ if (req->newptr != NULL) {
+ error = SYSCTL_IN(req, &sb_temp, sizeof(struct sctpstat));
+ if (error != 0) {
+ return (error);
+ }
+ }
+#if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
memset(&sb, 0, sizeof(sb));
for (cpu = 0; cpu < mp_maxid; cpu++) {
sarry = &SCTP_BASE_STATS[cpu];
@@ -818,20 +801,36 @@
sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid;
sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid;
sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over;
- if (cpin) {
- memcpy(sarry, cpin, sizeof(struct sctpstat));
+ if (req->newptr != NULL) {
+ memcpy(sarry, &sb_temp, sizeof(struct sctpstat));
}
}
- error = SYSCTL_OUT(req, &sb, sizeof(sb));
+ error = SYSCTL_OUT(req, &sb, sizeof(struct sctpstat));
+#else
+ error = SYSCTL_OUT(req, &SCTP_BASE_STATS, sizeof(struct sctpstat));
+ if (error != 0) {
+ return (error);
+ }
+ if (req->newptr != NULL) {
+ memcpy(&SCTP_BASE_STATS, &sb_temp, sizeof(struct sctpstat));
+ }
+#endif
return (error);
}
-#endif
-
#if defined(SCTP_LOCAL_TRACE_BUF)
static int
-sysctl_sctp_cleartrace(SYSCTL_HANDLER_ARGS)
+sctp_sysctl_handle_trace_log(SYSCTL_HANDLER_ARGS)
{
+ int error;
+
+ error = SYSCTL_OUT(req, &SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log));
+ return (error);
+}
+
+static int
+sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS)
+{
int error = 0;
memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
@@ -840,309 +839,119 @@
#endif
+#define SCTP_UINT_SYSCTL(mib_name, var_name, prefix) \
+ static int \
+ sctp_sysctl_handle_##mib_name(SYSCTL_HANDLER_ARGS) \
+ { \
+ int error; \
+ uint32_t new; \
+ \
+ new = SCTP_BASE_SYSCTL(var_name); \
+ error = sysctl_handle_int(oidp, &new, 0, req); \
+ if ((error == 0) && (req->newptr != NULL)) { \
+ if ((new < prefix##_MIN) || \
+ (new > prefix##_MAX)) { \
+ error = EINVAL; \
+ } else { \
+ SCTP_BASE_SYSCTL(var_name) = new; \
+ } \
+ } \
+ return (error); \
+ } \
+ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name, \
+ CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, NULL, 0, \
+ sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC);
/*
* sysctl definitions
*/
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_sendspace), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXDGRAM_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_recvspace), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RECVSPACE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_auto_asconf), 0, sysctl_sctp_check, "IU",
- SCTPCTL_AUTOASCONF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_ecn_enable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ECN_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_strict_sacks), 0, sysctl_sctp_check, "IU",
- SCTPCTL_STRICT_SACKS_DESC);
-
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_peer_chunk_oh), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PEER_CHKOH_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_burst_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, fr_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_fr_max_burst_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_FRMAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAXCHUNKS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_hashtblsize), 0, sysctl_sctp_check, "IU",
- SCTPCTL_TCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_pcbtblsize), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PCBHASHSIZE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_min_split_point), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MIN_SPLIT_POINT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_chunkscale), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CHUNKSCALE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DELAYED_SACK_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_sack_freq_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SACK_FREQ_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_system_free_resc_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SYS_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASOC_RESOURCE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_HEARTBEAT_INTERVAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PMTU_RAISE_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_secret_lifetime_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SECRET_LIFETIME_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_min_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_MIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rto_initial_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTO_INITIAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_init_rto_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INIT_RTO_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_VALID_COOKIE_LIFE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_init_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INIT_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASSOC_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_path_rtx_max_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PATH_RTX_MAX_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, path_pf_threshold, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_path_pf_threshold), 0, sysctl_sctp_check, "IU",
- SCTPCTL_PATH_PF_THRESHOLD_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_add_more_threshold), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default), 0, sysctl_sctp_check, "IU",
- SCTPCTL_OUTGOING_STREAMS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_cmt_on_off), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CMT_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nr_sack_on_off, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nr_sack_on_off), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NR_SACK_ON_OFF_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_cmt_use_dac), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CMT_USE_DAC_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst), 0, sysctl_sctp_check, "IU",
- SCTPCTL_CWND_MAXBURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_auth_disable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_AUTH_DISABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_nat_friendly), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NAT_FRIENDLY_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_L2_abc_variable), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ABC_L_VAR_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAX_CHAINED_MBUFS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_do_drain), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DO_SCTP_DRAIN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_hb_maxburst), 0, sysctl_sctp_check, "IU",
- SCTPCTL_HB_MAX_BURST_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit), 0, sysctl_sctp_check, "IU",
- SCTPCTL_ABORT_AT_LIMIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_strict_data_order), 0, sysctl_sctp_check, "IU",
- SCTPCTL_STRICT_DATA_ORDER_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_min_residual), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MIN_RESIDUAL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_max_retran_chunk), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MAX_RETRAN_CHUNK_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_logging_level), 0, sysctl_sctp_check, "IU",
- SCTPCTL_LOGGING_LEVEL_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_cc_module), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_CC_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_ss_module, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_ss_module), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_SS_MODULE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_default_frag_interleave), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mobility_base), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MOBILITY_BASE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff), 0, sysctl_sctp_check, "IU",
- SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
-
+SCTP_UINT_SYSCTL(sendspace, sctp_sendspace, SCTPCTL_MAXDGRAM)
+SCTP_UINT_SYSCTL(recvspace, sctp_recvspace, SCTPCTL_RECVSPACE)
+SCTP_UINT_SYSCTL(auto_asconf, sctp_auto_asconf, SCTPCTL_AUTOASCONF)
+SCTP_UINT_SYSCTL(ecn_enable, sctp_ecn_enable, SCTPCTL_ECN_ENABLE)
+SCTP_UINT_SYSCTL(pr_enable, sctp_pr_enable, SCTPCTL_PR_ENABLE)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_auth, "IU", SCTPCTL_AUTH_DISABLE_DESC);
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_asconf, "IU", SCTPCTL_ASCONF_ENABLE_DESC);
+SCTP_UINT_SYSCTL(reconfig_enable, sctp_reconfig_enable, SCTPCTL_RECONFIG_ENABLE)
+SCTP_UINT_SYSCTL(nr_sack_on_off, sctp_nrsack_enable, SCTPCTL_NRSACK_ENABLE)
+SCTP_UINT_SYSCTL(pktdrop_enable, sctp_pktdrop_enable, SCTPCTL_PKTDROP_ENABLE)
+SCTP_UINT_SYSCTL(strict_sacks, sctp_strict_sacks, SCTPCTL_STRICT_SACKS)
+SCTP_UINT_SYSCTL(peer_chkoh, sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH)
+SCTP_UINT_SYSCTL(maxburst, sctp_max_burst_default, SCTPCTL_MAXBURST)
+SCTP_UINT_SYSCTL(fr_maxburst, sctp_fr_max_burst_default, SCTPCTL_FRMAXBURST)
+SCTP_UINT_SYSCTL(maxchunks, sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS)
+SCTP_UINT_SYSCTL(tcbhashsize, sctp_hashtblsize, SCTPCTL_TCBHASHSIZE)
+SCTP_UINT_SYSCTL(pcbhashsize, sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE)
+SCTP_UINT_SYSCTL(min_split_point, sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT)
+SCTP_UINT_SYSCTL(chunkscale, sctp_chunkscale, SCTPCTL_CHUNKSCALE)
+SCTP_UINT_SYSCTL(delayed_sack_time, sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME)
+SCTP_UINT_SYSCTL(sack_freq, sctp_sack_freq_default, SCTPCTL_SACK_FREQ)
+SCTP_UINT_SYSCTL(sys_resource, sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE)
+SCTP_UINT_SYSCTL(asoc_resource, sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE)
+SCTP_UINT_SYSCTL(heartbeat_interval, sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL)
+SCTP_UINT_SYSCTL(pmtu_raise_time, sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME)
+SCTP_UINT_SYSCTL(shutdown_guard_time, sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME)
+SCTP_UINT_SYSCTL(secret_lifetime, sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME)
+SCTP_UINT_SYSCTL(rto_max, sctp_rto_max_default, SCTPCTL_RTO_MAX)
+SCTP_UINT_SYSCTL(rto_min, sctp_rto_min_default, SCTPCTL_RTO_MIN)
+SCTP_UINT_SYSCTL(rto_initial, sctp_rto_initial_default, SCTPCTL_RTO_INITIAL)
+SCTP_UINT_SYSCTL(init_rto_max, sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX)
+SCTP_UINT_SYSCTL(valid_cookie_life, sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE)
+SCTP_UINT_SYSCTL(init_rtx_max, sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX)
+SCTP_UINT_SYSCTL(assoc_rtx_max, sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX)
+SCTP_UINT_SYSCTL(path_rtx_max, sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX)
+SCTP_UINT_SYSCTL(path_pf_threshold, sctp_path_pf_threshold, SCTPCTL_PATH_PF_THRESHOLD)
+SCTP_UINT_SYSCTL(add_more_on_output, sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT)
+SCTP_UINT_SYSCTL(incoming_streams, sctp_nr_incoming_streams_default, SCTPCTL_INCOMING_STREAMS)
+SCTP_UINT_SYSCTL(outgoing_streams, sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS)
+SCTP_UINT_SYSCTL(cmt_on_off, sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF)
+SCTP_UINT_SYSCTL(cmt_use_dac, sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC)
+SCTP_UINT_SYSCTL(cwnd_maxburst, sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST)
+SCTP_UINT_SYSCTL(nat_friendly, sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY)
+SCTP_UINT_SYSCTL(abc_l_var, sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR)
+SCTP_UINT_SYSCTL(max_chained_mbufs, sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS)
+SCTP_UINT_SYSCTL(do_sctp_drain, sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN)
+SCTP_UINT_SYSCTL(hb_max_burst, sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST)
+SCTP_UINT_SYSCTL(abort_at_limit, sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT)
+SCTP_UINT_SYSCTL(strict_data_order, sctp_strict_data_order, SCTPCTL_STRICT_DATA_ORDER)
+SCTP_UINT_SYSCTL(min_residual, sctp_min_residual, SCTPCTL_MIN_RESIDUAL)
+SCTP_UINT_SYSCTL(max_retran_chunk, sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK)
+SCTP_UINT_SYSCTL(log_level, sctp_logging_level, SCTPCTL_LOGGING_LEVEL)
+SCTP_UINT_SYSCTL(default_cc_module, sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE)
+SCTP_UINT_SYSCTL(default_ss_module, sctp_default_ss_module, SCTPCTL_DEFAULT_SS_MODULE)
+SCTP_UINT_SYSCTL(default_frag_interleave, sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE)
+SCTP_UINT_SYSCTL(mobility_base, sctp_mobility_base, SCTPCTL_MOBILITY_BASE)
+SCTP_UINT_SYSCTL(mobility_fasthandoff, sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF)
#if defined(SCTP_LOCAL_TRACE_BUF)
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
- &SCTP_BASE_SYSCTL(sctp_log), sctp_log,
- "SCTP logging (struct sctp_log)");
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_log), 0, sysctl_sctp_cleartrace, "IU",
- "Clear SCTP Logging buffer");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD,
+ NULL, 0, sctp_sysctl_handle_trace_log, "S,sctplog", "SCTP logging (struct sctp_log)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_trace_log_clear, "IU", "Clear SCTP Logging buffer");
#endif
-
-/* XXX: Remove the #if after tunneling over IPv6 works also on FreeBSD. */
-#if !defined(__FreeBSD__) || defined(INET)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_udp_tunneling_port), 0, sysctl_sctp_udp_tunneling_check, "IU",
- SCTPCTL_UDP_TUNNELING_PORT_DESC);
-#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, enable_sack_immediately, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_enable_sack_immediately), 0, sysctl_sctp_check, "IU",
- SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, nat_friendly_init, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly), 0, sysctl_sctp_check, "IU",
- SCTPCTL_NAT_FRIENDLY_INITS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, vtag_time_wait, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_vtag_time_wait), 0, sysctl_sctp_check, "IU",
- SCTPCTL_TIME_WAIT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, buffer_splitting, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_buffer_splitting), 0, sysctl_sctp_check, "IU",
- SCTPCTL_BUFFER_SPLITTING_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, initial_cwnd, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_initial_cwnd), 0, sysctl_sctp_check, "IU",
- SCTPCTL_INITIAL_CWND_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_bw, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_bw), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_BW_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_rtt, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_rtt), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_RTT_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_eqret, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_rttvar_eqret), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_EQRET_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, rttvar_steady_step, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_steady_step), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_STEADYS_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, use_dcccecn, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_use_dccc_ecn), 0, sysctl_sctp_check, "IU",
- SCTPCTL_RTTVAR_DCCCECN_DESC);
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, blackhole, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_blackhole), 0, sysctl_sctp_check, "IU",
- SCTPCTL_BLACKHOLE_DESC);
-
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_udp_tunneling, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC);
+SCTP_UINT_SYSCTL(enable_sack_immediately, sctp_enable_sack_immediately, SCTPCTL_SACK_IMMEDIATELY_ENABLE)
+SCTP_UINT_SYSCTL(nat_friendly_init, sctp_inits_include_nat_friendly, SCTPCTL_NAT_FRIENDLY_INITS)
+SCTP_UINT_SYSCTL(vtag_time_wait, sctp_vtag_time_wait, SCTPCTL_TIME_WAIT)
+SCTP_UINT_SYSCTL(buffer_splitting, sctp_buffer_splitting, SCTPCTL_BUFFER_SPLITTING)
+SCTP_UINT_SYSCTL(initial_cwnd, sctp_initial_cwnd, SCTPCTL_INITIAL_CWND)
+SCTP_UINT_SYSCTL(rttvar_bw, sctp_rttvar_bw, SCTPCTL_RTTVAR_BW)
+SCTP_UINT_SYSCTL(rttvar_rtt, sctp_rttvar_rtt, SCTPCTL_RTTVAR_RTT)
+SCTP_UINT_SYSCTL(rttvar_eqret, sctp_rttvar_eqret, SCTPCTL_RTTVAR_EQRET)
+SCTP_UINT_SYSCTL(rttvar_steady_step, sctp_steady_step, SCTPCTL_RTTVAR_STEADYS)
+SCTP_UINT_SYSCTL(use_dcccecn, sctp_use_dccc_ecn, SCTPCTL_RTTVAR_DCCCECN)
+SCTP_UINT_SYSCTL(blackhole, sctp_blackhole, SCTPCTL_BLACKHOLE)
+SCTP_UINT_SYSCTL(diag_info_code, sctp_diag_info_code, SCTPCTL_DIAG_INFO_CODE)
#ifdef SCTP_DEBUG
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_debug_on), 0, sysctl_sctp_check, "IU",
- SCTPCTL_DEBUG_DESC);
+SCTP_UINT_SYSCTL(debug, sctp_debug_on, SCTPCTL_DEBUG)
#endif
-
-
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, output_unlocked, CTLTYPE_UINT | CTLFLAG_RW,
- &SCTP_BASE_SYSCTL(sctp_output_unlocked), 0, sysctl_sctp_check, "IU",
- SCTPCTL_OUTPUT_UNLOCKED_DESC);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+SCTP_UINT_SYSCTL(output_unlocked, sctp_output_unlocked, SCTPCTL_OUTPUT_UNLOCKED)
#endif
-
-#if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, stats,
- CTLTYPE_STRUCT | CTLFLAG_RW,
- 0, 0, sysctl_stat_get, "S,sctpstat",
- "SCTP statistics (struct sctp_stat)");
-#else
-SYSCTL_VNET_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
- &SCTP_BASE_STATS_SYSCTL, sctpstat,
- "SCTP statistics (struct sctp_stat)");
-#endif
-
-SYSCTL_VNET_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLTYPE_OPAQUE | CTLFLAG_RD,
- 0, 0, sctp_assoclist,
- "S,xassoc", "List of active SCTP associations");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW,
+ NULL, 0, sctp_sysctl_handle_stats, "S,sctpstat", "SCTP statistics (struct sctp_stat)");
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RD,
+ NULL, 0, sctp_sysctl_handle_assoclist, "S,xassoc", "List of active SCTP associations");
Modified: trunk/sys/netinet/sctp_sysctl.h
===================================================================
--- trunk/sys/netinet/sctp_sysctl.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_sysctl.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_sysctl.h 238249 2012-07-08 15:37:58Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_sysctl.h 294151 2016-01-16 14:46:27Z tuexen $");
#ifndef _NETINET_SCTP_SYSCTL_H_
#define _NETINET_SCTP_SYSCTL_H_
@@ -46,6 +46,12 @@
uint32_t sctp_auto_asconf;
uint32_t sctp_multiple_asconfs;
uint32_t sctp_ecn_enable;
+ uint32_t sctp_pr_enable;
+ uint32_t sctp_auth_disable;
+ uint32_t sctp_asconf_enable;
+ uint32_t sctp_reconfig_enable;
+ uint32_t sctp_nrsack_enable;
+ uint32_t sctp_pktdrop_enable;
uint32_t sctp_fr_max_burst_default;
uint32_t sctp_strict_sacks;
uint32_t sctp_peer_chunk_oh;
@@ -73,14 +79,11 @@
uint32_t sctp_path_rtx_max_default;
uint32_t sctp_path_pf_threshold;
uint32_t sctp_add_more_threshold;
+ uint32_t sctp_nr_incoming_streams_default;
uint32_t sctp_nr_outgoing_streams_default;
uint32_t sctp_cmt_on_off;
uint32_t sctp_cmt_use_dac;
- /* EY 5/5/08 - nr_sack flag variable */
- uint32_t sctp_nr_sack_on_off;
uint32_t sctp_use_cwnd_based_maxburst;
- uint32_t sctp_asconf_auth_nochk;
- uint32_t sctp_auth_disable;
uint32_t sctp_nat_friendly;
uint32_t sctp_L2_abc_variable;
uint32_t sctp_mbuf_threshold_count;
@@ -104,6 +107,7 @@
uint32_t sctp_rttvar_eqret;
uint32_t sctp_steady_step;
uint32_t sctp_use_dccc_ecn;
+ uint32_t sctp_diag_info_code;
#if defined(SCTP_LOCAL_TRACE_BUF)
struct sctp_log sctp_log;
#endif
@@ -140,7 +144,7 @@
#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF"
#define SCTPCTL_AUTOASCONF_MIN 0
#define SCTPCTL_AUTOASCONF_MAX 1
-#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF
+#define SCTPCTL_AUTOASCONF_DEFAULT 1
/* autoasconf: Enable SCTP Auto-ASCONF */
#define SCTPCTL_MULTIPLEASCONFS_DESC "Enable SCTP Muliple-ASCONFs"
@@ -154,6 +158,42 @@
#define SCTPCTL_ECN_ENABLE_MAX 1
#define SCTPCTL_ECN_ENABLE_DEFAULT 1
+/* pr_enable: Enable PR-SCTP */
+#define SCTPCTL_PR_ENABLE_DESC "Enable PR-SCTP"
+#define SCTPCTL_PR_ENABLE_MIN 0
+#define SCTPCTL_PR_ENABLE_MAX 1
+#define SCTPCTL_PR_ENABLE_DEFAULT 1
+
+/* auth_disable: Disable SCTP AUTH function */
+#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
+#define SCTPCTL_AUTH_DISABLE_MIN 0
+#define SCTPCTL_AUTH_DISABLE_MAX 1
+#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
+
+/* asconf_enable: Enable SCTP ASCONF */
+#define SCTPCTL_ASCONF_ENABLE_DESC "Enable SCTP ASCONF"
+#define SCTPCTL_ASCONF_ENABLE_MIN 0
+#define SCTPCTL_ASCONF_ENABLE_MAX 1
+#define SCTPCTL_ASCONF_ENABLE_DEFAULT 1
+
+/* reconfig_enable: Enable SCTP RE-CONFIG */
+#define SCTPCTL_RECONFIG_ENABLE_DESC "Enable SCTP RE-CONFIG"
+#define SCTPCTL_RECONFIG_ENABLE_MIN 0
+#define SCTPCTL_RECONFIG_ENABLE_MAX 1
+#define SCTPCTL_RECONFIG_ENABLE_DEFAULT 1
+
+/* nrsack_enable: Enable NR_SACK */
+#define SCTPCTL_NRSACK_ENABLE_DESC "Enable SCTP NR-SACK"
+#define SCTPCTL_NRSACK_ENABLE_MIN 0
+#define SCTPCTL_NRSACK_ENABLE_MAX 1
+#define SCTPCTL_NRSACK_ENABLE_DEFAULT 0
+
+/* pktdrop_enable: Enable SCTP Packet Drop Reports */
+#define SCTPCTL_PKTDROP_ENABLE_DESC "Enable SCTP PKTDROP"
+#define SCTPCTL_PKTDROP_ENABLE_MIN 0
+#define SCTPCTL_PKTDROP_ENABLE_MAX 1
+#define SCTPCTL_PKTDROP_ENABLE_DEFAULT 0
+
/* strict_sacks: Enable SCTP Strict SACK checking */
#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking"
#define SCTPCTL_STRICT_SACKS_MIN 0
@@ -252,10 +292,10 @@
#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC
/* shutdown_guard_time: Default shutdown guard timer in seconds */
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in seconds"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Shutdown guard timer in seconds (0 means 5 times RTO.Max)"
#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0
#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF
-#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT 0
/* secret_lifetime: Default secret lifetime in seconds */
#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in seconds"
@@ -323,6 +363,12 @@
#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF
#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
+/* incoming_streams: Default number of incoming streams */
+#define SCTPCTL_INCOMING_STREAMS_DESC "Default number of incoming streams"
+#define SCTPCTL_INCOMING_STREAMS_MIN 1
+#define SCTPCTL_INCOMING_STREAMS_MAX 65535
+#define SCTPCTL_INCOMING_STREAMS_DEFAULT SCTP_ISTREAM_INITIAL
+
/* outgoing_streams: Default number of outgoing streams */
#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams"
#define SCTPCTL_OUTGOING_STREAMS_MIN 1
@@ -335,12 +381,6 @@
#define SCTPCTL_CMT_ON_OFF_MAX SCTP_CMT_MAX
#define SCTPCTL_CMT_ON_OFF_DEFAULT SCTP_CMT_OFF
-/* EY - nr_sack_on_off: NR_SACK on/off flag */
-#define SCTPCTL_NR_SACK_ON_OFF_DESC "NR_SACK on/off flag"
-#define SCTPCTL_NR_SACK_ON_OFF_MIN 0
-#define SCTPCTL_NR_SACK_ON_OFF_MAX 1
-#define SCTPCTL_NR_SACK_ON_OFF_DEFAULT 0
-
/* cmt_use_dac: CMT DAC on/off flag */
#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag"
#define SCTPCTL_CMT_USE_DAC_MIN 0
@@ -353,18 +393,6 @@
#define SCTPCTL_CWND_MAXBURST_MAX 1
#define SCTPCTL_CWND_MAXBURST_DEFAULT 1
-/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement"
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0
-#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1
-#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0
-
-/* auth_disable: Disable SCTP AUTH function */
-#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
-#define SCTPCTL_AUTH_DISABLE_MIN 0
-#define SCTPCTL_AUTH_DISABLE_MAX 1
-#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
-
/* nat_friendly: SCTP NAT friendly operation */
#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation"
#define SCTPCTL_NAT_FRIENDLY_MIN 0
@@ -447,19 +475,19 @@
#define SCTPCTL_MOBILITY_BASE_DESC "Enable SCTP base mobility"
#define SCTPCTL_MOBILITY_BASE_MIN 0
#define SCTPCTL_MOBILITY_BASE_MAX 1
-#define SCTPCTL_MOBILITY_BASE_DEFAULT SCTP_DEFAULT_MOBILITY_BASE
+#define SCTPCTL_MOBILITY_BASE_DEFAULT 0
/* mobility_fasthandoff: Enable SCTP fast handoff support */
#define SCTPCTL_MOBILITY_FASTHANDOFF_DESC "Enable SCTP fast handoff"
#define SCTPCTL_MOBILITY_FASTHANDOFF_MIN 0
#define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1
-#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT 0
/* Enable SCTP/UDP tunneling port */
#define SCTPCTL_UDP_TUNNELING_PORT_DESC "Set the SCTP/UDP tunneling port"
#define SCTPCTL_UDP_TUNNELING_PORT_MIN 0
#define SCTPCTL_UDP_TUNNELING_PORT_MAX 65535
-#define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT SCTP_OVER_UDP_TUNNELING_PORT
+#define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT 0
/* Enable sending of the SACK-IMMEDIATELY bit */
#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC "Enable sending of the SACK-IMMEDIATELY-bit."
@@ -523,6 +551,11 @@
#define SCTPCTL_BLACKHOLE_MAX 2
#define SCTPCTL_BLACKHOLE_DEFAULT SCTPCTL_BLACKHOLE_MIN
+#define SCTPCTL_DIAG_INFO_CODE_DESC "Diagnostic information error cause code"
+#define SCTPCTL_DIAG_INFO_CODE_MIN 0
+#define SCTPCTL_DIAG_INFO_CODE_MAX 65535
+#define SCTPCTL_DIAG_INFO_CODE_DEFAULT 0
+
#if defined(SCTP_DEBUG)
/* debug: Configure debug output */
#define SCTPCTL_DEBUG_DESC "Configure debug output"
@@ -532,7 +565,7 @@
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
#define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP."
#define SCTPCTL_OUTPUT_UNLOCKED_MIN 0
#define SCTPCTL_OUTPUT_UNLOCKED_MAX 1
Modified: trunk/sys/netinet/sctp_timer.c
===================================================================
--- trunk/sys/netinet/sctp_timer.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_timer.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_timer.c 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_timer.c 296052 2016-02-25 18:46:06Z tuexen $");
#define _IP_VHL
#include <netinet/sctp_os.h>
@@ -50,7 +50,9 @@
#include <netinet/sctp_input.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>
+#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
+#endif
void
@@ -84,7 +86,7 @@
asoc->sent_queue_cnt);
}
-int
+static int
sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
struct sctp_nets *net, uint16_t threshold)
{
@@ -91,7 +93,7 @@
if (net) {
net->error_count++;
SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
- net, net->error_count,
+ (void *)net, net->error_count,
net->failure_threshold);
if (net->error_count > net->failure_threshold) {
/* We had a threshold failure */
@@ -109,8 +111,10 @@
net->dest_state |= SCTP_ADDR_PF;
net->last_active = sctp_get_tick_count();
sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
- sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ inp, stcb, net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
}
}
}
@@ -139,7 +143,7 @@
stcb->asoc.overall_error_count++;
}
SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
- &stcb->asoc, stcb->asoc.overall_error_count,
+ (void *)&stcb->asoc, stcb->asoc.overall_error_count,
(uint32_t) threshold,
((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
/*
@@ -148,24 +152,12 @@
*/
if (stcb->asoc.overall_error_count > threshold) {
/* Abort notification sends a ULP notify */
- struct mbuf *oper;
+ struct mbuf *op_err;
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
- }
- inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
- sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Association error counter exceeded");
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_2;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
return (1);
}
return (0);
@@ -348,7 +340,7 @@
return (NULL);
}
}
- do {
+ for (;;) {
alt = TAILQ_NEXT(mnet, sctp_next);
if (alt == NULL) {
once++;
@@ -367,7 +359,6 @@
}
alt->src_addr_selected = 0;
}
- /* sa_ignore NO_NULL_CHK */
if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
(alt->ro.ro_rt != NULL) &&
(!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) {
@@ -375,7 +366,7 @@
break;
}
mnet = alt;
- } while (alt != NULL);
+ }
if (alt == NULL) {
/* Case where NO insv network exists (dormant state) */
@@ -382,7 +373,7 @@
/* we rotate destinations */
once = 0;
mnet = net;
- do {
+ for (;;) {
if (mnet == NULL) {
return (TAILQ_FIRST(&stcb->asoc.nets));
}
@@ -393,8 +384,10 @@
break;
}
alt = TAILQ_FIRST(&stcb->asoc.nets);
+ if (alt == NULL) {
+ break;
+ }
}
- /* sa_ignore NO_NULL_CHK */
if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
(alt != net)) {
/* Found an alternate address */
@@ -401,7 +394,7 @@
break;
}
mnet = alt;
- } while (alt != NULL);
+ }
}
if (alt == NULL) {
return (net);
@@ -416,7 +409,11 @@
int num_marked, int num_abandoned)
{
if (net->RTO == 0) {
- net->RTO = stcb->asoc.minrto;
+ if (net->RTO_measured) {
+ net->RTO = stcb->asoc.minrto;
+ } else {
+ net->RTO = stcb->asoc.initial_rto;
+ }
}
net->RTO <<= 1;
if (net->RTO > stcb->asoc.maxrto) {
@@ -440,9 +437,19 @@
TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
if (SCTP_TSN_GE(asoc->last_acked_seq, chk->rec.data.TSN_seq)) {
SCTP_PRINTF("Found chk:%p tsn:%x <= last_acked_seq:%x\n",
- chk, chk->rec.data.TSN_seq, asoc->last_acked_seq);
+ (void *)chk, chk->rec.data.TSN_seq, asoc->last_acked_seq);
+ if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+ }
+ }
+ if ((asoc->strmout[chk->rec.data.stream_number].chunks_on_queues == 0) &&
+ (asoc->strmout[chk->rec.data.stream_number].state == SCTP_STREAM_RESET_PENDING) &&
+ TAILQ_EMPTY(&asoc->strmout[chk->rec.data.stream_number].outqueue)) {
+ asoc->trigger_reset = 1;
+ }
TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
- if (chk->pr_sctp_on) {
+ if (PR_SCTP_ENABLED(chk->flags)) {
if (asoc->pr_sctp_cnt != 0)
asoc->pr_sctp_cnt--;
}
@@ -451,7 +458,7 @@
sctp_free_bufspace(stcb, asoc, chk, 1);
sctp_m_freem(chk->data);
chk->data = NULL;
- if (asoc->peer_supports_prsctp && PR_SCTP_BUF_ENABLED(chk->flags)) {
+ if (asoc->prsctp_supported && PR_SCTP_BUF_ENABLED(chk->flags)) {
asoc->sent_queue_cnt_removeable--;
}
}
@@ -461,7 +468,7 @@
}
SCTP_PRINTF("after recover order is as follows\n");
TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
- SCTP_PRINTF("chk:%p TSN:%x\n", chk, chk->rec.data.TSN_seq);
+ SCTP_PRINTF("chk:%p TSN:%x\n", (void *)chk, chk->rec.data.TSN_seq);
}
}
@@ -548,7 +555,7 @@
TAILQ_FOREACH_SAFE(chk, &stcb->asoc.sent_queue, sctp_next, nchk) {
if (SCTP_TSN_GE(stcb->asoc.last_acked_seq, chk->rec.data.TSN_seq)) {
/* Strange case our list got out of order? */
- SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x",
+ SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x\n",
(unsigned int)stcb->asoc.last_acked_seq, (unsigned int)chk->rec.data.TSN_seq);
recovery_cnt++;
#ifdef INVARIANTS
@@ -606,7 +613,7 @@
continue;
}
}
- if (stcb->asoc.peer_supports_prsctp && PR_SCTP_TTL_ENABLED(chk->flags)) {
+ if (stcb->asoc.prsctp_supported && PR_SCTP_TTL_ENABLED(chk->flags)) {
/* Is it expired? */
if (timevalcmp(&now, &chk->rec.data.timetodrop, >)) {
/* Yes so drop it */
@@ -620,7 +627,7 @@
continue;
}
}
- if (stcb->asoc.peer_supports_prsctp && PR_SCTP_RTX_ENABLED(chk->flags)) {
+ if (stcb->asoc.prsctp_supported && PR_SCTP_RTX_ENABLED(chk->flags)) {
/* Has it been retransmitted tv_sec times? */
if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
if (chk->data) {
@@ -768,7 +775,7 @@
if (audit_tf) {
SCTPDBG(SCTP_DEBUG_TIMER4,
"Audit total flight due to negative value net:%p\n",
- net);
+ (void *)net);
stcb->asoc.total_flight = 0;
stcb->asoc.total_flight_count = 0;
/* Clear all networks flight size */
@@ -776,7 +783,7 @@
lnets->flight_size = 0;
SCTPDBG(SCTP_DEBUG_TIMER4,
"Net:%p c-f cwnd:%d ssthresh:%d\n",
- lnets, lnets->cwnd, lnets->ssthresh);
+ (void *)lnets, lnets->cwnd, lnets->ssthresh);
}
TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
if (chk->sent < SCTP_DATAGRAM_RESEND) {
@@ -963,7 +970,7 @@
sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
return (0);
}
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported) {
struct sctp_tmit_chunk *lchk;
lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
@@ -1047,24 +1054,12 @@
if (cookie == NULL) {
if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
/* FOOBAR! */
- struct mbuf *oper;
+ struct mbuf *op_err;
- oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (oper) {
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
- sizeof(uint32_t);
- ph = mtod(oper, struct sctp_paramhdr *);
- ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
- ph->param_length = htons(SCTP_BUF_LEN(oper));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
- }
- inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
- sctp_abort_an_association(inp, stcb, oper, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Cookie timer expired, but no cookie");
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_3;
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
} else {
#ifdef INVARIANTS
panic("Cookie timer expires in wrong state?");
@@ -1500,11 +1495,15 @@
}
if (net->ro._s_addr) {
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
+#if defined(INET) || defined(INET6)
if (net->port) {
mtu -= sizeof(struct udphdr);
}
+#endif
if (mtu > next_mtu) {
net->mtu = next_mtu;
+ } else {
+ net->mtu = mtu;
}
}
}
@@ -1556,6 +1555,13 @@
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
} else {
@@ -1562,12 +1568,6 @@
netp = stcb->asoc.primary_destination;
}
sctp_send_shutdown(stcb, netp);
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
- SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- }
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
stcb->sctp_ep, stcb,
netp);
Modified: trunk/sys/netinet/sctp_timer.h
===================================================================
--- trunk/sys/netinet/sctp_timer.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_timer.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_timer.h 237896 2012-07-01 07:59:00Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_timer.h 296052 2016-02-25 18:46:06Z tuexen $");
#ifndef _NETINET_SCTP_TIMER_H_
#define _NETINET_SCTP_TIMER_H_
@@ -47,10 +47,6 @@
struct sctp_nets *, int mode);
int
-sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
- struct sctp_nets *, uint16_t);
-
-int
sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
struct sctp_nets *);
int
Modified: trunk/sys/netinet/sctp_uio.h
===================================================================
--- trunk/sys/netinet/sctp_uio.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_uio.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_uio.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_uio.h 294180 2016-01-16 18:05:24Z tuexen $");
#ifndef _NETINET_SCTP_UIO_H_
#define _NETINET_SCTP_UIO_H_
@@ -135,20 +135,27 @@
uint16_t sinfo_flags;
uint32_t sinfo_ppid;
uint32_t sinfo_context;
- uint32_t sinfo_timetolive;
+ uint32_t sinfo_timetolive; /* should have been sinfo_pr_value */
uint32_t sinfo_tsn;
uint32_t sinfo_cumtsn;
sctp_assoc_t sinfo_assoc_id;
- uint16_t sreinfo_next_flags;
- uint16_t sreinfo_next_stream;
- uint32_t sreinfo_next_aid;
- uint32_t sreinfo_next_length;
- uint32_t sreinfo_next_ppid;
+ uint16_t serinfo_next_flags;
+ uint16_t serinfo_next_stream;
+ uint32_t serinfo_next_aid;
+ uint32_t serinfo_next_length;
+ uint32_t serinfo_next_ppid;
uint16_t sinfo_keynumber;
uint16_t sinfo_keynumber_valid;
uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
};
+#define sinfo_pr_value sinfo_timetolive
+#define sreinfo_next_flags serinfo_next_flags
+#define sreinfo_next_stream serinfo_next_stream
+#define sreinfo_next_aid serinfo_next_aid
+#define sreinfo_next_length serinfo_next_length
+#define sreinfo_next_ppid serinfo_next_ppid
+
struct sctp_sndinfo {
uint16_t snd_sid;
uint16_t snd_flags;
@@ -250,18 +257,23 @@
SCTP_SACK_IMMEDIATELY)) != 0)
/* for the endpoint */
-/* The lower byte is an enumeration of PR-SCTP policies */
+/* The lower four bits is an enumeration of PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */
#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_RTX
+#define SCTP_PR_SCTP_ALL 0x000f/* Used for aggregated stats */
#define PR_SCTP_POLICY(x) ((x) & 0x0f)
-#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE)
+#define PR_SCTP_ENABLED(x) ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \
+ (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_ALL))
#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
#define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
-#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_MAX)
+#define PR_SCTP_VALID_POLICY(x) (PR_SCTP_POLICY(x) <= SCTP_PR_SCTP_MAX)
+
/* Stat's */
struct sctp_pcbinfo {
uint32_t ep_count;
@@ -663,11 +675,7 @@
#define SCTP_AUTH_HMAC_ID_RSVD 0x0000
#define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */
#define SCTP_AUTH_HMAC_ID_SHA256 0x0003
-#define SCTP_AUTH_HMAC_ID_SHA224 0x0004
-#define SCTP_AUTH_HMAC_ID_SHA384 0x0005
-#define SCTP_AUTH_HMAC_ID_SHA512 0x0006
-
/* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */
struct sctp_authkeyid {
sctp_assoc_t scact_assoc_id;
@@ -725,6 +733,14 @@
uint16_t sue_port;
};
+struct sctp_prstatus {
+ sctp_assoc_t sprstat_assoc_id;
+ uint16_t sprstat_sid;
+ uint16_t sprstat_policy;
+ uint64_t sprstat_abandoned_unsent;
+ uint64_t sprstat_abandoned_sent;
+};
+
struct sctp_cwnd_args {
struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
uint32_t cwnd_new_value;/* cwnd in k */
@@ -1150,7 +1166,7 @@
struct xsctp_inpcb {
uint32_t last;
uint32_t flags;
- uint32_t features;
+ uint64_t features;
uint32_t total_sends;
uint32_t total_recvs;
uint32_t total_nospaces;
@@ -1158,7 +1174,12 @@
uint16_t local_port;
uint16_t qlen;
uint16_t maxqlen;
- uint32_t extra_padding[32]; /* future */
+ void *socket;
+#if defined(__LP64__)
+ uint32_t extra_padding[29]; /* future */
+#else
+ uint32_t extra_padding[30]; /* future */
+#endif
};
struct xsctp_tcb {
@@ -1216,7 +1237,8 @@
struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */
uint32_t rtt;
uint32_t heartbeat_interval;
- uint32_t extra_padding[31]; /* future */
+ uint32_t ssthresh;
+ uint32_t extra_padding[30]; /* future */
};
#define SCTP_MAX_LOGGING_SIZE 30000
@@ -1279,33 +1301,39 @@
int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *);
/* deprecated */
-ssize_t sctp_sendmsg
-(int, const void *, size_t, const struct sockaddr *,
+ssize_t
+sctp_sendmsg(int, const void *, size_t, const struct sockaddr *,
socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
/* deprecated */
- ssize_t sctp_send(int, const void *, size_t,
- const struct sctp_sndrcvinfo *, int);
+ssize_t
+sctp_send(int, const void *, size_t,
+ const struct sctp_sndrcvinfo *, int);
/* deprecated */
- ssize_t sctp_sendx(int, const void *, size_t, struct sockaddr *,
- int, struct sctp_sndrcvinfo *, int);
+ssize_t
+sctp_sendx(int, const void *, size_t, struct sockaddr *,
+ int, struct sctp_sndrcvinfo *, int);
/* deprecated */
- ssize_t sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
- int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
+ssize_t
+sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *,
+ int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t);
- sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
+sctp_assoc_t sctp_getassocid(int, struct sockaddr *);
/* deprecated */
- ssize_t sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
- struct sctp_sndrcvinfo *, int *);
+ssize_t
+sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *,
+ struct sctp_sndrcvinfo *, int *);
- ssize_t sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
- int, void *, socklen_t, unsigned int, int);
+ssize_t
+sctp_sendv(int, const struct iovec *, int, struct sockaddr *,
+ int, void *, socklen_t, unsigned int, int);
- ssize_t sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
- socklen_t *, void *, socklen_t *, unsigned int *, int *);
+ssize_t
+sctp_recvv(int, const struct iovec *, int, struct sockaddr *,
+ socklen_t *, void *, socklen_t *, unsigned int *, int *);
__END_DECLS
Modified: trunk/sys/netinet/sctp_usrreq.c
===================================================================
--- trunk/sys/netinet/sctp_usrreq.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_usrreq.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_usrreq.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_usrreq.c 294222 2016-01-17 14:10:37Z tuexen $");
#include <netinet/sctp_os.h>
#include <sys/proc.h>
@@ -40,6 +40,7 @@
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
#ifdef INET6
+#include <netinet6/sctp6_var.h>
#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
@@ -125,21 +126,19 @@
if (chk->sent < SCTP_DATAGRAM_RESEND) {
sctp_flight_size_decrease(chk);
sctp_total_flight_decrease(stcb, chk);
- }
- if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ chk->rec.data.doing_fast_retransmit = 0;
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
}
- chk->sent = SCTP_DATAGRAM_RESEND;
- chk->rec.data.doing_fast_retransmit = 0;
- if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
- sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
- chk->whoTo->flight_size,
- chk->book_size,
- (uintptr_t) chk->whoTo,
- chk->rec.data.TSN_seq);
- }
- /* Clear any time so NO RTT is being done */
- chk->do_rtt = 0;
}
}
}
@@ -181,7 +180,7 @@
SCTP_TCB_UNLOCK(stcb);
return;
}
- totsz = ip->ip_len;
+ totsz = ntohs(ip->ip_len);
nxtsz = ntohs(icmph->icmp_nextmtu);
if (nxtsz == 0) {
@@ -215,8 +214,6 @@
SCTP_TCB_UNLOCK(stcb);
}
-#endif
-
void
sctp_notify(struct sctp_inpcb *inp,
struct ip *ip,
@@ -225,7 +222,7 @@
struct sctp_tcb *stcb,
struct sctp_nets *net)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -284,7 +281,7 @@
* with no TCB
*/
sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -292,8 +289,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
#endif
@@ -303,6 +301,8 @@
}
}
+#endif
+
#ifdef INET
void
sctp_ctlinput(cmd, sa, vip)
@@ -346,8 +346,8 @@
* 'from' holds our local endpoint address. Thus we reverse
* the to and the from in the lookup.
*/
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
- (struct sockaddr *)&to,
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&to,
+ (struct sockaddr *)&from,
&inp, &net, 1, vrf_id);
if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
if (cmd != PRC_MSGSIZE) {
@@ -398,8 +398,8 @@
if (error)
return (error);
- stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
- sintosa(&addrs[1]),
+ stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]),
+ sintosa(&addrs[0]),
&inp, &net, 1, vrf_id);
if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
if ((inp != NULL) && (stcb == NULL)) {
@@ -488,11 +488,6 @@
int error;
uint32_t vrf_id = SCTP_DEFAULT_VRFID;
-#ifdef IPSEC
- uint32_t flags;
-
-#endif
-
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
@@ -514,33 +509,6 @@
ip_inp = &inp->ip_inp.inp;
ip_inp->inp_vflag |= INP_IPV4;
ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
-#ifdef IPSEC
- error = ipsec_init_policy(so, &ip_inp->inp_sp);
-#ifdef SCTP_LOG_CLOSING
- sctp_log_closing(inp, NULL, 17);
-#endif
- if (error != 0) {
-try_again:
- flags = inp->sctp_flags;
- if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
-#ifdef SCTP_LOG_CLOSING
- sctp_log_closing(inp, NULL, 15);
-#endif
- SCTP_INP_WUNLOCK(inp);
- sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
- SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
- } else {
- flags = inp->sctp_flags;
- if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
- goto try_again;
- } else {
- SCTP_INP_WUNLOCK(inp);
- }
- }
- return (error);
- }
-#endif /* IPSEC */
SCTP_INP_WUNLOCK(inp);
return (0);
}
@@ -548,27 +516,21 @@
static int
sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
{
- struct sctp_inpcb *inp = NULL;
- int error;
+ struct sctp_inpcb *inp;
-#ifdef INET
- if (addr && addr->sa_family != AF_INET) {
- /* must be a v4 address! */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- return (EINVAL);
- }
-#endif /* INET6 */
- if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
- return (EINVAL);
- }
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- error = sctp_inpcb_bind(so, addr, NULL, p);
- return (error);
+ if (addr != NULL) {
+ if ((addr->sa_family != AF_INET) ||
+ (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ }
+ return (sctp_inpcb_bind(so, addr, NULL, p));
}
#endif
@@ -764,7 +726,7 @@
/* Left with Data unread */
struct mbuf *err;
- err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
if (err) {
/*
* Fill in the user
@@ -785,7 +747,8 @@
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
/* No unlock tcb assoc is gone */
return (0);
}
@@ -801,25 +764,24 @@
/* only send SHUTDOWN 1st time thru */
struct sctp_nets *netp;
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
if (stcb->asoc.alternate) {
netp = stcb->asoc.alternate;
} else {
netp = stcb->asoc.primary_destination;
}
- sctp_stop_timers_for_shutdown(stcb);
sctp_send_shutdown(stcb, netp);
- sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
- SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- }
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
stcb->sctp_ep, stcb, netp);
sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
stcb->sctp_ep, stcb, netp);
-
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
}
} else {
/*
@@ -862,26 +824,7 @@
struct mbuf *op_err;
abort_anyway:
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err) {
- /*
- * Fill in the user
- * initiated abort
- */
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(op_err) =
- (sizeof(struct sctp_paramhdr) + sizeof(uint32_t));
- ph = mtod(op_err,
- struct sctp_paramhdr *);
- ph->param_type = htons(
- SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
- }
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
SCTP_STAT_INCR_COUNTER32(sctps_aborted);
@@ -890,7 +833,8 @@
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
SCTP_INP_RUNLOCK(inp);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
return (0);
} else {
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
@@ -972,7 +916,8 @@
}
SCTP_INP_RLOCK(inp);
/* For UDP model this is a invalid call */
- if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
/* Restore the flags that the soshutdown took away. */
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
@@ -981,14 +926,15 @@
SCTP_INP_RUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
return (EOPNOTSUPP);
- }
- /*
- * Ok if we reach here its the TCP model and it is either a SHUT_WR
- * or SHUT_RDWR. This means we put the shutdown flag against it.
- */
- {
+ } else {
+ /*
+ * Ok, if we reach here its the TCP model and it is either a
+ * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag
+ * against it.
+ */
struct sctp_tcb *stcb;
struct sctp_association *asoc;
+ struct sctp_nets *netp;
if ((so->so_state &
(SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
@@ -1000,7 +946,7 @@
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
/*
- * Ok we hit the case that the shutdown call was
+ * Ok, we hit the case that the shutdown call was
* made after an abort or something. Nothing to do
* now.
*/
@@ -1009,7 +955,29 @@
}
SCTP_TCB_LOCK(stcb);
asoc = &stcb->asoc;
- if (TAILQ_EMPTY(&asoc->send_queue) &&
+ if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
+ /*
+ * If we are not in or before ESTABLISHED, there is
+ * no protocol action required.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if (stcb->asoc.alternate) {
+ netp = stcb->asoc.alternate;
+ } else {
+ netp = stcb->asoc.primary_destination;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
TAILQ_EMPTY(&asoc->sent_queue) &&
(asoc->stream_queue_cnt == 0)) {
if (asoc->locked_on_sending) {
@@ -1016,46 +984,19 @@
goto abort_anyway;
}
/* there is nothing queued to send, so I'm done... */
- if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
- /* only send SHUTDOWN the first time through */
- struct sctp_nets *netp;
-
- if (stcb->asoc.alternate) {
- netp = stcb->asoc.alternate;
- } else {
- netp = stcb->asoc.primary_destination;
- }
- sctp_stop_timers_for_shutdown(stcb);
- sctp_send_shutdown(stcb, netp);
- sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
- if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
- (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
- SCTP_STAT_DECR_GAUGE32(sctps_currestab);
- }
- SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
- SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
- stcb->sctp_ep, stcb, netp);
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
- stcb->sctp_ep, stcb, netp);
- }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb, netp);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, netp);
} else {
/*
- * we still got (or just got) data to send, so set
- * SHUTDOWN_PENDING
+ * We still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING.
*/
- struct sctp_nets *netp;
-
- if (stcb->asoc.alternate) {
- netp = stcb->asoc.alternate;
- } else {
- netp = stcb->asoc.primary_destination;
- }
-
- asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
- sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
- netp);
-
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
if (asoc->locked_on_sending) {
/* Locked to send out the data */
struct sctp_stream_queue_pending *sp;
@@ -1066,7 +1007,7 @@
asoc->locked_on_sending->stream_no);
} else {
if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
- asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
}
}
}
@@ -1076,36 +1017,24 @@
struct mbuf *op_err;
abort_anyway:
- op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
- 0, M_DONTWAIT, 1, MT_DATA);
- if (op_err) {
- /* Fill in the user initiated abort */
- struct sctp_paramhdr *ph;
- uint32_t *ippp;
-
- SCTP_BUF_LEN(op_err) =
- sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
- ph = mtod(op_err,
- struct sctp_paramhdr *);
- ph->param_type = htons(
- SCTP_CAUSE_USER_INITIATED_ABT);
- ph->param_length = htons(SCTP_BUF_LEN(op_err));
- ippp = (uint32_t *) (ph + 1);
- *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
- }
+ op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
sctp_abort_an_association(stcb->sctp_ep, stcb,
op_err, SCTP_SO_LOCKED);
- goto skip_unlock;
- } else {
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
}
}
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, netp);
+ /*
+ * XXX: Why do this in the case where we have still data
+ * queued?
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
}
-skip_unlock:
- SCTP_INP_RUNLOCK(inp);
- return (0);
}
/*
@@ -1139,9 +1068,17 @@
{
struct sctp_ifn *sctp_ifn;
struct sctp_ifa *sctp_ifa;
- int loopback_scope, ipv4_local_scope, local_scope, site_scope;
size_t actual;
- int ipv4_addr_legal, ipv6_addr_legal;
+ int loopback_scope;
+
+#if defined(INET)
+ int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined(INET6)
+ int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
struct sctp_vrf *vrf;
actual = 0;
@@ -1150,23 +1087,45 @@
if (stcb) {
/* Turn on all the appropriate scope */
- loopback_scope = stcb->asoc.loopback_scope;
- ipv4_local_scope = stcb->asoc.ipv4_local_scope;
- local_scope = stcb->asoc.local_scope;
- site_scope = stcb->asoc.site_scope;
+ loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+ ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+ ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+ local_scope = stcb->asoc.scope.local_scope;
+ site_scope = stcb->asoc.scope.site_scope;
+ ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
} else {
- /* Turn on ALL scope, since we look at the EP */
- loopback_scope = ipv4_local_scope = local_scope =
- site_scope = 1;
- }
- ipv4_addr_legal = ipv6_addr_legal = 0;
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ipv6_addr_legal = 1;
- if (SCTP_IPV6_V6ONLY(inp) == 0) {
+ /* Use generic values for endpoints. */
+ loopback_scope = 1;
+#if defined(INET)
+ ipv4_local_scope = 1;
+#endif
+#if defined(INET6)
+ local_scope = 1;
+ site_scope = 1;
+#endif
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+#if defined(INET6)
+ ipv6_addr_legal = 1;
+#endif
+#if defined(INET)
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ ipv4_addr_legal = 0;
+ } else {
+ ipv4_addr_legal = 1;
+ }
+#endif
+ } else {
+#if defined(INET6)
+ ipv6_addr_legal = 0;
+#endif
+#if defined(INET)
ipv4_addr_legal = 1;
+#endif
}
- } else {
- ipv4_addr_legal = 1;
}
vrf = sctp_find_vrf(vrf_id);
if (vrf == NULL) {
@@ -1200,7 +1159,7 @@
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/*
* we skip
@@ -1209,6 +1168,10 @@
*/
continue;
}
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
if ((ipv4_local_scope == 0) &&
(IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
continue;
@@ -1241,7 +1204,7 @@
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
/*
* we skip
@@ -1250,6 +1213,10 @@
*/
continue;
}
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
if (local_scope == 0)
continue;
@@ -1305,8 +1272,21 @@
}
if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
continue;
-
- ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ switch (laddr->ifa->address.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ ((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
sas = (struct sockaddr_storage *)((caddr_t)sas +
laddr->ifa->address.sa.sa_len);
actual += laddr->ifa->address.sa.sa_len;
@@ -1364,10 +1344,14 @@
switch (sctp_ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
cnt += sizeof(struct sockaddr_in6);
else
cnt += sizeof(struct sockaddr_in);
+#else
+ cnt += sizeof(struct sockaddr_in);
+#endif
break;
#endif
#ifdef INET6
@@ -1387,10 +1371,14 @@
switch (laddr->ifa->address.sa.sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4))
cnt += sizeof(struct sockaddr_in6);
else
cnt += sizeof(struct sockaddr_in);
+#else
+ cnt += sizeof(struct sockaddr_in);
+#endif
break;
#endif
#ifdef INET6
@@ -1514,6 +1502,7 @@
/* We are GOOD to go */
stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
(struct thread *)p
);
if (stcb == NULL) {
@@ -1546,7 +1535,8 @@
sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
/* Fill in the return id */
if (error) {
- (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
goto out_now;
}
a_id = (sctp_assoc_t *) optval;
@@ -1845,8 +1835,9 @@
SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, *optsize);
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- if (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
- &av->stream_value) < 0) {
+ if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
+ (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
+ &av->stream_value) < 0)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
} else {
@@ -1893,8 +1884,15 @@
uint32_t *value, cnt;
SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Can't do this for a 1-1 socket */
+ error = EINVAL;
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
cnt = 0;
- SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
cnt++;
}
@@ -1909,9 +1907,16 @@
unsigned int at, limit;
SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Can't do this for a 1-1 socket */
+ error = EINVAL;
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
at = 0;
limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t);
- SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
if (at < limit) {
ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
@@ -2207,23 +2212,27 @@
size = 0;
/* Count the sizes */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
- size += sizeof(struct sockaddr_in6);
- } else {
- switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ switch (net->ro._l_addr.sa.sa_family) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ size += sizeof(struct sockaddr_in6);
+ } else {
size += sizeof(struct sockaddr_in);
- break;
+ }
+#else
+ size += sizeof(struct sockaddr_in);
#endif
+ break;
+#endif
#ifdef INET6
- case AF_INET6:
- size += sizeof(struct sockaddr_in6);
- break;
+ case AF_INET6:
+ size += sizeof(struct sockaddr_in6);
+ break;
#endif
- default:
- break;
- }
+ default:
+ break;
}
}
SCTP_TCB_UNLOCK(stcb);
@@ -2255,24 +2264,28 @@
sas = (struct sockaddr_storage *)&saddr->addr[0];
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
- cpsz = sizeof(struct sockaddr_in6);
- } else {
- switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) {
+ switch (net->ro._l_addr.sa.sa_family) {
#ifdef INET
- case AF_INET:
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ cpsz = sizeof(struct sockaddr_in6);
+ } else {
cpsz = sizeof(struct sockaddr_in);
- break;
+ }
+#else
+ cpsz = sizeof(struct sockaddr_in);
#endif
+ break;
+#endif
#ifdef INET6
- case AF_INET6:
- cpsz = sizeof(struct sockaddr_in6);
- break;
+ case AF_INET6:
+ cpsz = sizeof(struct sockaddr_in6);
+ break;
#endif
- default:
- cpsz = 0;
- break;
- }
+ default:
+ cpsz = 0;
+ break;
}
if (cpsz == 0) {
break;
@@ -2283,15 +2296,15 @@
}
#if defined(INET) && defined(INET6)
if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
- (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+ (net->ro._l_addr.sa.sa_family == AF_INET)) {
/* Must map the address */
- in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+ in6_sin_2_v4mapsin6(&net->ro._l_addr.sin,
(struct sockaddr_in6 *)sas);
} else {
-#endif
memcpy(sas, &net->ro._l_addr, cpsz);
-#if defined(INET) && defined(INET6)
}
+#else
+ memcpy(sas, &net->ro._l_addr, cpsz);
#endif
((struct sockaddr_in *)sas)->sin_port = stcb->rport;
@@ -2328,14 +2341,36 @@
{
struct sctp_paddrparams *paddrp;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+#if defined(INET) && defined(INET6)
+ if (paddrp->spp_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
} else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -2343,22 +2378,20 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&paddrp->spp_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -2367,10 +2400,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -2386,21 +2419,27 @@
break;
}
}
- if (stcb) {
+ if (stcb != NULL) {
/* Applies to the specific association */
paddrp->spp_flags = 0;
- if (net) {
- int ovh;
-
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
- } else {
- ovh = SCTP_MED_V4_OVERHEAD;
- }
-
+ if (net != NULL) {
paddrp->spp_hbinterval = net->heart_beat_delay;
paddrp->spp_pathmaxrxt = net->failure_threshold;
- paddrp->spp_pathmtu = net->mtu - ovh;
+ paddrp->spp_pathmtu = net->mtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
/* get flags for HB */
if (net->dest_state & SCTP_ADDR_NOHB) {
paddrp->spp_flags |= SPP_HB_DISABLE;
@@ -2409,9 +2448,9 @@
}
/* get flags for PMTU */
if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
+ paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+ } else {
paddrp->spp_flags |= SPP_PMTUD_ENABLE;
- } else {
- paddrp->spp_flags |= SPP_PMTUD_DISABLE;
}
if (net->dscp & 0x01) {
paddrp->spp_dscp = net->dscp & 0xfc;
@@ -2430,7 +2469,7 @@
* value
*/
paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
- paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+ paddrp->spp_pathmtu = 0;
if (stcb->asoc.default_dscp & 0x01) {
paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc;
paddrp->spp_flags |= SPP_DSCP;
@@ -2505,14 +2544,36 @@
{
struct sctp_paddrinfo *paddri;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+#if defined(INET) && defined(INET6)
+ if (paddri->spinfo_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddri->spinfo_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+ }
} else {
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddri->spinfo_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -2520,14 +2581,15 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if ((stcb) && (net)) {
+ if ((stcb != NULL) && (net != NULL)) {
if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
/* It's unconfirmed */
paddri->spinfo_state = SCTP_UNCONFIRMED;
@@ -2543,10 +2605,24 @@
paddri->spinfo_rto = net->RTO;
paddri->spinfo_assoc_id = sctp_get_associd(stcb);
paddri->spinfo_mtu = net->mtu;
+ switch (addr->sa_family) {
+#if defined(INET)
+ case AF_INET:
+ paddri->spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#if defined(INET6)
+ case AF_INET6:
+ paddri->spinfo_mtu -= SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_paddrinfo);
} else {
- if (stcb) {
+ if (stcb != NULL) {
SCTP_TCB_UNLOCK(stcb);
}
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
@@ -2576,12 +2652,7 @@
error = EINVAL;
break;
}
- /*
- * I think passing the state is fine since
- * sctp_constants.h will be available to the user
- * land.
- */
- sstat->sstat_state = stcb->asoc.state;
+ sstat->sstat_state = sctp_map_assoc_state(stcb->asoc.state);
sstat->sstat_assoc_id = sctp_get_associd(stcb);
sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
@@ -2619,6 +2690,20 @@
sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
sstat->sstat_primary.spinfo_rto = net->RTO;
sstat->sstat_primary.spinfo_mtu = net->mtu;
+ switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
+#if defined(INET)
+ case AF_INET:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#if defined(INET6)
+ case AF_INET6:
+ sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
+ }
sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_status);
@@ -2763,16 +2848,32 @@
SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
if (stcb) {
- /* simply copy out the sockaddr_storage... */
- int len;
+ union sctp_sockstore *addr;
- len = *optsize;
- if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
- len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
-
- memcpy(&ssp->ssp_addr,
- &stcb->asoc.primary_destination->ro._l_addr,
- len);
+ addr = &stcb->asoc.primary_destination->ro._l_addr;
+ switch (addr->sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ in6_sin_2_v4mapsin6(&addr->sin,
+ (struct sockaddr_in6 *)&ssp->ssp_addr);
+ } else {
+ memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+ }
+#else
+ memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
+#endif
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ memcpy(&ssp->ssp_addr, &addr->sin6, sizeof(struct sockaddr_in6));
+ break;
+#endif
+ default:
+ break;
+ }
SCTP_TCB_UNLOCK(stcb);
*optsize = sizeof(struct sctp_setprim);
} else {
@@ -3112,14 +3213,36 @@
{
struct sctp_paddrthlds *thlds;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize);
SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_address);
+#if defined(INET) && defined(INET6)
+ if (thlds->spt_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
} else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+#else
+ addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -3127,22 +3250,20 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&thlds->spt_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&thlds->spt_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3151,10 +3272,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -3170,8 +3291,8 @@
break;
}
}
- if (stcb) {
- if (net) {
+ if (stcb != NULL) {
+ if (net != NULL) {
thlds->spt_pathmaxrxt = net->failure_threshold;
thlds->spt_pathpfthld = net->pf_threshold;
} else {
@@ -3203,12 +3324,35 @@
{
struct sctp_udpencaps *encaps;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize);
SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
+#if defined(INET) && defined(INET6)
+ if (encaps->sue_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+#else
+ addr = (struct sockaddr *)&encaps->sue_address;
+#endif
if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+ net = sctp_findnet(stcb, addr);
} else {
/*
* We increment here since
@@ -3219,21 +3363,18 @@
*/
net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&encaps->sue_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
@@ -3242,10 +3383,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
@@ -3261,7 +3402,7 @@
break;
}
}
- if (stcb) {
+ if (stcb != NULL) {
if (net) {
encaps->sue_port = net->port;
} else {
@@ -3281,10 +3422,199 @@
}
}
if (error == 0) {
- *optsize = sizeof(struct sctp_paddrparams);
+ *optsize = sizeof(struct sctp_udpencaps);
}
break;
}
+ case SCTP_ECN_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.ecn_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->ecn_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_PR_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.prsctp_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->prsctp_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_AUTH_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.auth_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->auth_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_ASCONF_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.asconf_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->asconf_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_RECONFIG_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.reconfig_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->reconfig_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_NRSACK_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.nrsack_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->nrsack_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
+ case SCTP_PKTDROP_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.pktdrop_supported;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->pktdrop_supported;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
case SCTP_ENABLE_STREAM_RESET:
{
struct sctp_assoc_value *av;
@@ -3312,6 +3642,100 @@
}
break;
}
+ case SCTP_PR_STREAM_STATUS:
+ {
+ struct sctp_prstatus *sprstat;
+ uint16_t sid;
+ uint16_t policy;
+
+ SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+ sid = sprstat->sprstat_sid;
+ policy = sprstat->sprstat_policy;
+#if defined(SCTP_DETAILED_STR_STATS)
+ if ((stcb != NULL) &&
+ (sid < stcb->asoc.streamoutcnt) &&
+ (policy != SCTP_PR_SCTP_NONE) &&
+ ((policy <= SCTP_PR_SCTP_MAX) ||
+ (policy == SCTP_PR_SCTP_ALL))) {
+ if (policy == SCTP_PR_SCTP_ALL) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+ } else {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[policy];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[policy];
+ }
+#else
+ if ((stcb != NULL) &&
+ (sid < stcb->asoc.streamoutcnt) &&
+ (policy == SCTP_PR_SCTP_ALL)) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
+#endif
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_prstatus);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ break;
+ }
+ case SCTP_PR_ASSOC_STATUS:
+ {
+ struct sctp_prstatus *sprstat;
+ uint16_t policy;
+
+ SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
+
+ policy = sprstat->sprstat_policy;
+ if ((stcb != NULL) &&
+ (policy != SCTP_PR_SCTP_NONE) &&
+ ((policy <= SCTP_PR_SCTP_MAX) ||
+ (policy == SCTP_PR_SCTP_ALL))) {
+ if (policy == SCTP_PR_SCTP_ALL) {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[0];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[0];
+ } else {
+ sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[policy];
+ sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[policy];
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(struct sctp_prstatus);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ break;
+ }
+ case SCTP_MAX_CWND:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.max_cwnd;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->max_cwnd;
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ if (error == 0) {
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
@@ -3651,8 +4075,9 @@
SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, optsize);
SCTP_FIND_STCB(inp, stcb, av->assoc_id);
if (stcb) {
- if (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
- av->stream_value) < 0) {
+ if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
+ (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
+ av->stream_value) < 0)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
@@ -3662,14 +4087,15 @@
SCTP_INP_RLOCK(inp);
LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
SCTP_TCB_LOCK(stcb);
- stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
- &stcb->asoc,
- &stcb->asoc.strmout[av->stream_id],
- av->stream_value);
+ if (av->stream_id < stcb->asoc.streamoutcnt) {
+ stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
+ &stcb->asoc,
+ &stcb->asoc.strmout[av->stream_id],
+ av->stream_value);
+ }
SCTP_TCB_UNLOCK(stcb);
}
SCTP_INP_RUNLOCK(inp);
-
} else {
/*
* Can't set stream value without
@@ -3942,15 +4368,15 @@
sctp_hmaclist_t *hmaclist;
uint16_t hmacid;
uint32_t i;
- size_t found;
SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
- if (optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) {
+ if ((optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) ||
+ (shmac->shmac_number_of_idents > 0xffff)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
}
- hmaclist = sctp_alloc_hmaclist(shmac->shmac_number_of_idents);
+ hmaclist = sctp_alloc_hmaclist((uint16_t) shmac->shmac_number_of_idents);
if (hmaclist == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
error = ENOMEM;
@@ -3966,14 +4392,14 @@
goto sctp_set_hmac_done;
}
}
- found = 0;
for (i = 0; i < hmaclist->num_algo; i++) {
if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
/* already in list */
- found = 1;
+ break;
}
}
- if (!found) {
+ if (i == hmaclist->num_algo) {
+ /* not found in list */
sctp_free_hmaclist(hmaclist);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -4158,7 +4584,7 @@
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4167,18 +4593,30 @@
SCTP_TCB_UNLOCK(stcb);
break;
}
- if (stcb->asoc.stream_reset_outstanding) {
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
- error = EALREADY;
+ if (sizeof(struct sctp_reset_streams) +
+ strrst->srs_number_streams * sizeof(uint16_t) > optsize) {
+ error = EINVAL;
SCTP_TCB_UNLOCK(stcb);
break;
}
if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) {
send_in = 1;
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
}
if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) {
send_out = 1;
}
+ if ((strrst->srs_number_streams > SCTP_MAX_STREAMS_AT_ONCE_RESET) && send_in) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
if ((send_in == 0) && (send_out == 0)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -4203,11 +4641,46 @@
SCTP_TCB_UNLOCK(stcb);
break;
}
- error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
- strrst->srs_stream_list,
- send_out, send_in, 0, 0, 0, 0, 0);
+ if (send_out) {
+ int cnt;
+ uint16_t strm;
- sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ if (strrst->srs_number_streams) {
+ for (i = 0, cnt = 0; i < strrst->srs_number_streams; i++) {
+ strm = strrst->srs_stream_list[i];
+ if (stcb->asoc.strmout[strm].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[strm].state = SCTP_STREAM_RESET_PENDING;
+ cnt++;
+ }
+ }
+ } else {
+ /* Its all */
+ for (i = 0, cnt = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) {
+ stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
+ cnt++;
+ }
+ }
+ }
+ }
+ if (send_in) {
+ error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
+ strrst->srs_stream_list,
+ send_in, 0, 0, 0, 0, 0);
+ } else {
+ error = sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_LOCKED);
+ }
+ if (error == 0) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ } else {
+ /*
+ * For outgoing streams don't report any
+ * problems in sending the request to the
+ * application. XXX: Double check resetting
+ * incoming streams.
+ */
+ error = 0;
+ }
SCTP_TCB_UNLOCK(stcb);
break;
}
@@ -4225,7 +4698,7 @@
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4277,7 +4750,7 @@
goto skip_stuff;
}
}
- error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
+ error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
skip_stuff:
SCTP_TCB_UNLOCK(stcb);
@@ -4285,6 +4758,7 @@
}
case SCTP_RESET_ASSOC:
{
+ int i;
uint32_t *value;
SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
@@ -4294,7 +4768,7 @@
error = ENOENT;
break;
}
- if (stcb->asoc.peer_supports_strreset == 0) {
+ if (stcb->asoc.reconfig_supported == 0) {
/*
* Peer does not support the chunk type.
*/
@@ -4309,7 +4783,25 @@
SCTP_TCB_UNLOCK(stcb);
break;
}
- error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, 0, 0, 0, 0);
+ /*
+ * Is there any data pending in the send or sent
+ * queues?
+ */
+ if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
+ !TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ busy_out:
+ error = EBUSY;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /* Do any streams have data queued? */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ goto busy_out;
+ }
+ }
+ error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 1, 0, 0, 0, 0);
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
SCTP_TCB_UNLOCK(stcb);
break;
@@ -4333,7 +4825,6 @@
case SCTP_CONNECT_X_COMPLETE:
{
struct sockaddr *sa;
- struct sctp_nets *net;
/* FIXME MT: check correct? */
SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
@@ -4344,7 +4835,6 @@
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb) {
SCTP_TCB_LOCK(stcb);
- net = sctp_findnet(stcb, sa);
}
SCTP_INP_RUNLOCK(inp);
} else {
@@ -4356,7 +4846,7 @@
* TCB.. aka NULL.
*/
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
@@ -4372,7 +4862,7 @@
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
stcb->asoc.primary_destination,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
} else {
/*
@@ -4617,6 +5107,7 @@
SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
SCTP_INP_WLOCK(inp);
inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
+ inp->sctp_ep.adaptation_layer_indicator_provided = 1;
SCTP_INP_WUNLOCK(inp);
break;
}
@@ -4675,13 +5166,36 @@
{
struct sctp_paddrparams *paddrp;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+
+#if defined(INET) && defined(INET6)
+ if (paddrp->spp_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
} else {
+ addr = (struct sockaddr *)&paddrp->spp_address;
+ }
+#else
+ addr = (struct sockaddr *)&paddrp->spp_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -4689,25 +5203,22 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&paddrp->spp_address,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&paddrp->spp_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -4716,10 +5227,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -4748,28 +5259,15 @@
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- if (stcb) {
+ if (stcb != NULL) {
/************************TCB SPECIFIC SET ******************/
- /*
- * do we change the timer for HB, we run
- * only one?
- */
- int ovh = 0;
-
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ovh = SCTP_MED_OVERHEAD;
- } else {
- ovh = SCTP_MED_V4_OVERHEAD;
- }
-
- /* network sets ? */
- if (net) {
+ if (net != NULL) {
/************************NET SPECIFIC SET ******************/
if (paddrp->spp_flags & SPP_HB_DISABLE) {
if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
!(net->dest_state & SCTP_ADDR_NOHB)) {
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
}
net->dest_state |= SCTP_ADDR_NOHB;
}
@@ -4793,15 +5291,27 @@
if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
}
net->dest_state |= SCTP_ADDR_NO_PMTUD;
- if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
- net->mtu = paddrp->spp_pathmtu + ovh;
- if (net->mtu < stcb->asoc.smallest_mtu) {
- sctp_pathmtu_adjustment(stcb, net->mtu);
- }
+ net->mtu = paddrp->spp_pathmtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ net->mtu += SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ net->mtu += SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
}
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
}
if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
@@ -4819,7 +5329,9 @@
(net->error_count > net->pf_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
@@ -4850,7 +5362,7 @@
#endif
} else {
/************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
- if (paddrp->spp_pathmaxrxt) {
+ if (paddrp->spp_pathmaxrxt != 0) {
stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (net->dest_state & SCTP_ADDR_PF) {
@@ -4862,7 +5374,9 @@
(net->error_count > net->pf_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_13);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
@@ -4881,7 +5395,7 @@
}
}
if (paddrp->spp_flags & SPP_HB_ENABLE) {
- if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval != 0) {
stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
stcb->asoc.heart_beat_delay = 0;
@@ -4888,7 +5402,7 @@
}
/* Turn back on the timer */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
- if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval != 0) {
net->heart_beat_delay = paddrp->spp_hbinterval;
} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
net->heart_beat_delay = 0;
@@ -4897,7 +5411,7 @@
net->dest_state &= ~SCTP_ADDR_NOHB;
}
sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_14);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
}
sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
@@ -4907,7 +5421,9 @@
if (!(net->dest_state & SCTP_ADDR_NOHB)) {
net->dest_state |= SCTP_ADDR_NOHB;
if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_15);
}
}
}
@@ -4917,15 +5433,27 @@
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
- SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_16);
}
net->dest_state |= SCTP_ADDR_NO_PMTUD;
- if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
- net->mtu = paddrp->spp_pathmtu + ovh;
- if (net->mtu < stcb->asoc.smallest_mtu) {
- sctp_pathmtu_adjustment(stcb, net->mtu);
- }
+ net->mtu = paddrp->spp_pathmtu;
+ switch (net->ro._l_addr.sa.sa_family) {
+#ifdef INET
+ case AF_INET:
+ net->mtu += SCTP_MIN_V4_OVERHEAD;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ net->mtu += SCTP_MIN_OVERHEAD;
+ break;
+#endif
+ default:
+ break;
}
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
}
sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
}
@@ -4971,12 +5499,12 @@
* set it with the options on the
* socket
*/
- if (paddrp->spp_pathmaxrxt) {
+ if (paddrp->spp_pathmaxrxt != 0) {
inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
}
if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
- else if (paddrp->spp_hbinterval) {
+ else if (paddrp->spp_hbinterval != 0) {
if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
@@ -5142,14 +5670,36 @@
{
struct sctp_setprim *spa;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+#if defined(INET) && defined(INET6)
+ if (spa->ssp_addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&spa->ssp_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&spa->ssp_addr;
+ }
} else {
+ addr = (struct sockaddr *)&spa->ssp_addr;
+ }
+#else
+ addr = (struct sockaddr *)&spa->ssp_addr;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -5157,9 +5707,9 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&spa->ssp_addr,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
@@ -5166,17 +5716,24 @@
}
}
- if ((stcb) && (net)) {
- if ((net != stcb->asoc.primary_destination) &&
- (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
- /* Ok we need to set it */
- if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
- if ((stcb->asoc.alternate) &&
- (!(net->dest_state & SCTP_ADDR_PF)) &&
- (net->dest_state & SCTP_ADDR_REACHABLE)) {
- sctp_free_remote_addr(stcb->asoc.alternate);
- stcb->asoc.alternate = NULL;
+ if ((stcb != NULL) && (net != NULL)) {
+ if (net != stcb->asoc.primary_destination) {
+ if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ /* Ok we need to set it */
+ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+ if ((stcb->asoc.alternate) &&
+ (!(net->dest_state & SCTP_ADDR_PF)) &&
+ (net->dest_state & SCTP_ADDR_REACHABLE)) {
+ sctp_free_remote_addr(stcb->asoc.alternate);
+ stcb->asoc.alternate = NULL;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
}
}
} else {
@@ -5183,7 +5740,7 @@
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
- if (stcb) {
+ if (stcb != NULL) {
SCTP_TCB_UNLOCK(stcb);
}
break;
@@ -5205,14 +5762,36 @@
case SCTP_SET_PEER_PRIMARY_ADDR:
{
struct sctp_setpeerprim *sspp;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
if (stcb != NULL) {
struct sctp_ifa *ifa;
- ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
- stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+#if defined(INET) && defined(INET6)
+ if (sspp->sspp_addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+ }
+ } else {
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+ }
+#else
+ addr = (struct sockaddr *)&sspp->sspp_addr;
+#endif
+ ifa = sctp_find_ifa_by_addr(addr, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
if (ifa == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
@@ -5229,7 +5808,7 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
if (laddr->ifa == NULL) {
SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
- __FUNCTION__);
+ __func__);
continue;
}
if (laddr->ifa == ifa) {
@@ -5242,9 +5821,45 @@
error = EINVAL;
goto out_of_it;
}
+ } else {
+ switch (addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ break;
+ }
+#endif
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
}
- if (sctp_set_primary_ip_address_sa(stcb,
- (struct sockaddr *)&sspp->sspp_addr) != 0) {
+ if (sctp_set_primary_ip_address_sa(stcb, addr) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
}
@@ -5554,7 +6169,7 @@
SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize);
SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
- if (PR_SCTP_INVALID_POLICY(info->pr_policy)) {
+ if (info->pr_policy > SCTP_PR_SCTP_MAX) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
error = EINVAL;
break;
@@ -5595,13 +6210,36 @@
{
struct sctp_paddrthlds *thlds;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize);
SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
- net = NULL;
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&thlds->spt_assoc_id);
+
+#if defined(INET) && defined(INET6)
+ if (thlds->spt_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
} else {
+ addr = (struct sockaddr *)&thlds->spt_address;
+ }
+#else
+ addr = (struct sockaddr *)&thlds->spt_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -5609,25 +6247,22 @@
* the locked tcb (last argument) is NOT a
* TCB.. aka NULL.
*/
+ net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp,
- (struct sockaddr *)&thlds->spt_assoc_id,
+ stcb = sctp_findassociation_ep_addr(&inp, addr,
&net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&thlds->spt_assoc_id;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -5636,10 +6271,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -5655,68 +6290,73 @@
break;
}
}
- if (stcb) {
- if (net) {
+ if (stcb != NULL) {
+ if (net != NULL) {
+ net->failure_threshold = thlds->spt_pathmaxrxt;
+ net->pf_threshold = thlds->spt_pathpfthld;
if (net->dest_state & SCTP_ADDR_PF) {
- if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
- (net->failure_threshold <= thlds->spt_pathpfthld)) {
+ if ((net->error_count > net->failure_threshold) ||
+ (net->error_count <= net->pf_threshold)) {
net->dest_state &= ~SCTP_ADDR_PF;
}
} else {
- if ((net->failure_threshold > thlds->spt_pathpfthld) &&
- (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+ if ((net->error_count > net->pf_threshold) &&
+ (net->error_count <= net->failure_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_17);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
if (net->dest_state & SCTP_ADDR_REACHABLE) {
- if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+ if (net->error_count > net->failure_threshold) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
- if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+ if (net->error_count <= net->failure_threshold) {
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
- net->failure_threshold = thlds->spt_pathmaxrxt;
- net->pf_threshold = thlds->spt_pathpfthld;
} else {
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ net->failure_threshold = thlds->spt_pathmaxrxt;
+ net->pf_threshold = thlds->spt_pathpfthld;
if (net->dest_state & SCTP_ADDR_PF) {
- if ((net->failure_threshold > thlds->spt_pathmaxrxt) ||
- (net->failure_threshold <= thlds->spt_pathpfthld)) {
+ if ((net->error_count > net->failure_threshold) ||
+ (net->error_count <= net->pf_threshold)) {
net->dest_state &= ~SCTP_ADDR_PF;
}
} else {
- if ((net->failure_threshold > thlds->spt_pathpfthld) &&
- (net->failure_threshold <= thlds->spt_pathmaxrxt)) {
+ if ((net->error_count > net->pf_threshold) &&
+ (net->error_count <= net->failure_threshold)) {
net->dest_state |= SCTP_ADDR_PF;
sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
- sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_18);
sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
}
}
if (net->dest_state & SCTP_ADDR_REACHABLE) {
- if (net->failure_threshold > thlds->spt_pathmaxrxt) {
+ if (net->error_count > net->failure_threshold) {
net->dest_state &= ~SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
}
} else {
- if (net->failure_threshold <= thlds->spt_pathmaxrxt) {
+ if (net->error_count <= net->failure_threshold) {
net->dest_state |= SCTP_ADDR_REACHABLE;
sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
}
}
- net->failure_threshold = thlds->spt_pathmaxrxt;
- net->pf_threshold = thlds->spt_pathpfthld;
}
stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt;
stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld;
}
+ SCTP_TCB_UNLOCK(stcb);
} else {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
@@ -5736,12 +6376,36 @@
{
struct sctp_udpencaps *encaps;
struct sctp_nets *net;
+ struct sockaddr *addr;
+#if defined(INET) && defined(INET6)
+ struct sockaddr_in sin_store;
+
+#endif
+
SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize);
SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
- if (stcb) {
- net = sctp_findnet(stcb, (struct sockaddr *)&encaps->sue_address);
+
+#if defined(INET) && defined(INET6)
+ if (encaps->sue_address.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ in6_sin6_2_sin(&sin_store, sin6);
+ addr = (struct sockaddr *)&sin_store;
+ } else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
} else {
+ addr = (struct sockaddr *)&encaps->sue_address;
+ }
+#else
+ addr = (struct sockaddr *)&encaps->sue_address;
+#endif
+ if (stcb != NULL) {
+ net = sctp_findnet(stcb, addr);
+ } else {
/*
* We increment here since
* sctp_findassociation_ep_addr() wil do a
@@ -5751,22 +6415,19 @@
*/
net = NULL;
SCTP_INP_INCR_REF(inp);
- stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&encaps->sue_address, &net, NULL, NULL);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
if (stcb == NULL) {
SCTP_INP_DECR_REF(inp);
}
}
- if (stcb && (net == NULL)) {
- struct sockaddr *sa;
-
- sa = (struct sockaddr *)&encaps->sue_address;
+ if ((stcb != NULL) && (net == NULL)) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
+ if (addr->sa_family == AF_INET) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)sa;
- if (sin->sin_addr.s_addr) {
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
error = EINVAL;
@@ -5775,10 +6436,10 @@
} else
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
+ if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)sa;
+ sin6 = (struct sockaddr_in6 *)addr;
if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
SCTP_TCB_UNLOCK(stcb);
@@ -5794,8 +6455,8 @@
break;
}
}
- if (stcb) {
- if (net) {
+ if (stcb != NULL) {
+ if (net != NULL) {
net->port = encaps->sue_port;
} else {
stcb->asoc.port = encaps->sue_port;
@@ -5815,6 +6476,273 @@
}
break;
}
+ case SCTP_ECN_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->ecn_supported = 0;
+ } else {
+ inp->ecn_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_PR_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->prsctp_supported = 0;
+ } else {
+ inp->prsctp_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_AUTH_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ if ((av->assoc_value == 0) &&
+ (inp->asconf_supported == 1)) {
+ /*
+ * AUTH is required for
+ * ASCONF
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->auth_supported = 0;
+ } else {
+ inp->auth_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_ASCONF_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ if ((av->assoc_value != 0) &&
+ (inp->auth_supported == 0)) {
+ /*
+ * AUTH is required for
+ * ASCONF
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->asconf_supported = 0;
+ sctp_auth_delete_chunk(SCTP_ASCONF,
+ inp->sctp_ep.local_auth_chunks);
+ sctp_auth_delete_chunk(SCTP_ASCONF_ACK,
+ inp->sctp_ep.local_auth_chunks);
+ } else {
+ inp->asconf_supported = 1;
+ sctp_auth_add_chunk(SCTP_ASCONF,
+ inp->sctp_ep.local_auth_chunks);
+ sctp_auth_add_chunk(SCTP_ASCONF_ACK,
+ inp->sctp_ep.local_auth_chunks);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_RECONFIG_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->reconfig_supported = 0;
+ } else {
+ inp->reconfig_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_NRSACK_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->nrsack_supported = 0;
+ } else {
+ inp->nrsack_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_PKTDROP_SUPPORTED:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ if (av->assoc_value == 0) {
+ inp->pktdrop_supported = 0;
+ } else {
+ inp->pktdrop_supported = 1;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
+ case SCTP_MAX_CWND:
+ {
+ struct sctp_assoc_value *av;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ stcb->asoc.max_cwnd = av->assoc_value;
+ if (stcb->asoc.max_cwnd > 0) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((net->cwnd > stcb->asoc.max_cwnd) &&
+ (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
+ net->cwnd = stcb->asoc.max_cwnd;
+ if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
+ net->cwnd = net->mtu - sizeof(struct sctphdr);
+ }
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ (av->assoc_id == SCTP_FUTURE_ASSOC)) {
+ SCTP_INP_WLOCK(inp);
+ inp->max_cwnd = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ }
default:
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
error = ENOPROTOOPT;
@@ -5830,7 +6758,20 @@
size_t optsize = 0;
void *p;
int error = 0;
+ struct sctp_inpcb *inp;
+ if ((sopt->sopt_level == SOL_SOCKET) &&
+ (sopt->sopt_name == SO_SETFIB)) {
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (EINVAL);
+ }
+ SCTP_INP_WLOCK(inp);
+ inp->fibnum = so->so_fibnum;
+ SCTP_INP_WUNLOCK(inp);
+ return (0);
+ }
if (sopt->sopt_level != IPPROTO_SCTP) {
/* wrong proto level... send back up to IP */
#ifdef INET6
@@ -5837,7 +6778,7 @@
if (INP_CHECK_SOCKAF(so, AF_INET6))
error = ip6_ctloutput(so, sopt);
#endif /* INET6 */
-#if defined(INET) && defined (INET6)
+#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
@@ -5955,7 +6896,7 @@
error = EINVAL;
goto out_now;
}
-#endif /* INET6 */
+#endif
if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
SCTP_PCB_FLAGS_UNBOUND) {
/* Bind a ephemeral port */
@@ -6004,7 +6945,7 @@
}
vrf_id = inp->def_vrf_id;
/* We are GOOD to go */
- stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, p);
if (stcb == NULL) {
/* Gak! no memory */
goto out_now;
@@ -6056,9 +6997,8 @@
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
/* See if we have a listener */
struct sctp_inpcb *tinp;
- union sctp_sockstore store, *sp;
+ union sctp_sockstore store;
- sp = &store;
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
/* not bound all */
struct sctp_laddr *laddr;
@@ -6065,21 +7005,21 @@
LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
memcpy(&store, &laddr->ifa->address, sizeof(store));
- switch (sp->sa.sa_family) {
+ switch (store.sa.sa_family) {
#ifdef INET
case AF_INET:
- sp->sin.sin_port = inp->sctp_lport;
+ store.sin.sin_port = inp->sctp_lport;
break;
#endif
#ifdef INET6
case AF_INET6:
- sp->sin6.sin6_port = inp->sctp_lport;
+ store.sin6.sin6_port = inp->sctp_lport;
break;
#endif
default:
break;
}
- tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+ tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
@@ -6097,8 +7037,20 @@
} else {
/* Setup a local addr bound all */
memset(&store, 0, sizeof(store));
- switch (sp->sa.sa_family) {
+#ifdef INET6
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ store.sa.sa_family = AF_INET6;
+ store.sa.sa_len = sizeof(struct sockaddr_in6);
+ }
+#endif
#ifdef INET
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ store.sa.sa_family = AF_INET;
+ store.sa.sa_len = sizeof(struct sockaddr_in);
+ }
+#endif
+ switch (store.sa.sa_family) {
+#ifdef INET
case AF_INET:
store.sin.sin_port = inp->sctp_lport;
break;
@@ -6105,25 +7057,13 @@
#endif
#ifdef INET6
case AF_INET6:
- sp->sin6.sin6_port = inp->sctp_lport;
+ store.sin6.sin6_port = inp->sctp_lport;
break;
#endif
default:
break;
}
-#ifdef INET6
- if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- store.sa.sa_family = AF_INET6;
- store.sa.sa_len = sizeof(struct sockaddr_in6);
- }
-#endif
-#ifdef INET
- if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
- store.sa.sa_family = AF_INET;
- store.sa.sa_len = sizeof(struct sockaddr_in);
- }
-#endif
- tinp = sctp_pcb_findep(&sp->sa, 0, 0, inp->def_vrf_id);
+ tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
@@ -6135,7 +7075,7 @@
SCTP_INP_DECR_REF(tinp);
return (EADDRINUSE);
} else if (tinp) {
- SCTP_INP_DECR_REF(inp);
+ SCTP_INP_DECR_REF(tinp);
}
}
}
@@ -6147,8 +7087,8 @@
#endif
SOCK_LOCK(so);
error = solisten_proto_check(so);
+ SOCK_UNLOCK(so);
if (error) {
- SOCK_UNLOCK(so);
SCTP_INP_RUNLOCK(inp);
return (error);
}
@@ -6161,15 +7101,15 @@
* move the guy that was listener to the TCP Pool.
*/
if (sctp_swap_inpcb_for_listen(inp)) {
- goto in_use;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
}
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
-in_use:
SCTP_INP_RUNLOCK(inp);
- SOCK_UNLOCK(so);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
}
@@ -6176,13 +7116,12 @@
SCTP_INP_RUNLOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
/* We must do a bind. */
- SOCK_UNLOCK(so);
if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
/* bind error, probably perm */
return (error);
}
- SOCK_LOCK(so);
}
+ SOCK_LOCK(so);
/* It appears for 7.0 and on, we must always call this. */
solisten_proto(so, backlog);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
@@ -6249,8 +7188,8 @@
return (ENOMEM);
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
- sin->sin_port = ((struct sockaddr_in *)&store)->sin_port;
- sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr;
+ sin->sin_port = store.sin.sin_port;
+ sin->sin_addr = store.sin.sin_addr;
*addr = (struct sockaddr *)sin;
break;
}
@@ -6265,9 +7204,8 @@
return (ENOMEM);
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
- sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port;
-
- sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr;
+ sin6->sin6_port = store.sin6.sin6_port;
+ sin6->sin6_addr = store.sin6.sin6_addr;
if ((error = sa6_recoverscope(sin6)) != 0) {
SCTP_FREE_SONAME(sin6);
return (error);
@@ -6311,7 +7249,8 @@
}
if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
SCTP_TCB_LOCK(stcb);
- sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
}
return (0);
}
@@ -6396,7 +7335,7 @@
if (laddr->ifa->address.sa.sa_family == AF_INET) {
struct sockaddr_in *sin_a;
- sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+ sin_a = &laddr->ifa->address.sin;
sin->sin_addr = sin_a->sin_addr;
fnd = 1;
break;
Modified: trunk/sys/netinet/sctp_var.h
===================================================================
--- trunk/sys/netinet/sctp_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctp_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctp_var.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctp_var.h 295208 2016-02-03 14:04:07Z tuexen $");
#ifndef _NETINET_SCTP_VAR_H_
#define _NETINET_SCTP_VAR_H_
@@ -73,7 +73,7 @@
((stcb->asoc.sctp_features & feature) == 0)) || \
((stcb == NULL) && (inp != NULL) && \
((inp->sctp_features & feature) == 0)) || \
- ((stcb == NULL) && (inp == NULL)))
+ ((stcb == NULL) && (inp == NULL)))
/* managing mobility_feature in inpcb (by micchie) */
#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
@@ -87,7 +87,7 @@
#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
-#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+#define sctp_sbspace_sub(a,b) (((a) > (b)) ? ((a) - (b)) : 0)
/*
* I tried to cache the readq entries at one point. But the reality
@@ -107,7 +107,7 @@
#define sctp_alloc_a_readq(_stcb, _readq) { \
(_readq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_readq), struct sctp_queued_to_read); \
if ((_readq)) { \
- SCTP_INCR_READQ_COUNT(); \
+ SCTP_INCR_READQ_COUNT(); \
} \
}
@@ -122,11 +122,11 @@
#define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
(_strmoq) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_strmoq), struct sctp_stream_queue_pending); \
- if ((_strmoq)) { \
+ if ((_strmoq)) { \
memset(_strmoq, 0, sizeof(struct sctp_stream_queue_pending)); \
SCTP_INCR_STRMOQ_COUNT(); \
(_strmoq)->holds_key_ref = 0; \
- } \
+ } \
}
#define sctp_free_a_chunk(_stcb, _chk, _so_locked) { \
@@ -134,22 +134,22 @@
sctp_auth_key_release((_stcb), (_chk)->auth_keyid, _so_locked); \
(_chk)->holds_key_ref = 0; \
} \
- if (_stcb) { \
- SCTP_TCB_LOCK_ASSERT((_stcb)); \
- if ((_chk)->whoTo) { \
- sctp_free_remote_addr((_chk)->whoTo); \
- (_chk)->whoTo = NULL; \
- } \
- if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
- (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
- SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
- SCTP_DECR_CHK_COUNT(); \
- } else { \
- TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
- (_stcb)->asoc.free_chunk_cnt++; \
- atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
- } \
- } else { \
+ if (_stcb) { \
+ SCTP_TCB_LOCK_ASSERT((_stcb)); \
+ if ((_chk)->whoTo) { \
+ sctp_free_remote_addr((_chk)->whoTo); \
+ (_chk)->whoTo = NULL; \
+ } \
+ if (((_stcb)->asoc.free_chunk_cnt > SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit)) || \
+ (SCTP_BASE_INFO(ipi_free_chunks) > SCTP_BASE_SYSCTL(sctp_system_free_resc_limit))) { \
+ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } else { \
+ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ (_stcb)->asoc.free_chunk_cnt++; \
+ atomic_add_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
+ } \
+ } else { \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), (_chk)); \
SCTP_DECR_CHK_COUNT(); \
} \
@@ -160,7 +160,7 @@
(_chk) = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_chunk), struct sctp_tmit_chunk); \
if ((_chk)) { \
SCTP_INCR_CHK_COUNT(); \
- (_chk)->whoTo = NULL; \
+ (_chk)->whoTo = NULL; \
(_chk)->holds_key_ref = 0; \
} \
} else { \
@@ -168,7 +168,7 @@
TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); \
(_chk)->holds_key_ref = 0; \
- SCTP_STAT_INCR(sctps_cached_chk); \
+ SCTP_STAT_INCR(sctps_cached_chk); \
(_stcb)->asoc.free_chunk_cnt--; \
} \
}
@@ -179,15 +179,16 @@
if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \
(void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
(void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
- if ((__net)->ro.ro_rt) { \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->hb_timer.timer); \
+ if ((__net)->ro.ro_rt) { \
RTFREE((__net)->ro.ro_rt); \
(__net)->ro.ro_rt = NULL; \
- } \
+ } \
if ((__net)->src_addr_selected) { \
sctp_free_ifa((__net)->ro._s_addr); \
(__net)->ro._s_addr = NULL; \
} \
- (__net)->src_addr_selected = 0; \
+ (__net)->src_addr_selected = 0; \
(__net)->dest_state &= ~SCTP_ADDR_REACHABLE; \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_net), (__net)); \
SCTP_DECR_RADDR_COUNT(); \
@@ -251,12 +252,12 @@
} while (0)
#define sctp_flight_size_increase(tp1) do { \
- (tp1)->whoTo->flight_size += (tp1)->book_size; \
+ (tp1)->whoTo->flight_size += (tp1)->book_size; \
} while (0)
#ifdef SCTP_FS_SPEC_LOG
#define sctp_total_flight_decrease(stcb, tp1) do { \
- if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
stcb->asoc.fs_index = 0;\
stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -265,7 +266,7 @@
stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
stcb->asoc.fs_index++; \
- tp1->window_probe = 0; \
+ tp1->window_probe = 0; \
if (stcb->asoc.total_flight >= tp1->book_size) { \
stcb->asoc.total_flight -= tp1->book_size; \
if (stcb->asoc.total_flight_count > 0) \
@@ -277,7 +278,7 @@
} while (0)
#define sctp_total_flight_increase(stcb, tp1) do { \
- if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ if (stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
stcb->asoc.fs_index = 0;\
stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
@@ -286,14 +287,14 @@
stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
stcb->asoc.fs_index++; \
- (stcb)->asoc.total_flight_count++; \
- (stcb)->asoc.total_flight += (tp1)->book_size; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
} while (0)
#else
#define sctp_total_flight_decrease(stcb, tp1) do { \
- tp1->window_probe = 0; \
+ tp1->window_probe = 0; \
if (stcb->asoc.total_flight >= tp1->book_size) { \
stcb->asoc.total_flight -= tp1->book_size; \
if (stcb->asoc.total_flight_count > 0) \
@@ -305,8 +306,8 @@
} while (0)
#define sctp_total_flight_increase(stcb, tp1) do { \
- (stcb)->asoc.total_flight_count++; \
- (stcb)->asoc.total_flight += (tp1)->book_size; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
} while (0)
#endif
@@ -322,15 +323,11 @@
void sctp_close(struct socket *so);
int sctp_disconnect(struct socket *so);
-
void sctp_ctlinput(int, struct sockaddr *, void *);
int sctp_ctloutput(struct socket *, struct sockopt *);
#ifdef INET
void sctp_input_with_port(struct mbuf *, int, uint16_t);
-
-#endif
-#ifdef INET
void sctp_input(struct mbuf *, int);
#endif
@@ -337,34 +334,24 @@
void sctp_pathmtu_adjustment(struct sctp_tcb *, uint16_t);
void sctp_drain(void);
void sctp_init(void);
-
void sctp_finish(void);
-
int sctp_flush(struct socket *, int);
int sctp_shutdown(struct socket *);
-void sctp_notify
-(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+void
+sctp_notify(struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
struct sockaddr *, struct sctp_tcb *,
struct sctp_nets *);
+int
+sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+ int, int, struct proc *);
- int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
- int, int, struct proc *);
-
/* can't use sctp_assoc_t here */
- int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+int sctp_ingetaddr(struct socket *, struct sockaddr **);
+int sctp_peeraddr(struct socket *, struct sockaddr **);
+int sctp_listen(struct socket *, int, struct thread *);
+int sctp_accept(struct socket *, struct sockaddr **);
- int sctp_ingetaddr(struct socket *,
- struct sockaddr **
-);
-
- int sctp_peeraddr(struct socket *,
- struct sockaddr **
-);
-
- int sctp_listen(struct socket *, int, struct thread *);
-
- int sctp_accept(struct socket *, struct sockaddr **);
-
#endif /* _KERNEL */
#endif /* !_NETINET_SCTP_VAR_H_ */
Modified: trunk/sys/netinet/sctputil.c
===================================================================
--- trunk/sys/netinet/sctputil.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctputil.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctputil.c 242836 2012-11-09 19:31:31Z mjg $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctputil.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
@@ -40,6 +40,7 @@
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#ifdef INET6
+#include <netinet6/sctp6_var.h>
#endif
#include <netinet/sctp_header.h>
#include <netinet/sctp_output.h>
@@ -49,6 +50,9 @@
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_bsd_addr.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <sys/proc.h>
#ifndef KTR_SCTP
@@ -59,8 +63,7 @@
extern struct sctp_ss_functions sctp_ss_functions[];
void
-sctp_sblog(struct sockbuf *sb,
- struct sctp_tcb *stcb, int from, int incr)
+sctp_sblog(struct sockbuf *sb, struct sctp_tcb *stcb, int from, int incr)
{
struct sctp_cwnd_log sctp_clog;
@@ -104,7 +107,6 @@
sctp_clog.x.misc.log4);
}
-
void
rto_logging(struct sctp_nets *net, int from)
{
@@ -199,8 +201,7 @@
}
void
-sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn,
- int from)
+sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn, int from)
{
struct sctp_cwnd_log sctp_clog;
@@ -217,6 +218,7 @@
sctp_clog.x.misc.log4);
}
+#ifdef SCTP_MBUF_LOGGING
void
sctp_log_mb(struct mbuf *m, int from)
{
@@ -243,9 +245,20 @@
}
void
-sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk,
- int from)
+sctp_log_mbc(struct mbuf *m, int from)
{
+ struct mbuf *mat;
+
+ for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
+ sctp_log_mb(mat, from);
+ }
+}
+
+#endif
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk, int from)
+{
struct sctp_cwnd_log sctp_clog;
if (control == NULL) {
@@ -414,7 +427,8 @@
sctp_clog.x.misc.log4);
}
-void
+#ifdef SCTP_MBCNT_LOGGING
+static void
sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
{
struct sctp_cwnd_log sctp_clog;
@@ -432,6 +446,8 @@
sctp_clog.x.misc.log4);
}
+#endif
+
void
sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
@@ -687,7 +703,7 @@
}
if (lnet->flight_size != tot_out) {
SCTP_PRINTF("net:%p flight was %d corrected to %d\n",
- lnet, lnet->flight_size,
+ (void *)lnet, lnet->flight_size,
tot_out);
lnet->flight_size = tot_out;
}
@@ -878,9 +894,52 @@
return (x);
}
+int32_t
+sctp_map_assoc_state(int kernel_state)
+{
+ int32_t user_state;
+
+ if (kernel_state & SCTP_STATE_WAS_ABORTED) {
+ user_state = SCTP_CLOSED;
+ } else if (kernel_state & SCTP_STATE_SHUTDOWN_PENDING) {
+ user_state = SCTP_SHUTDOWN_PENDING;
+ } else {
+ switch (kernel_state & SCTP_STATE_MASK) {
+ case SCTP_STATE_EMPTY:
+ user_state = SCTP_CLOSED;
+ break;
+ case SCTP_STATE_INUSE:
+ user_state = SCTP_CLOSED;
+ break;
+ case SCTP_STATE_COOKIE_WAIT:
+ user_state = SCTP_COOKIE_WAIT;
+ break;
+ case SCTP_STATE_COOKIE_ECHOED:
+ user_state = SCTP_COOKIE_ECHOED;
+ break;
+ case SCTP_STATE_OPEN:
+ user_state = SCTP_ESTABLISHED;
+ break;
+ case SCTP_STATE_SHUTDOWN_SENT:
+ user_state = SCTP_SHUTDOWN_SENT;
+ break;
+ case SCTP_STATE_SHUTDOWN_RECEIVED:
+ user_state = SCTP_SHUTDOWN_RECEIVED;
+ break;
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ user_state = SCTP_SHUTDOWN_ACK_SENT;
+ break;
+ default:
+ user_state = SCTP_CLOSED;
+ break;
+ }
+ }
+ return (user_state);
+}
+
int
-sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
- uint32_t override_tag, uint32_t vrf_id)
+sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ uint32_t override_tag, uint32_t vrf_id, uint16_t o_strms)
{
struct sctp_association *asoc;
@@ -897,26 +956,37 @@
*/
int i;
+#if defined(SCTP_DETAILED_STR_STATS)
+ int j;
+
+#endif
+
asoc = &stcb->asoc;
/* init all variables to a known value. */
SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
- asoc->max_burst = m->sctp_ep.max_burst;
- asoc->fr_max_burst = m->sctp_ep.fr_max_burst;
- asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
- asoc->cookie_life = m->sctp_ep.def_cookie_life;
- asoc->sctp_cmt_on_off = m->sctp_cmt_on_off;
- asoc->ecn_allowed = m->sctp_ecn_enable;
- asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off);
+ asoc->max_burst = inp->sctp_ep.max_burst;
+ asoc->fr_max_burst = inp->sctp_ep.fr_max_burst;
+ asoc->heart_beat_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ asoc->cookie_life = inp->sctp_ep.def_cookie_life;
+ asoc->sctp_cmt_on_off = inp->sctp_cmt_on_off;
+ asoc->ecn_supported = inp->ecn_supported;
+ asoc->prsctp_supported = inp->prsctp_supported;
+ asoc->auth_supported = inp->auth_supported;
+ asoc->asconf_supported = inp->asconf_supported;
+ asoc->reconfig_supported = inp->reconfig_supported;
+ asoc->nrsack_supported = inp->nrsack_supported;
+ asoc->pktdrop_supported = inp->pktdrop_supported;
asoc->sctp_cmt_pf = (uint8_t) 0;
- asoc->sctp_frag_point = m->sctp_frag_point;
- asoc->sctp_features = m->sctp_features;
- asoc->default_dscp = m->sctp_ep.default_dscp;
+ asoc->sctp_frag_point = inp->sctp_frag_point;
+ asoc->sctp_features = inp->sctp_features;
+ asoc->default_dscp = inp->sctp_ep.default_dscp;
+ asoc->max_cwnd = inp->max_cwnd;
#ifdef INET6
- if (m->sctp_ep.default_flowlabel) {
- asoc->default_flowlabel = m->sctp_ep.default_flowlabel;
+ if (inp->sctp_ep.default_flowlabel) {
+ asoc->default_flowlabel = inp->sctp_ep.default_flowlabel;
} else {
- if (m->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) {
- asoc->default_flowlabel = sctp_select_initial_TSN(&m->sctp_ep);
+ if (inp->ip_inp.inp.inp_flags & IN6P_AUTOFLOWLABEL) {
+ asoc->default_flowlabel = sctp_select_initial_TSN(&inp->sctp_ep);
asoc->default_flowlabel &= 0x000fffff;
asoc->default_flowlabel |= 0x80000000;
} else {
@@ -928,11 +998,11 @@
if (override_tag) {
asoc->my_vtag = override_tag;
} else {
- asoc->my_vtag = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ asoc->my_vtag = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
}
/* Get the nonce tags */
- asoc->my_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
- asoc->peer_vtag_nonce = sctp_select_a_tag(m, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+ asoc->my_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
+ asoc->peer_vtag_nonce = sctp_select_a_tag(inp, stcb->sctp_ep->sctp_lport, stcb->rport, 0);
asoc->vrf_id = vrf_id;
#ifdef SCTP_ASOCLOG_OF_TSNS
@@ -949,10 +1019,9 @@
asoc->refcnt = 0;
asoc->assoc_up_sent = 0;
asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq =
- sctp_select_initial_TSN(&m->sctp_ep);
+ sctp_select_initial_TSN(&inp->sctp_ep);
asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
/* we are optimisitic here */
- asoc->peer_supports_pktdrop = 1;
asoc->peer_supports_nat = 0;
asoc->sent_queue_retran_cnt = 0;
@@ -967,49 +1036,42 @@
/* here we are different, we hold the next one we expect */
asoc->str_reset_seq_in = asoc->last_acked_seq + 1;
- asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max;
- asoc->initial_rto = m->sctp_ep.initial_rto;
+ asoc->initial_init_rto_max = inp->sctp_ep.initial_init_rto_max;
+ asoc->initial_rto = inp->sctp_ep.initial_rto;
- asoc->max_init_times = m->sctp_ep.max_init_times;
- asoc->max_send_times = m->sctp_ep.max_send_times;
- asoc->def_net_failure = m->sctp_ep.def_net_failure;
- asoc->def_net_pf_threshold = m->sctp_ep.def_net_pf_threshold;
+ asoc->max_init_times = inp->sctp_ep.max_init_times;
+ asoc->max_send_times = inp->sctp_ep.max_send_times;
+ asoc->def_net_failure = inp->sctp_ep.def_net_failure;
+ asoc->def_net_pf_threshold = inp->sctp_ep.def_net_pf_threshold;
asoc->free_chunk_cnt = 0;
asoc->iam_blocking = 0;
- asoc->context = m->sctp_context;
- asoc->local_strreset_support = m->local_strreset_support;
- asoc->def_send = m->def_send;
- asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
- asoc->sack_freq = m->sctp_ep.sctp_sack_freq;
+ asoc->context = inp->sctp_context;
+ asoc->local_strreset_support = inp->local_strreset_support;
+ asoc->def_send = inp->def_send;
+ asoc->delayed_ack = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ asoc->sack_freq = inp->sctp_ep.sctp_sack_freq;
asoc->pr_sctp_cnt = 0;
asoc->total_output_queue_size = 0;
- if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- struct in6pcb *inp6;
-
- /* Its a V6 socket */
- inp6 = (struct in6pcb *)m;
- asoc->ipv6_addr_legal = 1;
- /* Now look at the binding flag to see if V4 will be legal */
- if (SCTP_IPV6_V6ONLY(inp6) == 0) {
- asoc->ipv4_addr_legal = 1;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ asoc->scope.ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp) == 0) {
+ asoc->scope.ipv4_addr_legal = 1;
} else {
- /* V4 addresses are NOT legal on the association */
- asoc->ipv4_addr_legal = 0;
+ asoc->scope.ipv4_addr_legal = 0;
}
} else {
- /* Its a V4 socket, no - V6 */
- asoc->ipv4_addr_legal = 1;
- asoc->ipv6_addr_legal = 0;
+ asoc->scope.ipv6_addr_legal = 0;
+ asoc->scope.ipv4_addr_legal = 1;
}
- asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND);
- asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket);
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(inp->sctp_socket), SCTP_MINIMAL_RWND);
+ asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(inp->sctp_socket);
- asoc->smallest_mtu = m->sctp_frag_point;
- asoc->minrto = m->sctp_ep.sctp_minrto;
- asoc->maxrto = m->sctp_ep.sctp_maxrto;
+ asoc->smallest_mtu = inp->sctp_frag_point;
+ asoc->minrto = inp->sctp_ep.sctp_minrto;
+ asoc->maxrto = inp->sctp_ep.sctp_maxrto;
asoc->locked_on_sending = NULL;
asoc->stream_locked_on = 0;
@@ -1026,13 +1088,13 @@
/* Setup to fill the hb random cache at first HB */
asoc->hb_random_idx = 4;
- asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time;
+ asoc->sctp_autoclose_ticks = inp->sctp_ep.auto_close_time;
- stcb->asoc.congestion_control_module = m->sctp_ep.sctp_default_cc_module;
- stcb->asoc.cc_functions = sctp_cc_functions[m->sctp_ep.sctp_default_cc_module];
+ stcb->asoc.congestion_control_module = inp->sctp_ep.sctp_default_cc_module;
+ stcb->asoc.cc_functions = sctp_cc_functions[inp->sctp_ep.sctp_default_cc_module];
- stcb->asoc.stream_scheduling_module = m->sctp_ep.sctp_default_ss_module;
- stcb->asoc.ss_functions = sctp_ss_functions[m->sctp_ep.sctp_default_ss_module];
+ stcb->asoc.stream_scheduling_module = inp->sctp_ep.sctp_default_ss_module;
+ stcb->asoc.ss_functions = sctp_ss_functions[inp->sctp_ep.sctp_default_ss_module];
/*
* Now the stream parameters, here we allocate space for all streams
@@ -1039,7 +1101,7 @@
* that we request by default.
*/
asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams =
- m->sctp_ep.pre_open_stream_count;
+ o_strms;
SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
asoc->streamoutcnt * sizeof(struct sctp_stream_out),
SCTP_M_STRMO);
@@ -1057,10 +1119,21 @@
* that were dropped must be notified to the upper layer as
* failed to send.
*/
- asoc->strmout[i].next_sequence_sent = 0x0;
+ asoc->strmout[i].next_sequence_send = 0x0;
TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].chunks_on_queues = 0;
+#if defined(SCTP_DETAILED_STR_STATS)
+ for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
+ asoc->strmout[i].abandoned_sent[j] = 0;
+ asoc->strmout[i].abandoned_unsent[j] = 0;
+ }
+#else
+ asoc->strmout[i].abandoned_sent[0] = 0;
+ asoc->strmout[i].abandoned_unsent[0] = 0;
+#endif
asoc->strmout[i].stream_no = i;
asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].state = SCTP_STREAM_OPENING;
asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL);
}
asoc->ss_functions.sctp_ss_init(stcb, asoc, 0);
@@ -1093,7 +1166,7 @@
TAILQ_INIT(&asoc->sent_queue);
TAILQ_INIT(&asoc->reasmqueue);
TAILQ_INIT(&asoc->resetHead);
- asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome;
+ asoc->max_inbound_streams = inp->sctp_ep.max_open_streams_intome;
TAILQ_INIT(&asoc->asconf_queue);
/* authentication fields */
asoc->authinfo.random = NULL;
@@ -1104,7 +1177,7 @@
asoc->authinfo.recv_keyid = 0;
LIST_INIT(&asoc->shared_keys);
asoc->marked_retrans = 0;
- asoc->port = m->sctp_ep.port;
+ asoc->port = inp->sctp_ep.port;
asoc->timoinit = 0;
asoc->timodata = 0;
asoc->timosack = 0;
@@ -1114,6 +1187,10 @@
asoc->timoshutdownack = 0;
(void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
asoc->discontinuity_time = asoc->start_time;
+ for (i = 0; i < SCTP_PR_SCTP_MAX + 1; i++) {
+ asoc->abandoned_unsent[i] = 0;
+ asoc->abandoned_sent[i] = 0;
+ }
/*
* sa_ignore MEMLEAK {memory is put in the assoc mapping array and
* freed later when the association is freed.
@@ -1394,7 +1471,9 @@
if (asc->cnt == 0) {
SCTP_FREE(asc, SCTP_M_ASC_IT);
} else {
- (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ int ret;
+
+ ret = sctp_initiate_iterator(sctp_asconf_iterator_ep,
sctp_asconf_iterator_stcb,
NULL, /* No ep end for boundall */
SCTP_PCB_FLAGS_BOUNDALL,
@@ -1402,6 +1481,23 @@
SCTP_ASOC_ANY_STATE,
(void *)asc, 0,
sctp_asconf_iterator_end, NULL, 0);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for handle_addr_wq\n");
+ /*
+ * Freeing if we are stopping or put back on the
+ * addr_wq.
+ */
+ if (SCTP_BASE_VAR(sctp_pcb_initialized) == 0) {
+ sctp_asconf_iterator_end(asc, 0);
+ } else {
+ SCTP_WQ_ADDR_LOCK();
+ LIST_FOREACH(wi, &asc->list_of_work, sctp_nxt_addr) {
+ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr);
+ }
+ SCTP_WQ_ADDR_UNLOCK();
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ }
+ }
}
}
@@ -1412,12 +1508,14 @@
struct sctp_tcb *stcb;
struct sctp_nets *net;
struct sctp_timer *tmr;
+ struct mbuf *op_err;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
- int did_output, type;
+ int did_output;
+ int type;
tmr = (struct sctp_timer *)t;
inp = (struct sctp_inpcb *)tmr->ep;
@@ -1435,7 +1533,7 @@
if (tmr->self != (void *)tmr) {
/*
* SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n",
- * tmr);
+ * (void *)tmr);
*/
CURVNET_RESTORE();
return;
@@ -1456,7 +1554,6 @@
}
/* if this is an iterator timeout, get the struct and clear inp */
tmr->stopped_from = 0xa003;
- type = tmr->type;
if (inp) {
SCTP_INP_INCR_REF(inp);
if ((inp->sctp_socket == NULL) &&
@@ -1487,8 +1584,9 @@
return;
}
}
+ type = tmr->type;
tmr->stopped_from = 0xa005;
- SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", type);
if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
if (inp) {
SCTP_INP_DECR_REF(inp);
@@ -1504,7 +1602,7 @@
if (stcb) {
SCTP_TCB_LOCK(stcb);
atomic_add_int(&stcb->asoc.refcnt, -1);
- if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+ if ((type != SCTP_TIMER_TYPE_ASOCKILL) &&
((stcb->asoc.state == 0) ||
(stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
SCTP_TCB_UNLOCK(stcb);
@@ -1516,7 +1614,7 @@
}
}
/* record in stopped what t-o occured */
- tmr->stopped_from = tmr->type;
+ tmr->stopped_from = type;
/* mark as being serviced now */
if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -1534,7 +1632,7 @@
SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
/* call the handler for the appropriate timer type */
- switch (tmr->type) {
+ switch (type) {
case SCTP_TIMER_TYPE_ZERO_COPY:
if (inp == NULL) {
break;
@@ -1724,7 +1822,9 @@
break;
}
SCTP_STAT_INCR(sctps_timoshutdownguard);
- sctp_abort_an_association(inp, stcb, NULL, SCTP_SO_NOT_LOCKED);
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Shutdown guard timer expired");
+ sctp_abort_an_association(inp, stcb, op_err, SCTP_SO_NOT_LOCKED);
/* no need to unlock on tcb its gone */
goto out_decr;
@@ -1777,8 +1877,9 @@
SCTP_STAT_INCR(sctps_timoassockill);
/* Can we free it yet? */
SCTP_INP_DECR_REF(inp);
- sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -1786,8 +1887,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
/*
@@ -1806,7 +1908,8 @@
* killer
*/
SCTP_INP_DECR_REF(inp);
- sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_FROM_INPKILL_TIMER);
inp = NULL;
@@ -1813,11 +1916,11 @@
goto out_no_decr;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
- tmr->type);
+ type);
break;
}
#ifdef SCTP_AUDITING_ENABLED
- sctp_audit_log(0xF1, (uint8_t) tmr->type);
+ sctp_audit_log(0xF1, (uint8_t) type);
if (inp)
sctp_auditing(5, inp, stcb, net);
#endif
@@ -1840,8 +1943,7 @@
SCTP_INP_DECR_REF(inp);
}
out_no_decr:
- SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
- type);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type = %d)\n", type);
CURVNET_RESTORE();
}
@@ -1934,7 +2036,7 @@
* though we use a different timer. We also add the HB timer
* PLUS a random jitter.
*/
- if ((inp == NULL) || (stcb == NULL) || (net == NULL)) {
+ if ((stcb == NULL) || (net == NULL)) {
return;
} else {
uint32_t rndval;
@@ -1989,9 +2091,6 @@
* nothing needed but the endpoint here usually about 60
* minutes.
*/
- if (inp == NULL) {
- return;
- }
tmr = &inp->sctp_ep.signature_change;
to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
break;
@@ -2008,9 +2107,6 @@
* timer since that has stopped and we are in the GONE
* state.
*/
- if (inp == NULL) {
- return;
- }
tmr = &inp->sctp_ep.signature_change;
to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
break;
@@ -2019,12 +2115,9 @@
* Here we use the value found in the EP for PMTU usually
* about 10 minutes.
*/
- if ((stcb == NULL) || (inp == NULL)) {
+ if ((stcb == NULL) || (net == NULL)) {
return;
}
- if (net == NULL) {
- return;
- }
if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
return;
}
@@ -2048,10 +2141,14 @@
* Here we use the endpoints shutdown guard timer usually
* about 3 minutes.
*/
- if ((inp == NULL) || (stcb == NULL)) {
+ if (stcb == NULL) {
return;
}
- to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ if (inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] == 0) {
+ to_ticks = 5 * MSEC_TO_TICKS(stcb->asoc.maxrto);
+ } else {
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ }
tmr = &stcb->asoc.shut_guard_timer;
break;
case SCTP_TIMER_TYPE_STRRESET:
@@ -2107,13 +2204,13 @@
break;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
- __FUNCTION__, t_type);
+ __func__, t_type);
return;
break;
}
if ((to_ticks <= 0) || (tmr == NULL)) {
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
- __FUNCTION__, t_type, to_ticks, tmr);
+ __func__, t_type, to_ticks, (void *)tmr);
return;
}
if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
@@ -2269,7 +2366,7 @@
break;
default:
SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
- __FUNCTION__, t_type);
+ __func__, t_type);
break;
}
if (tmr == NULL) {
@@ -2386,10 +2483,10 @@
timevalsub(&now, old);
/* store the current RTT in us */
net->rtt = (uint64_t) 1000000 *(uint64_t) now.tv_sec +
- (uint64_t) now.tv_usec;
+ (uint64_t) now.tv_usec;
- /* computer rtt in ms */
- rtt = net->rtt / 1000;
+ /* compute rtt in ms */
+ rtt = (int32_t) (net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
/*
* Tell the CC module that a new update has just occurred
@@ -2523,19 +2620,14 @@
}
-int
+struct mbuf *
sctp_add_pad_tombuf(struct mbuf *m, int padlen)
{
- /*
- * add padlen bytes of 0 filled padding to the end of the mbuf. If
- * padlen is > 3 this routine will fail.
- */
- uint8_t *dp;
- int i;
+ struct mbuf *m_last;
+ caddr_t dp;
if (padlen > 3) {
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- return (ENOBUFS);
+ return (NULL);
}
if (padlen <= M_TRAILINGSPACE(m)) {
/*
@@ -2542,39 +2634,30 @@
* The easy way. We hope the majority of the time we hit
* here :)
*/
- dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
- SCTP_BUF_LEN(m) += padlen;
+ m_last = m;
} else {
- /* Hard way we must grow the mbuf */
- struct mbuf *tmp;
-
- tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
- if (tmp == NULL) {
- /* Out of space GAK! we are in big trouble. */
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- return (ENOBUFS);
+ /* Hard way we must grow the mbuf chain */
+ m_last = sctp_get_mbuf_for_msg(padlen, 0, M_NOWAIT, 1, MT_DATA);
+ if (m_last == NULL) {
+ return (NULL);
}
- /* setup and insert in middle */
- SCTP_BUF_LEN(tmp) = padlen;
- SCTP_BUF_NEXT(tmp) = NULL;
- SCTP_BUF_NEXT(m) = tmp;
- dp = mtod(tmp, uint8_t *);
+ SCTP_BUF_LEN(m_last) = 0;
+ SCTP_BUF_NEXT(m_last) = NULL;
+ SCTP_BUF_NEXT(m) = m_last;
}
- /* zero out the pad */
- for (i = 0; i < padlen; i++) {
- *dp = 0;
- dp++;
- }
- return (0);
+ dp = mtod(m_last, caddr_t)+SCTP_BUF_LEN(m_last);
+ SCTP_BUF_LEN(m_last) += padlen;
+ memset(dp, 0, padlen);
+ return (m_last);
}
-int
+struct mbuf *
sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
{
/* find the last mbuf in chain and pad it */
struct mbuf *m_at;
- if (last_mbuf) {
+ if (last_mbuf != NULL) {
return (sctp_add_pad_tombuf(last_mbuf, padval));
} else {
for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
@@ -2583,8 +2666,7 @@
}
}
}
- SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
- return (EFAULT);
+ return (NULL);
}
static void
@@ -2601,15 +2683,18 @@
size_t notif_len, abort_len;
unsigned int i;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
+ if (stcb == NULL) {
+ return;
+ }
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
notif_len = sizeof(struct sctp_assoc_change);
if (abort != NULL) {
- abort_len = htons(abort->ch.chunk_length);
+ abort_len = ntohs(abort->ch.chunk_length);
} else {
abort_len = 0;
}
@@ -2618,11 +2703,11 @@
} else if ((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC)) {
notif_len += abort_len;
}
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* Retry with smaller value. */
notif_len = sizeof(struct sctp_assoc_change);
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
goto set_error;
}
@@ -2629,6 +2714,7 @@
}
SCTP_BUF_NEXT(m_notify) = NULL;
sac = mtod(m_notify, struct sctp_assoc_change *);
+ memset(sac, 0, notif_len);
sac->sac_type = SCTP_ASSOC_CHANGE;
sac->sac_flags = 0;
sac->sac_length = sizeof(struct sctp_assoc_change);
@@ -2641,17 +2727,17 @@
if (notif_len > sizeof(struct sctp_assoc_change)) {
if ((state == SCTP_COMM_UP) || (state == SCTP_RESTART)) {
i = 0;
- if (stcb->asoc.peer_supports_prsctp) {
+ if (stcb->asoc.prsctp_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_PR;
}
- if (stcb->asoc.peer_supports_auth) {
+ if (stcb->asoc.auth_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_AUTH;
}
- if (stcb->asoc.peer_supports_asconf) {
+ if (stcb->asoc.asconf_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_ASCONF;
}
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_MULTIBUF;
- if (stcb->asoc.peer_supports_strreset) {
+ if (stcb->asoc.reconfig_supported == 1) {
sac->sac_info[i++] = SCTP_ASSOC_SUPPORTS_RE_CONFIG;
}
sac->sac_length += i;
@@ -2685,6 +2771,7 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
+ SOCK_LOCK(stcb->sctp_socket);
if (from_peer) {
if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
@@ -2694,12 +2781,18 @@
stcb->sctp_socket->so_error = ECONNRESET;
}
} else {
- SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
- stcb->sctp_socket->so_error = ECONNABORTED;
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ETIMEDOUT);
+ stcb->sctp_socket->so_error = ETIMEDOUT;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNABORTED);
+ stcb->sctp_socket->so_error = ECONNABORTED;
+ }
}
}
/* Wake ANY sleepers */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(stcb->sctp_ep);
if (!so_locked) {
atomic_add_int(&stcb->asoc.refcnt, 1);
@@ -2716,11 +2809,11 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
- socantrcvmore(stcb->sctp_socket);
+ socantrcvmore_locked(stcb->sctp_socket);
}
sorwakeup(stcb->sctp_socket);
sowwakeup(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -2729,7 +2822,11 @@
static void
sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
- struct sockaddr *sa, uint32_t error)
+ struct sockaddr *sa, uint32_t error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
{
struct mbuf *m_notify;
struct sctp_paddr_change *spc;
@@ -2740,11 +2837,12 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
return;
SCTP_BUF_LEN(m_notify) = 0;
spc = mtod(m_notify, struct sctp_paddr_change *);
+ memset(spc, 0, sizeof(struct sctp_paddr_change));
spc->spc_type = SCTP_PEER_ADDR_CHANGE;
spc->spc_flags = 0;
spc->spc_length = sizeof(struct sctp_paddr_change);
@@ -2751,7 +2849,16 @@
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
+#ifdef INET6
+ if (sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)sa,
+ (struct sockaddr_in6 *)&spc->spc_aaddr);
+ } else {
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+ }
+#else
memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+#endif
break;
#endif
#ifdef INET6
@@ -2802,7 +2909,7 @@
control,
&stcb->sctp_socket->so_rcv, 1,
SCTP_READ_LOCK_NOT_HELD,
- SCTP_SO_NOT_LOCKED);
+ so_locked);
}
@@ -2831,15 +2938,14 @@
} else {
length = sizeof(struct sctp_send_failed);
}
- m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(length, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
- length += chk->send_size;
- length -= sizeof(struct sctp_data_chunk);
SCTP_BUF_LEN(m_notify) = 0;
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+ memset(ssfe, 0, length);
ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
if (sent) {
ssfe->ssfe_flags = SCTP_DATA_SENT;
@@ -2846,10 +2952,11 @@
} else {
ssfe->ssfe_flags = SCTP_DATA_UNSENT;
}
+ length += chk->send_size;
+ length -= sizeof(struct sctp_data_chunk);
ssfe->ssfe_length = length;
ssfe->ssfe_error = error;
/* not exactly what the user sent in, but should be close :) */
- bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
ssfe->ssfe_info.snd_sid = chk->rec.data.stream_number;
ssfe->ssfe_info.snd_flags = chk->rec.data.rcv_flags;
ssfe->ssfe_info.snd_ppid = chk->rec.data.payloadtype;
@@ -2859,6 +2966,7 @@
SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
} else {
ssf = mtod(m_notify, struct sctp_send_failed *);
+ memset(ssf, 0, length);
ssf->ssf_type = SCTP_SEND_FAILED;
if (sent) {
ssf->ssf_flags = SCTP_DATA_SENT;
@@ -2865,6 +2973,8 @@
} else {
ssf->ssf_flags = SCTP_DATA_UNSENT;
}
+ length += chk->send_size;
+ length -= sizeof(struct sctp_data_chunk);
ssf->ssf_length = length;
ssf->ssf_error = error;
/* not exactly what the user sent in, but should be close :) */
@@ -2943,21 +3053,21 @@
} else {
length = sizeof(struct sctp_send_failed);
}
- m_notify = sctp_get_mbuf_for_msg(length, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(length, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
return;
}
- length += sp->length;
SCTP_BUF_LEN(m_notify) = 0;
if (sctp_stcb_is_feature_on(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVNSENDFAILEVNT)) {
ssfe = mtod(m_notify, struct sctp_send_failed_event *);
+ memset(ssfe, 0, length);
ssfe->ssfe_type = SCTP_SEND_FAILED_EVENT;
ssfe->ssfe_flags = SCTP_DATA_UNSENT;
+ length += sp->length;
ssfe->ssfe_length = length;
ssfe->ssfe_error = error;
/* not exactly what the user sent in, but should be close :) */
- bzero(&ssfe->ssfe_info, sizeof(ssfe->ssfe_info));
ssfe->ssfe_info.snd_sid = sp->stream;
if (sp->some_taken) {
ssfe->ssfe_info.snd_flags = SCTP_DATA_LAST_FRAG;
@@ -2971,14 +3081,15 @@
SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed_event);
} else {
ssf = mtod(m_notify, struct sctp_send_failed *);
+ memset(ssf, 0, length);
ssf->ssf_type = SCTP_SEND_FAILED;
ssf->ssf_flags = SCTP_DATA_UNSENT;
+ length += sp->length;
ssf->ssf_length = length;
ssf->ssf_error = error;
/* not exactly what the user sent in, but should be close :) */
- bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
ssf->ssf_info.sinfo_stream = sp->stream;
- ssf->ssf_info.sinfo_ssn = sp->strseq;
+ ssf->ssf_info.sinfo_ssn = 0;
if (sp->some_taken) {
ssf->ssf_info.sinfo_flags = SCTP_DATA_LAST_FRAG;
} else {
@@ -3032,12 +3143,13 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
SCTP_BUF_LEN(m_notify) = 0;
sai = mtod(m_notify, struct sctp_adaptation_event *);
+ memset(sai, 0, sizeof(struct sctp_adaptation_event));
sai->sai_type = SCTP_ADAPTATION_INDICATION;
sai->sai_flags = 0;
sai->sai_length = sizeof(struct sctp_adaptation_event);
@@ -3087,12 +3199,13 @@
if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_CANT_READ) {
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
SCTP_BUF_LEN(m_notify) = 0;
pdapi = mtod(m_notify, struct sctp_pdapi_event *);
+ memset(pdapi, 0, sizeof(struct sctp_pdapi_event));
pdapi->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
pdapi->pdapi_flags = 0;
pdapi->pdapi_length = sizeof(struct sctp_pdapi_event);
@@ -3135,7 +3248,7 @@
}
if (stcb->sctp_ep && stcb->sctp_socket) {
/* This should always be the case */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3152,7 +3265,7 @@
}
#endif
sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -3174,7 +3287,7 @@
if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
/* mark socket closed for read/write and wakeup! */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -3189,7 +3302,7 @@
}
#endif
socantsendmore(stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3197,11 +3310,12 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
sse = mtod(m_notify, struct sctp_shutdown_event *);
+ memset(sse, 0, sizeof(struct sctp_shutdown_event));
sse->sse_type = SCTP_SHUTDOWN_EVENT;
sse->sse_flags = 0;
sse->sse_length = sizeof(struct sctp_shutdown_event);
@@ -3245,7 +3359,7 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_sender_dry_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* no space left */
return;
@@ -3252,6 +3366,7 @@
}
SCTP_BUF_LEN(m_notify) = 0;
event = mtod(m_notify, struct sctp_sender_dry_event *);
+ memset(event, 0, sizeof(struct sctp_sender_dry_event));
event->sender_dry_type = SCTP_SENDER_DRY_EVENT;
event->sender_dry_flags = 0;
event->sender_dry_length = sizeof(struct sctp_sender_dry_event);
@@ -3284,7 +3399,6 @@
struct mbuf *m_notify;
struct sctp_queued_to_read *control;
struct sctp_stream_change_event *stradd;
- int len;
if ((stcb == NULL) ||
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_STREAM_CHANGEEVNT))) {
@@ -3297,25 +3411,20 @@
return;
}
stcb->asoc.peer_req_out = 0;
- m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_stream_change_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
SCTP_BUF_LEN(m_notify) = 0;
- len = sizeof(struct sctp_stream_change_event);
- if (len > M_TRAILINGSPACE(m_notify)) {
- /* never enough room */
- sctp_m_freem(m_notify);
- return;
- }
stradd = mtod(m_notify, struct sctp_stream_change_event *);
+ memset(stradd, 0, sizeof(struct sctp_stream_change_event));
stradd->strchange_type = SCTP_STREAM_CHANGE_EVENT;
stradd->strchange_flags = flag;
- stradd->strchange_length = len;
+ stradd->strchange_length = sizeof(struct sctp_stream_change_event);
stradd->strchange_assoc_id = sctp_get_associd(stcb);
stradd->strchange_instrms = numberin;
stradd->strchange_outstrms = numberout;
- SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_stream_change_event);
SCTP_BUF_NEXT(m_notify) = NULL;
if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
/* no space */
@@ -3346,7 +3455,6 @@
struct mbuf *m_notify;
struct sctp_queued_to_read *control;
struct sctp_assoc_reset_event *strasoc;
- int len;
if ((stcb == NULL) ||
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ASSOC_RESETEVNT))) {
@@ -3353,25 +3461,20 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_reset_event), 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
SCTP_BUF_LEN(m_notify) = 0;
- len = sizeof(struct sctp_assoc_reset_event);
- if (len > M_TRAILINGSPACE(m_notify)) {
- /* never enough room */
- sctp_m_freem(m_notify);
- return;
- }
strasoc = mtod(m_notify, struct sctp_assoc_reset_event *);
+ memset(strasoc, 0, sizeof(struct sctp_assoc_reset_event));
strasoc->assocreset_type = SCTP_ASSOC_RESET_EVENT;
strasoc->assocreset_flags = flag;
- strasoc->assocreset_length = len;
+ strasoc->assocreset_length = sizeof(struct sctp_assoc_reset_event);
strasoc->assocreset_assoc_id = sctp_get_associd(stcb);
strasoc->assocreset_local_tsn = sending_tsn;
strasoc->assocreset_remote_tsn = recv_tsn;
- SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_reset_event);
SCTP_BUF_NEXT(m_notify) = NULL;
if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
/* no space */
@@ -3412,7 +3515,7 @@
/* event not enabled */
return;
}
- m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL)
/* no space left */
return;
@@ -3424,6 +3527,7 @@
return;
}
strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+ memset(strreset, 0, len);
strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
strreset->strreset_flags = flag;
strreset->strreset_length = len;
@@ -3474,16 +3578,16 @@
return;
}
if (chunk != NULL) {
- chunk_len = htons(chunk->ch.chunk_length);
+ chunk_len = ntohs(chunk->ch.chunk_length);
} else {
chunk_len = 0;
}
notif_len = sizeof(struct sctp_remote_error) + chunk_len;
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
/* Retry with smaller value. */
notif_len = sizeof(struct sctp_remote_error);
- m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_DONTWAIT, 1, MT_DATA);
+ m_notify = sctp_get_mbuf_for_msg(notif_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_notify == NULL) {
return;
}
@@ -3490,6 +3594,7 @@
}
SCTP_BUF_NEXT(m_notify) = NULL;
sre = mtod(m_notify, struct sctp_remote_error *);
+ memset(sre, 0, notif_len);
sre->sre_type = SCTP_REMOTE_ERROR;
sre->sre_flags = 0;
sre->sre_length = sizeof(struct sctp_remote_error);
@@ -3536,8 +3641,8 @@
if (stcb->sctp_socket->so_rcv.sb_state & SBS_CANTRCVMORE) {
return;
}
- if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
- (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+ if ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+ (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED)) {
if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
(notification == SCTP_NOTIFY_INTERFACE_UP) ||
(notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
@@ -3554,7 +3659,7 @@
if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
sctp_notify_adaptation_layer(stcb);
}
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
NULL, so_locked);
}
@@ -3568,7 +3673,7 @@
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_INTERFACE_UP:
@@ -3577,7 +3682,7 @@
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_INTERFACE_CONFIRMED:
@@ -3586,7 +3691,7 @@
net = (struct sctp_nets *)data;
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
- (struct sockaddr *)&net->ro._l_addr, error);
+ (struct sockaddr *)&net->ro._l_addr, error, so_locked);
break;
}
case SCTP_NOTIFY_SPECIAL_SP_FAIL:
@@ -3611,8 +3716,8 @@
break;
}
case SCTP_NOTIFY_ASSOC_LOC_ABORTED:
- if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 0, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 0, so_locked);
@@ -3619,8 +3724,8 @@
}
break;
case SCTP_NOTIFY_ASSOC_REM_ABORTED:
- if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
- ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ if (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED)) {
sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, data, 1, so_locked);
} else {
sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, data, 1, so_locked);
@@ -3628,7 +3733,7 @@
break;
case SCTP_NOTIFY_ASSOC_RESTART:
sctp_notify_assoc_change(SCTP_RESTART, stcb, error, NULL, 0, so_locked);
- if (stcb->asoc.peer_supports_auth == 0) {
+ if (stcb->asoc.auth_supported == 0) {
sctp_ulp_notify(SCTP_NOTIFY_NO_PEER_AUTH, stcb, 0,
NULL, so_locked);
}
@@ -3657,15 +3762,15 @@
break;
case SCTP_NOTIFY_ASCONF_ADD_IP:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_ASCONF_DELETE_IP:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
- error);
+ error, so_locked);
break;
case SCTP_NOTIFY_PEER_SHUTDOWN:
sctp_notify_shutdown_event(stcb);
@@ -3693,7 +3798,7 @@
break;
default:
SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
- __FUNCTION__, notification, notification);
+ __func__, notification, notification);
break;
} /* end switch */
}
@@ -3732,6 +3837,15 @@
TAILQ_FOREACH_SAFE(chk, &asoc->sent_queue, sctp_next, nchk) {
TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
asoc->sent_queue_cnt--;
+ if (chk->sent != SCTP_DATAGRAM_NR_ACKED) {
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+ }
+ }
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
sctp_ulp_notify(SCTP_NOTIFY_SENT_DG_FAIL, stcb,
@@ -3748,6 +3862,13 @@
TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
asoc->send_queue_cnt--;
+ if (asoc->strmout[chk->rec.data.stream_number].chunks_on_queues > 0) {
+ asoc->strmout[chk->rec.data.stream_number].chunks_on_queues--;
+#ifdef INVARIANTS
+ } else {
+ panic("No chunks on the queues for sid %u.", chk->rec.data.stream_number);
+#endif
+ }
if (chk->data != NULL) {
sctp_free_bufspace(stcb, asoc, chk, 1);
sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
@@ -3826,14 +3947,15 @@
void
sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *m, int iphlen, struct sctphdr *sh,
- struct mbuf *op_err,
- uint8_t use_mflowid, uint32_t mflowid,
+ struct mbuf *m, int iphlen,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct mbuf *op_err,
+ uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
uint32_t vtag;
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -3847,12 +3969,12 @@
vrf_id = stcb->asoc.vrf_id;
stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
}
- sctp_send_abort(m, iphlen, sh, vtag, op_err,
- use_mflowid, mflowid,
+ sctp_send_abort(m, iphlen, src, dst, sh, vtag, op_err,
+ mflowtype, mflowid, inp->fibnum,
vrf_id, port);
if (stcb != NULL) {
/* Ok, now lets free it */
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3865,8 +3987,9 @@
(SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
SCTP_STAT_DECR_GAUGE32(sctps_currestab);
}
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -3945,18 +4068,18 @@
#endif
)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
#endif
if (stcb == NULL) {
/* Got to have a TCB */
if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
- if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ if (LIST_EMPTY(&inp->sctp_asoc_list)) {
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_DIRECTLY_NOCMPSET);
}
@@ -3980,7 +4103,7 @@
#ifdef SCTP_ASOCLOG_OF_TSNS
sctp_print_out_track_log(stcb);
#endif
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -3989,8 +4112,9 @@
atomic_subtract_int(&stcb->asoc.refcnt, 1);
}
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -3998,9 +4122,11 @@
}
void
-sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
- struct sctp_inpcb *inp,
- uint8_t use_mflowid, uint32_t mflowid,
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset,
+ struct sockaddr *src, struct sockaddr *dst,
+ struct sctphdr *sh, struct sctp_inpcb *inp,
+ struct mbuf *cause,
+ uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
uint32_t vrf_id, uint16_t port)
{
struct sctp_chunkhdr *ch, chunk_buf;
@@ -4010,7 +4136,7 @@
SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
/* Generate a TO address for future reference */
if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
- if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ if (LIST_EMPTY(&inp->sctp_asoc_list)) {
sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
SCTP_CALLED_DIRECTLY_NOCMPSET);
}
@@ -4028,9 +4154,6 @@
case SCTP_INIT:
contains_init_chunk = 1;
break;
- case SCTP_COOKIE_ECHO:
- /* We hit here only if the assoc is being freed */
- return;
case SCTP_PACKET_DROPPED:
/* we don't respond to pkt-dropped */
return;
@@ -4044,8 +4167,8 @@
*/
return;
case SCTP_SHUTDOWN_ACK:
- sctp_send_shutdown_complete2(m, sh,
- use_mflowid, mflowid,
+ sctp_send_shutdown_complete2(src, dst, sh,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
return;
default:
@@ -4058,8 +4181,8 @@
if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
(contains_init_chunk == 0))) {
- sctp_send_abort(m, iphlen, sh, 0, NULL,
- use_mflowid, mflowid,
+ sctp_send_abort(m, iphlen, src, dst, sh, 0, cause,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
}
}
@@ -4214,7 +4337,6 @@
#ifdef INET6
char ip6buf[INET6_ADDRSTRLEN];
- ip6buf[0] = 0;
#endif
switch (sa->sa_family) {
@@ -4251,62 +4373,6 @@
}
void
-sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
-{
- switch (iph->ip_v) {
-#ifdef INET
- case IPVERSION:
- {
- struct sockaddr_in lsa, fsa;
-
- bzero(&lsa, sizeof(lsa));
- lsa.sin_len = sizeof(lsa);
- lsa.sin_family = AF_INET;
- lsa.sin_addr = iph->ip_src;
- lsa.sin_port = sh->src_port;
- bzero(&fsa, sizeof(fsa));
- fsa.sin_len = sizeof(fsa);
- fsa.sin_family = AF_INET;
- fsa.sin_addr = iph->ip_dst;
- fsa.sin_port = sh->dest_port;
- SCTP_PRINTF("src: ");
- sctp_print_address((struct sockaddr *)&lsa);
- SCTP_PRINTF("dest: ");
- sctp_print_address((struct sockaddr *)&fsa);
- break;
- }
-#endif
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- struct ip6_hdr *ip6;
- struct sockaddr_in6 lsa6, fsa6;
-
- ip6 = (struct ip6_hdr *)iph;
- bzero(&lsa6, sizeof(lsa6));
- lsa6.sin6_len = sizeof(lsa6);
- lsa6.sin6_family = AF_INET6;
- lsa6.sin6_addr = ip6->ip6_src;
- lsa6.sin6_port = sh->src_port;
- bzero(&fsa6, sizeof(fsa6));
- fsa6.sin6_len = sizeof(fsa6);
- fsa6.sin6_family = AF_INET6;
- fsa6.sin6_addr = ip6->ip6_dst;
- fsa6.sin6_port = sh->dest_port;
- SCTP_PRINTF("src: ");
- sctp_print_address((struct sockaddr *)&lsa6);
- SCTP_PRINTF("dest: ");
- sctp_print_address((struct sockaddr *)&fsa6);
- break;
- }
-#endif
- default:
- /* TSNH */
- break;
- }
-}
-
-void
sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
struct sctp_inpcb *new_inp,
struct sctp_tcb *stcb,
@@ -4478,7 +4544,7 @@
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -4499,7 +4565,7 @@
}
#endif
sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -4632,7 +4698,7 @@
if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
} else {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(inp);
@@ -4651,7 +4717,7 @@
}
#endif
sctp_sorwakeup(inp, inp->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
#endif
}
@@ -4670,19 +4736,43 @@
*/
struct mbuf *
-sctp_generate_invmanparam(int err)
+sctp_generate_cause(uint16_t code, char *info)
{
- /* Return a MBUF with a invalid mandatory parameter */
struct mbuf *m;
+ struct sctp_gen_error_cause *cause;
+ size_t info_len, len;
- m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
- if (m) {
- struct sctp_paramhdr *ph;
+ if ((code == 0) || (info == NULL)) {
+ return (NULL);
+ }
+ info_len = strlen(info);
+ len = sizeof(struct sctp_paramhdr) + info_len;
+ m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+ if (m != NULL) {
+ SCTP_BUF_LEN(m) = len;
+ cause = mtod(m, struct sctp_gen_error_cause *);
+ cause->code = htons(code);
+ cause->length = htons((uint16_t) len);
+ memcpy(cause->info, info, info_len);
+ }
+ return (m);
+}
- SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
- ph = mtod(m, struct sctp_paramhdr *);
- ph->param_length = htons(sizeof(struct sctp_paramhdr));
- ph->param_type = htons(err);
+struct mbuf *
+sctp_generate_no_user_data_cause(uint32_t tsn)
+{
+ struct mbuf *m;
+ struct sctp_error_no_user_data *no_user_data_cause;
+ size_t len;
+
+ len = sizeof(struct sctp_error_no_user_data);
+ m = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
+ if (m != NULL) {
+ SCTP_BUF_LEN(m) = len;
+ no_user_data_cause = mtod(m, struct sctp_error_no_user_data *);
+ no_user_data_cause->cause.code = htons(SCTP_CAUSE_NO_USER_DATA);
+ no_user_data_cause->cause.length = htons((uint16_t) len);
+ no_user_data_cause->tsn = tsn; /* tsn is passed in as NBO */
}
return (m);
}
@@ -4741,6 +4831,21 @@
stream = tp1->rec.data.stream_number;
seq = tp1->rec.data.stream_seq;
+ if (sent || !(tp1->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
+ stcb->asoc.abandoned_sent[0]++;
+ stcb->asoc.abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+ stcb->asoc.strmout[stream].abandoned_sent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+ stcb->asoc.strmout[stream].abandoned_sent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+ } else {
+ stcb->asoc.abandoned_unsent[0]++;
+ stcb->asoc.abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+ stcb->asoc.strmout[stream].abandoned_unsent[0]++;
+#if defined(SCTP_DETAILED_STR_STATS)
+ stcb->asoc.strmout[stream].abandoned_unsent[PR_SCTP_POLICY(tp1->flags)]++;
+#endif
+ }
do {
ret_sz += tp1->book_size;
if (tp1->data != NULL) {
@@ -4834,81 +4939,68 @@
* Still no eom found. That means there is stuff left on the
* stream out queue.. yuck.
*/
+ SCTP_TCB_SEND_LOCK(stcb);
strq = &stcb->asoc.strmout[stream];
- SCTP_TCB_SEND_LOCK(stcb);
- TAILQ_FOREACH(sp, &strq->outqueue, next) {
- /* FIXME: Shouldn't this be a serial number check? */
- if (sp->strseq > seq) {
- break;
- }
- /* Check if its our SEQ */
- if (sp->strseq == seq) {
- sp->discard_rest = 1;
- /*
- * We may need to put a chunk on the queue
- * that holds the TSN that would have been
- * sent with the LAST bit.
- */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp != NULL) {
+ sp->discard_rest = 1;
+ /*
+ * We may need to put a chunk on the queue that
+ * holds the TSN that would have been sent with the
+ * LAST bit.
+ */
+ if (chk == NULL) {
+ /* Yep, we have to */
+ sctp_alloc_a_chunk(stcb, chk);
if (chk == NULL) {
- /* Yep, we have to */
- sctp_alloc_a_chunk(stcb, chk);
- if (chk == NULL) {
- /*
- * we are hosed. All we can
- * do is nothing.. which
- * will cause an abort if
- * the peer is paying
- * attention.
- */
- goto oh_well;
- }
- memset(chk, 0, sizeof(*chk));
- chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
- chk->sent = SCTP_FORWARD_TSN_SKIP;
- chk->asoc = &stcb->asoc;
- chk->rec.data.stream_seq = sp->strseq;
- chk->rec.data.stream_number = sp->stream;
- chk->rec.data.payloadtype = sp->ppid;
- chk->rec.data.context = sp->context;
- chk->flags = sp->act_flags;
- if (sp->net)
- chk->whoTo = sp->net;
- else
- chk->whoTo = stcb->asoc.primary_destination;
- atomic_add_int(&chk->whoTo->ref_count, 1);
- chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
- stcb->asoc.pr_sctp_cnt++;
- chk->pr_sctp_on = 1;
- TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
- stcb->asoc.sent_queue_cnt++;
- stcb->asoc.pr_sctp_cnt++;
- } else {
- chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
- }
- oh_well:
- if (sp->data) {
/*
- * Pull any data to free up the SB
- * and allow sender to "add more"
- * while we will throw away :-)
+ * we are hosed. All we can do is
+ * nothing.. which will cause an
+ * abort if the peer is paying
+ * attention.
*/
- sctp_free_spbufspace(stcb, &stcb->asoc,
- sp);
- ret_sz += sp->length;
- do_wakeup_routine = 1;
- sp->some_taken = 1;
- sctp_m_freem(sp->data);
- sp->data = NULL;
- sp->tail_mbuf = NULL;
- sp->length = 0;
+ goto oh_well;
}
- break;
+ memset(chk, 0, sizeof(*chk));
+ chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG;
+ chk->sent = SCTP_FORWARD_TSN_SKIP;
+ chk->asoc = &stcb->asoc;
+ chk->rec.data.stream_seq = strq->next_sequence_send;
+ chk->rec.data.stream_number = sp->stream;
+ chk->rec.data.payloadtype = sp->ppid;
+ chk->rec.data.context = sp->context;
+ chk->flags = sp->act_flags;
+ chk->whoTo = NULL;
+ chk->rec.data.TSN_seq = atomic_fetchadd_int(&stcb->asoc.sending_seq, 1);
+ strq->chunks_on_queues++;
+ TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next);
+ stcb->asoc.sent_queue_cnt++;
+ stcb->asoc.pr_sctp_cnt++;
+ } else {
+ chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG;
}
- } /* End tailq_foreach */
+ strq->next_sequence_send++;
+ oh_well:
+ if (sp->data) {
+ /*
+ * Pull any data to free up the SB and allow
+ * sender to "add more" while we will throw
+ * away :-)
+ */
+ sctp_free_spbufspace(stcb, &stcb->asoc, sp);
+ ret_sz += sp->length;
+ do_wakeup_routine = 1;
+ sp->some_taken = 1;
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ sp->length = 0;
+ }
+ }
SCTP_TCB_SEND_UNLOCK(stcb);
}
if (do_wakeup_routine) {
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
so = SCTP_INP_SO(stcb->sctp_ep);
@@ -4926,7 +5018,7 @@
}
#endif
sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
if (!so_locked) {
SCTP_SOCKET_UNLOCK(so, 1);
}
@@ -5002,7 +5094,7 @@
}
#endif
#ifdef INET6
- case INET6:
+ case AF_INET6:
{
struct sockaddr_in6 *sin6;
uint32_t hash_of_addr;
@@ -5035,7 +5127,6 @@
vrf = sctp_find_vrf(vrf_id);
if (vrf == NULL) {
-stage_right:
if (holds_lock == 0)
SCTP_IPI_ADDR_RUNLOCK();
return (NULL);
@@ -5055,15 +5146,6 @@
return (NULL);
}
LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
- if (sctp_ifap == NULL) {
-#ifdef INVARIANTS
- panic("Huh LIST_FOREACH corrupt");
- goto stage_right;
-#else
- SCTP_PRINTF("LIST corrupt of sctp_ifap's?\n");
- goto stage_right;
-#endif
- }
if (addr->sa_family != sctp_ifap->address.sa.sa_family)
continue;
#ifdef INET
@@ -5162,7 +5244,8 @@
sctp_chunk_output(stcb->sctp_ep, stcb,
SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
/* make sure no timer is running */
- sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
SCTP_TCB_UNLOCK(stcb);
} else {
/* Update how much we have pending */
@@ -5592,20 +5675,20 @@
s_extra = (struct sctp_extrcvinfo *)sinfo;
if ((nxt) &&
(nxt->length)) {
- s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+ s_extra->serinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
if (nxt->sinfo_flags & SCTP_UNORDERED) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
}
if (nxt->spec_flags & M_NOTIFICATION) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
}
- s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
- s_extra->sreinfo_next_length = nxt->length;
- s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
- s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+ s_extra->serinfo_next_aid = nxt->sinfo_assoc_id;
+ s_extra->serinfo_next_length = nxt->length;
+ s_extra->serinfo_next_ppid = nxt->sinfo_ppid;
+ s_extra->serinfo_next_stream = nxt->sinfo_stream;
if (nxt->tail_mbuf != NULL) {
if (nxt->end_added) {
- s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+ s_extra->serinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
}
}
} else {
@@ -5616,11 +5699,11 @@
* :-D
*/
nxt = NULL;
- s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
- s_extra->sreinfo_next_aid = 0;
- s_extra->sreinfo_next_length = 0;
- s_extra->sreinfo_next_ppid = 0;
- s_extra->sreinfo_next_stream = 0;
+ s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->serinfo_next_aid = 0;
+ s_extra->serinfo_next_length = 0;
+ s_extra->serinfo_next_ppid = 0;
+ s_extra->serinfo_next_stream = 0;
}
}
/*
@@ -5657,43 +5740,43 @@
entry->flgs = control->sinfo_flags;
}
#endif
- if (fromlen && from) {
- cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sa.sa_len);
+ if ((fromlen > 0) && (from != NULL)) {
+ union sctp_sockstore store;
+ size_t len;
+
switch (control->whoFrom->ro._l_addr.sa.sa_family) {
#ifdef INET6
case AF_INET6:
- ((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+ len = sizeof(struct sockaddr_in6);
+ store.sin6 = control->whoFrom->ro._l_addr.sin6;
+ store.sin6.sin6_port = control->port_from;
break;
#endif
#ifdef INET
case AF_INET:
- ((struct sockaddr_in *)from)->sin_port = control->port_from;
+#ifdef INET6
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
+ len = sizeof(struct sockaddr_in6);
+ in6_sin_2_v4mapsin6(&control->whoFrom->ro._l_addr.sin,
+ &store.sin6);
+ store.sin6.sin6_port = control->port_from;
+ } else {
+ len = sizeof(struct sockaddr_in);
+ store.sin = control->whoFrom->ro._l_addr.sin;
+ store.sin.sin_port = control->port_from;
+ }
+#else
+ len = sizeof(struct sockaddr_in);
+ store.sin = control->whoFrom->ro._l_addr.sin;
+ store.sin.sin_port = control->port_from;
+#endif
break;
#endif
default:
+ len = 0;
break;
}
- memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
-
-#if defined(INET) && defined(INET6)
- if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
- (from->sa_family == AF_INET) &&
- ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
- struct sockaddr_in *sin;
- struct sockaddr_in6 sin6;
-
- sin = (struct sockaddr_in *)from;
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- bcopy(&sin->sin_addr,
- &sin6.sin6_addr.s6_addr32[3],
- sizeof(sin6.sin6_addr.s6_addr32[3]));
- sin6.sin6_port = sin->sin_port;
- memcpy(from, &sin6, sizeof(struct sockaddr_in6));
- }
-#endif
+ memcpy(from, &store, min((size_t)fromlen, len));
#ifdef INET6
{
struct sockaddr_in6 lsa6, *from6;
@@ -5916,8 +5999,8 @@
goto release;
}
if ((uio->uio_resid == 0) ||
- ((in_eeor_mode) && (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))
- ) {
+ ((in_eeor_mode) &&
+ (copied_so_far >= (uint32_t) max(so->so_rcv.sb_lowat, 1)))) {
goto release;
}
/*
@@ -6103,7 +6186,7 @@
struct sctp_extrcvinfo *s_extra;
s_extra = (struct sctp_extrcvinfo *)sinfo;
- s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->serinfo_next_flags = SCTP_NO_NEXT_MSG;
}
if (hold_rlock == 1) {
SCTP_INP_READ_UNLOCK(inp);
@@ -6161,9 +6244,7 @@
sctp_m_free(struct mbuf *m)
{
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- if (SCTP_BUF_IS_EXTENDED(m)) {
- sctp_log_mb(m, SCTP_MBUF_IFREE);
- }
+ sctp_log_mb(m, SCTP_MBUF_IFREE);
}
return (m_free(m));
}
@@ -6262,9 +6343,12 @@
fromlen = 0;
}
+ if (filling_sinfo) {
+ memset(&sinfo, 0, sizeof(struct sctp_extrcvinfo));
+ }
error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, flagsp,
(struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
- if ((controlp) && (filling_sinfo)) {
+ if (controlp != NULL) {
/* copy back the sinfo in a CMSG format */
if (filling_sinfo)
*controlp = sctp_build_ctl_nchunk(inp,
@@ -6319,7 +6403,8 @@
(sin->sin_addr.s_addr == INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_7);
*error = EINVAL;
goto out_now;
}
@@ -6326,7 +6411,8 @@
if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
/* assoc gone no un-lock */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_8);
*error = ENOBUFS;
goto out_now;
}
@@ -6340,7 +6426,8 @@
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_9);
*error = EINVAL;
goto out_now;
}
@@ -6347,7 +6434,8 @@
if (sctp_add_remote_addr(stcb, sa, NULL, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
/* assoc gone no un-lock */
SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTPUTIL + SCTP_LOC_10);
*error = ENOBUFS;
goto out_now;
}
@@ -6451,7 +6539,7 @@
{
struct sockaddr *addr_touse;
-#ifdef INET6
+#if defined(INET) && defined(INET6)
struct sockaddr_in sin;
#endif
@@ -6465,8 +6553,10 @@
addr_touse = sa;
#ifdef INET6
if (sa->sa_family == AF_INET6) {
+#ifdef INET
struct sockaddr_in6 *sin6;
+#endif
if (sa->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6478,6 +6568,7 @@
*error = EINVAL;
return;
}
+#ifdef INET
sin6 = (struct sockaddr_in6 *)addr_touse;
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6490,6 +6581,7 @@
in6_sin6_2_sin(&sin, sin6);
addr_touse = (struct sockaddr *)&sin;
}
+#endif
}
#endif
#ifdef INET
@@ -6579,7 +6671,7 @@
{
struct sockaddr *addr_touse;
-#ifdef INET6
+#if defined(INET) && defined(INET6)
struct sockaddr_in sin;
#endif
@@ -6593,8 +6685,11 @@
addr_touse = sa;
#ifdef INET6
if (sa->sa_family == AF_INET6) {
+#ifdef INET
struct sockaddr_in6 *sin6;
+#endif
+
if (sa->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6606,6 +6701,7 @@
*error = EINVAL;
return;
}
+#ifdef INET
sin6 = (struct sockaddr_in6 *)addr_touse;
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
@@ -6618,6 +6714,7 @@
in6_sin6_2_sin(&sin, sin6);
addr_touse = (struct sockaddr *)&sin;
}
+#endif
}
#endif
#ifdef INET
@@ -6660,8 +6757,16 @@
int
sctp_local_addr_count(struct sctp_tcb *stcb)
{
- int loopback_scope, ipv4_local_scope, local_scope, site_scope;
- int ipv4_addr_legal, ipv6_addr_legal;
+ int loopback_scope;
+
+#if defined(INET)
+ int ipv4_local_scope, ipv4_addr_legal;
+
+#endif
+#if defined (INET6)
+ int local_scope, site_scope, ipv6_addr_legal;
+
+#endif
struct sctp_vrf *vrf;
struct sctp_ifn *sctp_ifn;
struct sctp_ifa *sctp_ifa;
@@ -6668,20 +6773,16 @@
int count = 0;
/* Turn on all the appropriate scopes */
- loopback_scope = stcb->asoc.loopback_scope;
- ipv4_local_scope = stcb->asoc.ipv4_local_scope;
- local_scope = stcb->asoc.local_scope;
- site_scope = stcb->asoc.site_scope;
- ipv4_addr_legal = ipv6_addr_legal = 0;
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
- ipv6_addr_legal = 1;
- if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
- ipv4_addr_legal = 1;
- }
- } else {
- ipv4_addr_legal = 1;
- }
-
+ loopback_scope = stcb->asoc.scope.loopback_scope;
+#if defined(INET)
+ ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
+ ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
+#endif
+#if defined(INET6)
+ local_scope = stcb->asoc.scope.local_scope;
+ site_scope = stcb->asoc.scope.site_scope;
+ ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
+#endif
SCTP_IPI_ADDR_RLOCK();
vrf = sctp_find_vrf(stcb->asoc.vrf_id);
if (vrf == NULL) {
@@ -6707,7 +6808,7 @@
if (ipv4_addr_legal) {
struct sockaddr_in *sin;
- sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ sin = &sctp_ifa->address.sin;
if (sin->sin_addr.s_addr == 0) {
/*
* skip unspecified
@@ -6715,6 +6816,10 @@
*/
continue;
}
+ if (prison_check_ip4(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin->sin_addr) != 0) {
+ continue;
+ }
if ((ipv4_local_scope == 0) &&
(IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
continue;
@@ -6731,10 +6836,14 @@
if (ipv6_addr_legal) {
struct sockaddr_in6 *sin6;
- sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ sin6 = &sctp_ifa->address.sin6;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
continue;
}
+ if (prison_check_ip6(stcb->sctp_ep->ip_inp.inp.inp_cred,
+ &sin6->sin6_addr) != 0) {
+ continue;
+ }
if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
if (local_scope == 0)
continue;
@@ -6820,24 +6929,16 @@
}
#endif
-/* XXX: Remove the #ifdef after tunneling over IPv6 works also on FreeBSD. */
-#ifdef INET
-/* We will need to add support
- * to bind the ports and such here
- * so we can do UDP tunneling. In
- * the mean-time, we return error
- */
-#include <netinet/udp.h>
-#include <netinet/udp_var.h>
-#include <sys/proc.h>
-#ifdef INET6
-#include <netinet6/sctp6_var.h>
-#endif
-
static void
-sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *ignored)
+sctp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
+ const struct sockaddr *sa SCTP_UNUSED, void *ctx SCTP_UNUSED)
{
struct ip *iph;
+
+#ifdef INET6
+ struct ip6_hdr *ip6;
+
+#endif
struct mbuf *sp, *last;
struct udphdr *uhdr;
uint16_t port;
@@ -6854,7 +6955,7 @@
* Split out the mbuf chain. Leave the IP header in m, place the
* rest in the sp.
*/
- sp = m_split(m, off, M_DONTWAIT);
+ sp = m_split(m, off, M_NOWAIT);
if (sp == NULL) {
/* Gak, drop packet, we can't do a split */
goto out;
@@ -6877,20 +6978,32 @@
for (last = m; last->m_next; last = last->m_next);
last->m_next = sp;
m->m_pkthdr.len += sp->m_pkthdr.len;
+ /*
+ * The CSUM_DATA_VALID flags indicates that the HW checked the UDP
+ * checksum and it was valid. Since CSUM_DATA_VALID ==
+ * CSUM_SCTP_VALID this would imply that the HW also verified the
+ * SCTP checksum. Therefore, clear the bit.
+ */
+ SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+ "sctp_recv_udp_tunneled_packet(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+ m->m_pkthdr.len,
+ if_name(m->m_pkthdr.rcvif),
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
iph = mtod(m, struct ip *);
switch (iph->ip_v) {
#ifdef INET
case IPVERSION:
- iph->ip_len -= sizeof(struct udphdr);
+ iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
sctp_input_with_port(m, off, port);
break;
#endif
#ifdef INET6
case IPV6_VERSION >> 4:
- /* Not yet supported. */
- goto out;
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
+ sctp6_input_with_port(&m, &off, port);
break;
-
#endif
default:
goto out;
@@ -6904,19 +7017,22 @@
void
sctp_over_udp_stop(void)
{
- struct socket *sop;
-
/*
* This function assumes sysctl caller holds sctp_sysctl_info_lock()
* for writting!
*/
- if (SCTP_BASE_INFO(udp_tun_socket) == NULL) {
- /* Nothing to do */
- return;
+#ifdef INET
+ if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) {
+ soclose(SCTP_BASE_INFO(udp4_tun_socket));
+ SCTP_BASE_INFO(udp4_tun_socket) = NULL;
}
- sop = SCTP_BASE_INFO(udp_tun_socket);
- soclose(sop);
- SCTP_BASE_INFO(udp_tun_socket) = NULL;
+#endif
+#ifdef INET6
+ if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) {
+ soclose(SCTP_BASE_INFO(udp6_tun_socket));
+ SCTP_BASE_INFO(udp6_tun_socket) = NULL;
+ }
+#endif
}
int
@@ -6924,53 +7040,83 @@
{
uint16_t port;
int ret;
+
+#ifdef INET
struct sockaddr_in sin;
- struct socket *sop = NULL;
- struct thread *th;
- struct ucred *cred;
+#endif
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+
+#endif
/*
* This function assumes sysctl caller holds sctp_sysctl_info_lock()
* for writting!
*/
port = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
- if (port == 0) {
+ if (ntohs(port) == 0) {
/* Must have a port set */
return (EINVAL);
}
- if (SCTP_BASE_INFO(udp_tun_socket) != NULL) {
+#ifdef INET
+ if (SCTP_BASE_INFO(udp4_tun_socket) != NULL) {
/* Already running -- must stop first */
return (EALREADY);
}
- th = curthread;
- cred = th->td_ucred;
- if ((ret = socreate(PF_INET, &sop,
- SOCK_DGRAM, IPPROTO_UDP, cred, th))) {
+#endif
+#ifdef INET6
+ if (SCTP_BASE_INFO(udp6_tun_socket) != NULL) {
+ /* Already running -- must stop first */
+ return (EALREADY);
+ }
+#endif
+#ifdef INET
+ if ((ret = socreate(PF_INET, &SCTP_BASE_INFO(udp4_tun_socket),
+ SOCK_DGRAM, IPPROTO_UDP,
+ curthread->td_ucred, curthread))) {
+ sctp_over_udp_stop();
return (ret);
}
- SCTP_BASE_INFO(udp_tun_socket) = sop;
- /* call the special UDP hook */
- ret = udp_set_kernel_tunneling(sop, sctp_recv_udp_tunneled_packet);
- if (ret) {
- goto exit_stage_left;
+ /* Call the special UDP hook. */
+ if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket),
+ sctp_recv_udp_tunneled_packet, NULL))) {
+ sctp_over_udp_stop();
+ return (ret);
}
- /* Ok we have a socket, bind it to the port */
- memset(&sin, 0, sizeof(sin));
- sin.sin_len = sizeof(sin);
+ /* Ok, we have a socket, bind it to the port. */
+ memset(&sin, 0, sizeof(struct sockaddr_in));
+ sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_port = htons(port);
- ret = sobind(sop, (struct sockaddr *)&sin, th);
- if (ret) {
- /* Close up we cant get the port */
-exit_stage_left:
+ if ((ret = sobind(SCTP_BASE_INFO(udp4_tun_socket),
+ (struct sockaddr *)&sin, curthread))) {
sctp_over_udp_stop();
return (ret);
}
- /*
- * Ok we should now get UDP packets directly to our input routine
- * sctp_recv_upd_tunneled_packet().
- */
+#endif
+#ifdef INET6
+ if ((ret = socreate(PF_INET6, &SCTP_BASE_INFO(udp6_tun_socket),
+ SOCK_DGRAM, IPPROTO_UDP,
+ curthread->td_ucred, curthread))) {
+ sctp_over_udp_stop();
+ return (ret);
+ }
+ /* Call the special UDP hook. */
+ if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket),
+ sctp_recv_udp_tunneled_packet, NULL))) {
+ sctp_over_udp_stop();
+ return (ret);
+ }
+ /* Ok, we have a socket, bind it to the port. */
+ memset(&sin6, 0, sizeof(struct sockaddr_in6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = htons(port);
+ if ((ret = sobind(SCTP_BASE_INFO(udp6_tun_socket),
+ (struct sockaddr *)&sin6, curthread))) {
+ sctp_over_udp_stop();
+ return (ret);
+ }
+#endif
return (0);
}
-
-#endif
Modified: trunk/sys/netinet/sctputil.h
===================================================================
--- trunk/sys/netinet/sctputil.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/sctputil.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/sctputil.h 238253 2012-07-08 16:14:42Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/sctputil.h 294215 2016-01-17 12:15:41Z tuexen $");
#ifndef _NETINET_SCTP_UTIL_H_
#define _NETINET_SCTP_UTIL_H_
@@ -68,6 +68,9 @@
/*
* Function prototypes
*/
+int32_t
+sctp_map_assoc_state(int);
+
uint32_t
sctp_get_ifa_hash_val(struct sockaddr *addr);
@@ -81,7 +84,7 @@
uint32_t sctp_select_a_tag(struct sctp_inpcb *, uint16_t lport, uint16_t rport, int);
-int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t);
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, uint32_t, uint32_t, uint16_t);
void sctp_fill_random_store(struct sctp_pcb *);
@@ -148,9 +151,11 @@
sctp_get_next_param(struct mbuf *, int,
struct sctp_paramhdr *, int);
-int sctp_add_pad_tombuf(struct mbuf *, int);
+struct mbuf *
+ sctp_add_pad_tombuf(struct mbuf *, int);
-int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+struct mbuf *
+ sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
void
sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
@@ -187,7 +192,8 @@
/* We abort responding to an IP packet for some reason */
void
sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
- int, struct sctphdr *, struct mbuf *,
+ int, struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct mbuf *,
uint8_t, uint32_t,
uint32_t, uint16_t);
@@ -202,9 +208,11 @@
);
void
-sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
- struct sctp_inpcb *,
- uint8_t, uint32_t,
+sctp_handle_ootb(struct mbuf *, int, int,
+ struct sockaddr *, struct sockaddr *,
+ struct sctphdr *, struct sctp_inpcb *,
+ struct mbuf *,
+ uint8_t, uint32_t, uint16_t,
uint32_t, uint16_t);
int
@@ -242,7 +250,6 @@
int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
void sctp_print_address(struct sockaddr *);
-void sctp_print_address_pkt(struct ip *, struct sctphdr *);
int
sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
@@ -252,7 +259,8 @@
#endif
);
-struct mbuf *sctp_generate_invmanparam(int);
+struct mbuf *sctp_generate_cause(uint16_t, char *);
+struct mbuf *sctp_generate_no_user_data_cause(uint32_t);
void
sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
@@ -274,14 +282,14 @@
#define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt) \
do { \
if (tp1->data != NULL) { \
- atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+ atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
if ((asoc)->total_output_queue_size >= tp1->book_size) { \
atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
} else { \
(asoc)->total_output_queue_size = 0; \
} \
- if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
- (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
} else { \
@@ -288,7 +296,7 @@
stcb->sctp_socket->so_snd.sb_cc = 0; \
} \
} \
- } \
+ } \
} while (0)
#endif
@@ -295,14 +303,14 @@
#define sctp_free_spbufspace(stcb, asoc, sp) \
do { \
- if (sp->data != NULL) { \
+ if (sp->data != NULL) { \
if ((asoc)->total_output_queue_size >= sp->length) { \
atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
} else { \
(asoc)->total_output_queue_size = 0; \
} \
- if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
- (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
} else { \
@@ -309,7 +317,7 @@
stcb->sctp_socket->so_snd.sb_cc = 0; \
} \
} \
- } \
+ } \
} while (0)
#define sctp_snd_sb_alloc(stcb, sz) \
@@ -323,13 +331,9 @@
} while (0)
/* functions to start/stop udp tunneling */
-/* XXX: Remove the #ifdef after tunneling over IPv6 works also on FreeBSD. */
-#ifdef INET
void sctp_over_udp_stop(void);
int sctp_over_udp_start(void);
-#endif
-
int
sctp_soreceive(struct socket *so, struct sockaddr **psa,
struct uio *uio,
@@ -349,10 +353,16 @@
void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
+#ifdef SCTP_MBUF_LOGGING
void
sctp_log_mb(struct mbuf *m, int from);
void
+ sctp_log_mbc(struct mbuf *m, int from);
+
+#endif
+
+void
sctp_sblog(struct sockbuf *sb,
struct sctp_tcb *stcb, int from, int incr);
@@ -369,7 +379,6 @@
void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
void sctp_log_block(uint8_t, struct sctp_association *, int);
void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
-void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
int sctp_fill_stat_log(void *, size_t *);
void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);
Modified: trunk/sys/netinet/siftr.c
===================================================================
--- trunk/sys/netinet/siftr.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/siftr.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -60,7 +60,7 @@
******************************************************/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/siftr.c 248144 2013-03-11 06:09:08Z lstewart $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/siftr.c 281174 2015-04-06 22:41:13Z hiren $");
#include <sys/param.h>
#include <sys/alq.h>
@@ -226,6 +226,10 @@
u_int sent_inflight_bytes;
/* Number of segments currently in the reassembly queue. */
int t_segqlen;
+ /* Flowid for the connection. */
+ u_int flowid;
+ /* Flow type for the connection. */
+ u_int flowtype;
/* Link to next pkt_node in the list. */
STAILQ_ENTRY(pkt_node) nodes;
};
@@ -267,6 +271,7 @@
static unsigned int siftr_generate_hashes = 0;
/* static unsigned int siftr_binary_log = 0; */
static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log";
+static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log";
static u_long siftr_hashmask;
STAILQ_HEAD(pkthead, pkt_node) pkt_queue = STAILQ_HEAD_INITIALIZER(pkt_queue);
LIST_HEAD(listhead, flow_hash_node) *counter_hash;
@@ -298,7 +303,7 @@
"switch siftr module operations on/off");
SYSCTL_PROC(_net_inet_siftr, OID_AUTO, logfile, CTLTYPE_STRING|CTLFLAG_RW,
- &siftr_logfile, sizeof(siftr_logfile), &siftr_sysctl_logfile_name_handler,
+ &siftr_logfile_shadow, sizeof(siftr_logfile_shadow), &siftr_sysctl_logfile_name_handler,
"A", "file to save siftr log messages to");
SYSCTL_UINT(_net_inet_siftr, OID_AUTO, ppl, CTLFLAG_RW,
@@ -442,7 +447,7 @@
MAX_LOG_MSG_LEN,
"%c,0x%08x,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:"
"%x:%x:%x:%x:%x,%u,%ld,%ld,%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,"
- "%u,%d,%u,%u,%u,%u,%u,%u\n",
+ "%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n",
direction[pkt_node->direction],
pkt_node->hash,
pkt_node->tval.tv_sec,
@@ -483,7 +488,9 @@
pkt_node->rcv_buf_hiwater,
pkt_node->rcv_buf_cc,
pkt_node->sent_inflight_bytes,
- pkt_node->t_segqlen);
+ pkt_node->t_segqlen,
+ pkt_node->flowid,
+ pkt_node->flowtype);
} else { /* IPv4 packet */
pkt_node->ip_laddr[0] = FIRST_OCTET(pkt_node->ip_laddr[3]);
pkt_node->ip_laddr[1] = SECOND_OCTET(pkt_node->ip_laddr[3]);
@@ -499,7 +506,7 @@
log_buf->ae_bytesused = snprintf(log_buf->ae_data,
MAX_LOG_MSG_LEN,
"%c,0x%08x,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%ld,%ld,"
- "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u\n",
+ "%ld,%ld,%ld,%u,%u,%u,%u,%u,%u,%u,%d,%u,%u,%u,%u,%u,%u,%u,%u\n",
direction[pkt_node->direction],
pkt_node->hash,
(intmax_t)pkt_node->tval.tv_sec,
@@ -532,7 +539,9 @@
pkt_node->rcv_buf_hiwater,
pkt_node->rcv_buf_cc,
pkt_node->sent_inflight_bytes,
- pkt_node->t_segqlen);
+ pkt_node->t_segqlen,
+ pkt_node->flowid,
+ pkt_node->flowtype);
#ifdef SIFTR_IPV6
}
#endif
@@ -785,6 +794,8 @@
pn->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc;
pn->sent_inflight_bytes = tp->snd_max - tp->snd_una;
pn->t_segqlen = tp->t_segqlen;
+ pn->flowid = inp->inp_flowid;
+ pn->flowtype = inp->inp_flowtype;
/* We've finished accessing the tcb so release the lock. */
if (inp_locally_locked)
@@ -953,7 +964,8 @@
* the mbuf cluster "at" at offset "offset" bytes from
* the beginning of the "at" mbuf's data pointer.
*/
- th->th_sum = in_cksum_skip(*m, ip->ip_len, ip_hl);
+ th->th_sum = in_cksum_skip(*m, ntohs(ip->ip_len),
+ ip_hl);
}
/*
@@ -1143,38 +1155,38 @@
struct alq *new_alq;
int error;
- if (req->newptr == NULL)
- goto skip;
+ error = sysctl_handle_string(oidp, arg1, arg2, req);
- /* If old filename and new filename are different. */
- if (strncmp(siftr_logfile, (char *)req->newptr, PATH_MAX)) {
+ /* Check for error or same filename */
+ if (error != 0 || req->newptr == NULL ||
+ strncmp(siftr_logfile, arg1, arg2) == 0)
+ goto done;
- error = alq_open(&new_alq, req->newptr, curthread->td_ucred,
- SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0);
+ /* Filename changed */
+ error = alq_open(&new_alq, arg1, curthread->td_ucred,
+ SIFTR_LOG_FILE_MODE, SIFTR_ALQ_BUFLEN, 0);
+ if (error != 0)
+ goto done;
- /* Bail if unable to create new alq. */
- if (error)
- return (1);
-
- /*
- * If disabled, siftr_alq == NULL so we simply close
- * the alq as we've proved it can be opened.
- * If enabled, close the existing alq and switch the old
- * for the new.
- */
- if (siftr_alq == NULL)
- alq_close(new_alq);
- else {
- alq_close(siftr_alq);
- siftr_alq = new_alq;
- }
+ /*
+ * If disabled, siftr_alq == NULL so we simply close
+ * the alq as we've proved it can be opened.
+ * If enabled, close the existing alq and switch the old
+ * for the new.
+ */
+ if (siftr_alq == NULL) {
+ alq_close(new_alq);
+ } else {
+ alq_close(siftr_alq);
+ siftr_alq = new_alq;
}
-skip:
- return (sysctl_handle_string(oidp, arg1, arg2, req));
+ /* Update filename upon success */
+ strlcpy(siftr_logfile, arg1, arg2);
+done:
+ return (error);
}
-
static int
siftr_manage_ops(uint8_t action)
{
Modified: trunk/sys/netinet/tcp.h
===================================================================
--- trunk/sys/netinet/tcp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp.h 247525 2013-03-01 03:04:57Z jhb $
+ * $FreeBSD: stable/10/sys/netinet/tcp.h 292823 2015-12-28 02:43:12Z pkelsey $
*/
#ifndef _NETINET_TCP_H_
@@ -98,6 +98,10 @@
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
+#define TCPOPT_FAST_OPEN 34
+#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOLEN_FAST_OPEN_MIN 6
+#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -166,6 +170,7 @@
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
+#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
Modified: trunk/sys/netinet/tcp_debug.c
===================================================================
--- trunk/sys/netinet/tcp_debug.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_debug.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_debug.c 193895 2009-06-10 10:39:41Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_debug.c 241913 2012-10-22 21:09:03Z glebius $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -176,11 +176,10 @@
#ifdef INET6
isipv6 ? ntohs(((struct ip6_hdr *)ipgen)->ip6_plen) :
#endif
- ((struct ip *)ipgen)->ip_len;
+ ntohs(((struct ip *)ipgen)->ip_len);
if (act == TA_OUTPUT) {
seq = ntohl(seq);
ack = ntohl(ack);
- len = ntohs((u_short)len);
}
if (act == TA_OUTPUT)
len -= sizeof (struct tcphdr);
Modified: trunk/sys/netinet/tcp_debug.h
===================================================================
--- trunk/sys/netinet/tcp_debug.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_debug.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_debug.h 188578 2009-02-13 15:14:43Z luigi $
+ * $FreeBSD: stable/10/sys/netinet/tcp_debug.h 188578 2009-02-13 15:14:43Z luigi $
*/
#ifndef _NETINET_TCP_DEBUG_H_
Added: trunk/sys/netinet/tcp_fastopen.c
===================================================================
--- trunk/sys/netinet/tcp_fastopen.c (rev 0)
+++ trunk/sys/netinet/tcp_fastopen.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,443 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default. To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system. The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ * When non-zero, all client-supplied TFO cookies will be considered to
+ * be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ * will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ * When zero, no new TFO connections can be created. On the transition
+ * from enabled to disabled, all installed keys are removed. On the
+ * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ * is non-zero and there are no keys installed, a new key will be
+ * generated immediately. The transition from enabled to disabled does
+ * not affect any TFO connections in progress; it only prevents new ones
+ * from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ * The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ * The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ * The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ * sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it. This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests. Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood. This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_fastopen.c 292823 2015-12-28 02:43:12Z pkelsey $");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define TCP_FASTOPEN_MAX_KEYS 2
+#else
+#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS
+#endif
+
+struct tcp_fastopen_keylist {
+ unsigned int newest;
+ uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+struct tcp_fastopen_callout {
+ struct callout c;
+ struct vnet *v;
+};
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+ "Accept any non-empty cookie");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+ "Number of seconds between auto-generation of a new key; zero disables");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+ "Enable/disable TCP Fast Open processing");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+ "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+ "Maximum number of keys supported");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+ "Number of keys installed");
+
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+ CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_setkey, "",
+ "Install a new key");
+
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock)
+
+#define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock)
+
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys VNET(tcp_fastopen_keys)
+
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx)
+
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define V_counter_zone VNET(counter_zone)
+
+/*
+ * Initialize the per-vnet TFO state: create the UMA zone that backs the
+ * counters handed out by tcp_fastopen_alloc_counter(), initialize the key
+ * lock, and prepare (but do not arm) the autokey callout.
+ *
+ * NOTE(review): assumes the caller runs with the vnet context already set
+ * so that curvnet names the vnet being initialized -- confirm at call site.
+ */
+void
+tcp_fastopen_init(void)
+{
+	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+	    &V_tcp_fastopen_keylock, 0);
+	/*
+	 * Record the owning vnet.  tcp_fastopen_autokey_callout() hands this
+	 * pointer to CURVNET_SET(), and no other code in this file assigns
+	 * it, so without this store the callout would switch to a NULL vnet.
+	 */
+	V_tcp_fastopen_autokey_ctx.v = curvnet;
+	/* Start at the last slot so the first key added wraps to index 0. */
+	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+/*
+ * Tear down the state set up by tcp_fastopen_init().  The autokey callout
+ * is drained first because it was initialized against the key lock, which
+ * is destroyed next; the counter zone is released last.
+ */
+void
+tcp_fastopen_destroy(void)
+{
+
+	/* Wait for any running autokey callout before its lock goes away. */
+	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+
+	rm_destroy(&V_tcp_fastopen_keylock);
+
+	uma_zdestroy(V_counter_zone);
+}
+
+/*
+ * Allocate a counter from the TFO counter zone without sleeping.
+ *
+ * Returns a pointer to a counter initialized to 1, or NULL when the
+ * allocation fails.
+ */
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+	unsigned int *refs;
+
+	refs = uma_zalloc(V_counter_zone, M_NOWAIT);
+	if (refs == NULL)
+		return (NULL);
+
+	*refs = 1;
+	return (refs);
+}
+
+/*
+ * Drop one reference from a counter obtained from
+ * tcp_fastopen_alloc_counter().  A counter whose value is 1 is returned to
+ * the zone instead of being decremented; any other value is decremented
+ * atomically.
+ */
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+
+	if (*counter != 1) {
+		atomic_subtract_int(counter, 1);
+		return;
+	}
+
+	/* Last reference: release the storage. */
+	uma_zfree(V_counter_zone, counter);
+}
+
+/*
+ * Install "key" (TCP_FASTOPEN_KEY_LEN bytes) as the newest key.  The key
+ * array is used as a ring: the slot after the current newest one is
+ * overwritten, wrapping to index 0 at the end, and the installed-key count
+ * grows until it reaches TCP_FASTOPEN_MAX_KEYS.
+ *
+ * The "_locked" suffix indicates the caller is expected to hold the key
+ * write lock; the callers in this file do.
+ */
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+	unsigned int slot;
+
+	slot = V_tcp_fastopen_keys.newest + 1;
+	if (slot == TCP_FASTOPEN_MAX_KEYS)
+		slot = 0;
+	V_tcp_fastopen_keys.newest = slot;
+
+	memcpy(V_tcp_fastopen_keys.key[slot], key, TCP_FASTOPEN_KEY_LEN);
+
+	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+		V_tcp_fastopen_numkeys++;
+}
+
+/*
+ * Generate a random key with arc4rand() and install it as the newest key
+ * via tcp_fastopen_addkey_locked().
+ */
+static void
+tcp_fastopen_autokey_locked(void)
+{
+	uint8_t fresh_key[TCP_FASTOPEN_KEY_LEN];
+
+	arc4rand(fresh_key, sizeof(fresh_key), 0);
+	tcp_fastopen_addkey_locked(fresh_key);
+}
+
+/*
+ * Autokey callout handler.  "arg" is the struct tcp_fastopen_callout that
+ * owns the callout.  Switches to the vnet recorded in it, installs a newly
+ * generated key, and re-arms itself to fire again after the configured
+ * autokey interval (seconds, scaled by hz).
+ */
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+	struct tcp_fastopen_callout *ac = arg;
+	int period;
+
+	CURVNET_SET(ac->v);
+	tcp_fastopen_autokey_locked();
+	/* Read the interval only after the vnet context has been set. */
+	period = V_tcp_fastopen_autokey * hz;
+	callout_reset(&ac->c, period, tcp_fastopen_autokey_callout, ac);
+	CURVNET_RESTORE();
+}
+
+
+/*
+ * Compute the 64-bit cookie for a connection: SipHash24, keyed with "key",
+ * over the foreign address in "inc" (the IPv4 or the IPv6 one, selected by
+ * the INC_ISIPV6 flag).  An address family that is not compiled in
+ * contributes no data to the hash.
+ */
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+	SIPHASH_CTX hash_ctx;
+	uint64_t cookie;
+	const int family_bits = inc->inc_flags & INC_ISIPV6;
+
+	SipHash24_Init(&hash_ctx);
+	SipHash_SetKey(&hash_ctx, key);
+
+	if (family_bits == 0) {
+#ifdef INET
+		SipHash_Update(&hash_ctx, &inc->inc_faddr,
+		    sizeof(inc->inc_faddr));
+#endif
+	} else if (family_bits == INC_ISIPV6) {
+#ifdef INET6
+		SipHash_Update(&hash_ctx, &inc->inc6_faddr,
+		    sizeof(inc->inc6_faddr));
+#endif
+	}
+
+	SipHash_Final((u_int8_t *)&cookie, &hash_ctx);
+
+	return (cookie);
+}
+
+
+/*
+ * Validate a client-supplied TFO cookie of "len" bytes against every
+ * installed key, newest first, and report through "latest_cookie" the
+ * cookie derived from the newest key.
+ *
+ * Return values:
+ *	-1	the cookie is invalid and no valid cookie is available
+ *	 0	the cookie is invalid and the latest cookie has been returned
+ *	 1	the cookie is valid and the latest cookie has been returned
+ *
+ * When net.inet.tcp.fastopen.acceptany is non-zero the keys are not
+ * consulted: 1 is returned and *latest_cookie is set to 0.
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+    unsigned int len, uint64_t *latest_cookie)
+{
+	struct rm_priotracker tracker;
+	unsigned int i, key_index;
+	uint64_t cur_cookie;
+	int rv;
+
+	if (V_tcp_fastopen_acceptany) {
+		*latest_cookie = 0;
+		return (1);
+	}
+
+	/*
+	 * The key array and key count are modified under the write lock, so
+	 * every read of them below -- including the wrong-length path -- is
+	 * done with the read lock held.
+	 */
+	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+
+	/*
+	 * Without an installed key there is no cookie to hand back; report
+	 * that explicitly rather than returning 0 with *latest_cookie unset.
+	 */
+	if (V_tcp_fastopen_numkeys == 0) {
+		rv = -1;
+		goto out;
+	}
+
+	/* A cookie of the wrong size cannot match; just offer the latest. */
+	if (len != TCP_FASTOPEN_COOKIE_LEN) {
+		*latest_cookie =
+		    tcp_fastopen_make_cookie(
+			V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
+			inc);
+		rv = 0;
+		goto out;
+	}
+
+	/*
+	 * Check against each available key, from newest to oldest.
+	 */
+	rv = 0;
+	key_index = V_tcp_fastopen_keys.newest;
+	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+		cur_cookie =
+		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+			inc);
+		/* The first iteration uses the newest key. */
+		if (i == 0)
+			*latest_cookie = cur_cookie;
+		if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+			rv = 1;
+			break;
+		}
+		/* Step backwards through the ring of keys. */
+		if (key_index == 0)
+			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+		else
+			key_index--;
+	}
+
+out:
+	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+	return (rv);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.autokey.  Reads report the current
+ * interval.  A write is rejected with EINVAL when the interval multiplied
+ * by hz would exceed INT_MAX; otherwise, while TFO is enabled, a non-zero
+ * interval (re)arms the autokey callout and a change from non-zero to zero
+ * stops it.  The new interval is stored under the key write lock.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int new;
+	int error;
+
+	new = V_tcp_fastopen_autokey;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error != 0 || !req->newptr)
+		return (error);
+
+	if (new > (INT_MAX / hz))
+		return (EINVAL);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	if (V_tcp_fastopen_enabled) {
+		if (new) {
+			callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+			    new * hz, tcp_fastopen_autokey_callout,
+			    &V_tcp_fastopen_autokey_ctx);
+		} else if (V_tcp_fastopen_autokey) {
+			callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+		}
+	}
+	V_tcp_fastopen_autokey = new;
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (error);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.enabled.  Reads report the current
+ * setting.  A write acts only on a transition:
+ *  - enabled -> disabled: forget all keys, stop the autokey callout if an
+ *    autokey interval is configured, and clear the enabled flag;
+ *  - disabled -> enabled: if an autokey interval is configured and no key
+ *    is installed, generate one now and arm the callout; then set the flag.
+ * Each transition is carried out under the key write lock.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int new;
+	int error;
+
+	new = V_tcp_fastopen_enabled;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error != 0 || !req->newptr)
+		return (error);
+
+	if (!new) {
+		if (V_tcp_fastopen_enabled) {
+			/* enabled -> disabled */
+			TCP_FASTOPEN_KEYS_WLOCK();
+			V_tcp_fastopen_numkeys = 0;
+			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+			if (V_tcp_fastopen_autokey)
+				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+			V_tcp_fastopen_enabled = 0;
+			TCP_FASTOPEN_KEYS_WUNLOCK();
+		}
+	} else if (!V_tcp_fastopen_enabled) {
+		/* disabled -> enabled */
+		TCP_FASTOPEN_KEYS_WLOCK();
+		if (V_tcp_fastopen_autokey &&
+		    (V_tcp_fastopen_numkeys == 0)) {
+			tcp_fastopen_autokey_locked();
+			callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+			    V_tcp_fastopen_autokey * hz,
+			    tcp_fastopen_autokey_callout,
+			    &V_tcp_fastopen_autokey_ctx);
+		}
+		V_tcp_fastopen_enabled = 1;
+		TCP_FASTOPEN_KEYS_WUNLOCK();
+	}
+
+	return (error);
+}
+
+/*
+ * Handler for the write-only net.inet.tcp.fastopen.setkey.  Any attempt to
+ * read fails with EINVAL, a request carrying no new value fails with EPERM,
+ * and a new value that is not exactly TCP_FASTOPEN_KEY_LEN bytes fails with
+ * EINVAL.  Otherwise the supplied bytes are installed as the newest key
+ * under the key write lock.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+	uint8_t key_buf[TCP_FASTOPEN_KEY_LEN];
+	int rc;
+
+	/* The checks are ordered so each failure keeps its own errno. */
+	if (req->oldptr != NULL || req->oldlen != 0)
+		return (EINVAL);
+	if (req->newptr == NULL)
+		return (EPERM);
+	if (req->newlen != sizeof(key_buf))
+		return (EINVAL);
+
+	rc = SYSCTL_IN(req, key_buf, sizeof(key_buf));
+	if (rc != 0)
+		return (rc);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	tcp_fastopen_addkey_locked(key_buf);
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (0);
+}
Property changes on: trunk/sys/netinet/tcp_fastopen.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/tcp_fastopen.h
===================================================================
--- trunk/sys/netinet/tcp_fastopen.h (rev 0)
+++ trunk/sys/netinet/tcp_fastopen.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,48 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/tcp_fastopen.h 292823 2015-12-28 02:43:12Z pkelsey $
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */
+
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled)
+
+void tcp_fastopen_init(void);
+void tcp_fastopen_destroy(void);
+unsigned int *tcp_fastopen_alloc_counter(void);
+void tcp_fastopen_decrement_counter(unsigned int *counter);
+int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+ unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */
Property changes on: trunk/sys/netinet/tcp_fastopen.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/tcp_fsm.h
===================================================================
--- trunk/sys/netinet/tcp_fsm.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_fsm.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_fsm.h 171656 2007-07-30 11:06:42Z des $
+ * $FreeBSD: stable/10/sys/netinet/tcp_fsm.h 171656 2007-07-30 11:06:42Z des $
*/
#ifndef _NETINET_TCP_FSM_H_
Modified: trunk/sys/netinet/tcp_hostcache.c
===================================================================
--- trunk/sys/netinet/tcp_hostcache.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_hostcache.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -64,7 +64,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_hostcache.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_hostcache.c 314667 2017-03-04 13:03:31Z avg $");
#include "opt_inet6.h"
@@ -74,6 +74,7 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
+#include <sys/sbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -233,7 +234,7 @@
/*
* Set up periodic cache cleanup.
*/
- callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
+ callout_init(&V_tcp_hc_callout, 1);
callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
tcp_hc_purge, curvnet);
}
@@ -593,23 +594,20 @@
static int
sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
{
- int bufsize;
int linesize = 128;
- char *p, *buf;
- int len, i, error;
+ struct sbuf sb;
+ int i, error;
struct hc_metrics *hc_entry;
#ifdef INET6
char ip6buf[INET6_ADDRSTRLEN];
#endif
- bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
+ sbuf_new(&sb, NULL, linesize * (V_tcp_hostcache.cache_count + 1),
+ SBUF_FIXEDLEN);
- p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
-
- len = snprintf(p, linesize,
- "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH "
+ sbuf_printf(&sb,
+ "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH "
" CWND SENDPIPE RECVPIPE HITS UPD EXP\n");
- p += len;
#define msec(u) (((u) + 500) / 1000)
for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
@@ -616,7 +614,7 @@
THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
rmx_q) {
- len = snprintf(p, linesize,
+ sbuf_printf(&sb,
"%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
"%4lu %4lu %4i\n",
hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
@@ -638,13 +636,13 @@
hc_entry->rmx_hits,
hc_entry->rmx_updates,
hc_entry->rmx_expire);
- p += len;
}
THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
}
#undef msec
- error = SYSCTL_OUT(req, buf, p - buf);
- free(buf, M_TEMP);
+ sbuf_finish(&sb);
+ error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
+ sbuf_delete(&sb);
return(error);
}
Modified: trunk/sys/netinet/tcp_hostcache.h
===================================================================
--- trunk/sys/netinet/tcp_hostcache.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_hostcache.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/tcp_hostcache.h 185420 2008-11-28 23:39:25Z zec $
+ * $FreeBSD: stable/10/sys/netinet/tcp_hostcache.h 185420 2008-11-28 23:39:25Z zec $
*/
/*
Modified: trunk/sys/netinet/tcp_input.c
===================================================================
--- trunk/sys/netinet/tcp_input.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_input.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -49,12 +49,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_input.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_input.c 327520 2018-01-03 16:16:20Z smh $");
#include "opt_ipfw.h" /* for ipfw_fwd */
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include "opt_tcpdebug.h"
#include <sys/param.h>
@@ -64,6 +65,7 @@
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -83,6 +85,7 @@
#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -96,6 +99,9 @@
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -106,6 +112,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -118,11 +127,6 @@
const int tcprexmtthresh = 3;
-VNET_DEFINE(struct tcpstat, tcpstat);
-SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(tcpstat), tcpstat,
- "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
-
int tcp_log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
&tcp_log_in_vain, 0,
@@ -145,6 +149,11 @@
&VNET_NAME(drop_synfin), 0,
"Drop TCP packets with SYN+FIN set");
+VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_pipe, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_do_rfc6675_pipe), 0,
+ "Use calculated pipe/in-flight bytes per RFC 6675");
+
VNET_DEFINE(int, tcp_do_rfc3042) = 1;
#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
@@ -156,6 +165,14 @@
&VNET_NAME(tcp_do_rfc3390), 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0,
+ "Experimental TCP extensions");
+
+VNET_DEFINE(int, tcp_do_initcwnd10) = 1;
+SYSCTL_VNET_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_RW,
+ &VNET_NAME(tcp_do_initcwnd10), 0,
+ "Enable RFC 6928 (Increasing initial CWND to 10)");
+
VNET_DEFINE(int, tcp_do_rfc3465) = 1;
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
&VNET_NAME(tcp_do_rfc3465), 0,
@@ -168,7 +185,7 @@
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
-VNET_DEFINE(int, tcp_do_ecn) = 0;
+VNET_DEFINE(int, tcp_do_ecn) = 2;
SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW,
&VNET_NAME(tcp_do_ecn), 0,
"TCP ECN support");
@@ -184,6 +201,11 @@
&VNET_NAME(tcp_insecure_rst), 0,
"Follow the old (insecure) criteria for accepting RST packets");
+VNET_DEFINE(int, tcp_recvspace) = 1024*64;
+#define V_tcp_recvspace VNET(tcp_recvspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
+
VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
@@ -216,12 +238,6 @@
struct tcphdr *, struct mbuf *, int);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline tcp_fields_to_host(struct tcphdr *);
-#ifdef TCP_SIGNATURE
-static void inline tcp_fields_to_net(struct tcphdr *);
-static int inline tcp_signature_verify_input(struct mbuf *, int, int,
- int, struct tcpopt *, struct tcphdr *, u_int);
-#endif
static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
uint16_t type);
static void inline cc_conn_init(struct tcpcb *tp);
@@ -230,17 +246,25 @@
struct tcphdr *th, struct tcpopt *to);
/*
+ * TCP statistics are stored in an "array" of counter(9)s.
+ */
+VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
+VNET_PCPUSTAT_SYSINIT(tcpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
+ tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(tcpstat);
+#endif /* VIMAGE */
+/*
* Kernel module interface for updating tcpstat. The argument is an index
- * into tcpstat treated as an array of u_long. While this encodes the
- * general layout of tcpstat into the caller, it doesn't encode its location,
- * so that future changes to add, for example, per-CPU stats support won't
- * cause binary compatibility problems for kernel modules.
+ * into tcpstat treated as an array.
*/
void
kmod_tcpstat_inc(int statnum)
{
- (*((u_long *)&V_tcpstat + statnum))++;
+ counter_u64_add(VNET(tcpstat)[statnum], 1);
}
/*
@@ -302,9 +326,6 @@
struct hc_metrics_lite metrics;
struct inpcb *inp = tp->t_inpcb;
int rtt;
-#ifdef INET6
- int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -338,49 +359,33 @@
}
/*
- * Set the slow-start flight size depending on whether this
- * is a local network or not.
+ * Set the initial slow-start flight size.
*
- * Extend this so we cache the cwnd too and retrieve it here.
- * Make cwnd even bigger than RFC3390 suggests but only if we
- * have previous experience with the remote host. Be careful
- * not make cwnd bigger than remote receive window or our own
- * send socket buffer. Maybe put some additional upper bound
- * on the retrieved cwnd. Should do incremental updates to
- * hostcache when cwnd collapses so next connection doesn't
- * overloads the path again.
+ * RFC5681 Section 3.1 specifies the default conservative values.
+ * RFC3390 specifies slightly more aggressive values.
+ * RFC6928 increases it to ten segments.
*
- * XXXAO: Initializing the CWND from the hostcache is broken
- * and in its current form not RFC conformant. It is disabled
- * until fixed or removed entirely.
- *
- * RFC3390 says only do this if SYN or SYN/ACK didn't got lost.
- * We currently check only in syncache_socket for that.
+ * If a SYN or SYN/ACK was lost and retransmitted, we have to
+ * reduce the initial CWND to one segment as congestion is likely
+ * requiring us to be cautious.
*/
-/* #define TCP_METRICS_CWND */
-#ifdef TCP_METRICS_CWND
- if (metrics.rmx_cwnd)
- tp->snd_cwnd = max(tp->t_maxseg, min(metrics.rmx_cwnd / 2,
- min(tp->snd_wnd, so->so_snd.sb_hiwat)));
- else
-#endif
- if (V_tcp_do_rfc3390)
+ if (tp->snd_cwnd == 1)
+ tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */
+ else if (V_tcp_do_initcwnd10)
+ tp->snd_cwnd = min(10 * tp->t_maxseg,
+ max(2 * tp->t_maxseg, 14600));
+ else if (V_tcp_do_rfc3390)
tp->snd_cwnd = min(4 * tp->t_maxseg,
max(2 * tp->t_maxseg, 4380));
-#ifdef INET6
- else if (isipv6 && in6_localaddr(&inp->in6p_faddr))
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
-#if defined(INET) && defined(INET6)
- else if (!isipv6 && in_localaddr(inp->inp_faddr))
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
-#ifdef INET
- else if (in_localaddr(inp->inp_faddr))
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
-#endif
- else
- tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz;
+ else {
+ /* Per RFC5681 Section 3.1 */
+ if (tp->t_maxseg > 2190)
+ tp->snd_cwnd = 2 * tp->t_maxseg;
+ else if (tp->t_maxseg > 1095)
+ tp->snd_cwnd = 3 * tp->t_maxseg;
+ else
+ tp->snd_cwnd = 4 * tp->t_maxseg;
+ }
if (CC_ALGO(tp)->conn_init != NULL)
CC_ALGO(tp)->conn_init(tp->ccv);
@@ -453,27 +458,7 @@
tp->t_bytes_acked = 0;
}
-static inline void
-tcp_fields_to_host(struct tcphdr *th)
-{
-
- th->th_seq = ntohl(th->th_seq);
- th->th_ack = ntohl(th->th_ack);
- th->th_win = ntohs(th->th_win);
- th->th_urp = ntohs(th->th_urp);
-}
-
#ifdef TCP_SIGNATURE
-static inline void
-tcp_fields_to_net(struct tcphdr *th)
-{
-
- th->th_seq = htonl(th->th_seq);
- th->th_ack = htonl(th->th_ack);
- th->th_win = htons(th->th_win);
- th->th_urp = htons(th->th_urp);
-}
-
static inline int
tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
@@ -506,10 +491,13 @@
* the ack that opens up a 0-sized window and
* - delayed acks are enabled or
* - this is a half-synchronized T/TCP connection.
+ * - the segment size is not larger than the MSS and LRO wasn't used
+ * for this segment.
*/
-#define DELAY_ACK(tp) \
+#define DELAY_ACK(tp, tlen) \
((!tcp_timer_active(tp, TT_DELACK) && \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
+ (tlen <= tp->t_maxopd) && \
(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
/*
@@ -557,9 +545,6 @@
{
struct tcphdr *th = NULL;
struct ip *ip = NULL;
-#ifdef INET
- struct ipovly *ipov;
-#endif
struct inpcb *inp = NULL;
struct tcpcb *tp = NULL;
struct socket *so = NULL;
@@ -587,7 +572,7 @@
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_WLOCKED 2
+#define TI_RLOCKED 2
#ifdef TCPDEBUG
/*
@@ -659,7 +644,7 @@
* Note: IP leaves IP header in first mbuf.
*/
if (off0 > sizeof (struct ip)) {
- ip_stripoptions(m, (struct mbuf *)0);
+ ip_stripoptions(m);
off0 = sizeof(struct ip);
}
if (m->m_len < sizeof (struct tcpiphdr)) {
@@ -670,9 +655,8 @@
}
}
ip = mtod(m, struct ip *);
- ipov = (struct ipovly *)ip;
th = (struct tcphdr *)((caddr_t)ip + off0);
- tlen = ip->ip_len;
+ tlen = ntohs(ip->ip_len) - off0;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
@@ -679,25 +663,24 @@
th->th_sum = m->m_pkthdr.csum_data;
else
th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr,
- htonl(m->m_pkthdr.csum_data +
- ip->ip_len +
- IPPROTO_TCP));
+ ip->ip_dst.s_addr,
+ htonl(m->m_pkthdr.csum_data + tlen +
+ IPPROTO_TCP));
th->th_sum ^= 0xffff;
-#ifdef TCPDEBUG
- ipov->ih_len = (u_short)tlen;
- ipov->ih_len = htons(ipov->ih_len);
-#endif
} else {
+ struct ipovly *ipov = (struct ipovly *)ip;
+
/*
* Checksum extended TCP header and data.
*/
- len = sizeof (struct ip) + tlen;
+ len = off0 + tlen;
bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
- ipov->ih_len = (u_short)tlen;
- ipov->ih_len = htons(ipov->ih_len);
+ ipov->ih_len = htons(tlen);
th->th_sum = in_cksum(m, len);
+ /* Reset length for SDT probes. */
+ ip->ip_len = htons(tlen + off0);
}
+
if (th->th_sum) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
@@ -748,7 +731,6 @@
return;
}
ip = mtod(m, struct ip *);
- ipov = (struct ipovly *)ip;
th = (struct tcphdr *)((caddr_t)ip + off0);
}
}
@@ -770,26 +752,17 @@
/*
* Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are two cases
+ * connection then first acquire pcbinfo lock. There are three cases
* where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, and ACKs for
- * a connection in TIMEWAIT.
+ * flags: ACKs moving a connection out of the syncache, ACKs for a
+ * connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ if ((thflags & (TH_FIN | TH_RST)) != 0) {
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
-findpcb:
-#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- } else {
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
@@ -806,6 +779,14 @@
)
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+findpcb:
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
#ifdef INET6
if (isipv6 && fwd_tag != NULL) {
struct sockaddr_in6 *next_hop6;
@@ -830,10 +811,6 @@
th->th_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
- /* Remove the tag from the packet. We don't need it anymore. */
- m_tag_delete(m, fwd_tag);
- m->m_flags &= ~M_IP6_NEXTHOP;
- fwd_tag = NULL;
} else if (isipv6) {
inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
th->th_sport, &ip6->ip6_dst, th->th_dport,
@@ -868,10 +845,6 @@
th->th_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
}
- /* Remove the tag from the packet. We don't need it anymore. */
- m_tag_delete(m, fwd_tag);
- m->m_flags &= ~M_IP_NEXTHOP;
- fwd_tag = NULL;
} else
inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
th->th_sport, ip->ip_dst, th->th_dport,
@@ -907,23 +880,32 @@
goto dropwithreset;
}
INP_WLOCK_ASSERT(inp);
- if (!(inp->inp_flags & INP_HW_FLOWID)
- && (m->m_flags & M_FLOWID)
- && ((inp->inp_socket == NULL)
- || !(inp->inp_socket->so_options & SO_ACCEPTCONN))) {
- inp->inp_flags |= INP_HW_FLOWID;
- inp->inp_flags &= ~INP_SW_FLOWID;
+ /*
+ * While waiting for inp lock during the lookup, another thread
+ * can have dropped the inpcb, in which case we need to loop back
+ * and try to find a new inpcb to deliver to.
+ */
+ if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
+ goto findpcb;
+ }
+ if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
+ (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
+ ((inp->inp_socket == NULL) ||
+ (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
inp->inp_flowid = m->m_pkthdr.flowid;
+ inp->inp_flowtype = M_HASHTYPE_GET(m);
}
#ifdef IPSEC
#ifdef INET6
if (isipv6 && ipsec6_in_reject(m, inp)) {
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
goto dropunlock;
} else
#endif /* INET6 */
if (ipsec4_in_reject(m, inp) != 0) {
- V_ipsec4stat.in_polvio++;
+ IPSECSTAT_INC(ips_in_polvio);
goto dropunlock;
}
#endif /* IPSEC */
@@ -944,7 +926,7 @@
/*
* A previous connection in TIMEWAIT state is supposed to catch stray
* or duplicate segments arriving late. If this segment was a
- * legitimate new connection attempt the old INPCB gets removed and
+ * legitimate new connection attempt, the old INPCB gets removed and
* we can try again to find a listening socket.
*
* At this point, due to earlier optimism, we may hold only an inpcb
@@ -960,20 +942,24 @@
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
+ } else if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
+ goto findpcb;
}
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -982,7 +968,7 @@
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
/*
@@ -996,6 +982,14 @@
goto dropwithreset;
}
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_input(tp, m);
+ m = NULL; /* consumed by the TOE driver */
+ goto dropunlock;
+ }
+#endif
+
/*
* We've identified a valid inpcb, but it could be that we need an
* inpcbinfo write lock but don't hold it. In this case, attempt to
@@ -1004,26 +998,32 @@
* now be in TIMEWAIT.
*/
#ifdef INVARIANTS
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if ((thflags & (TH_FIN | TH_RST)) != 0)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
- if (tp->t_state != TCPS_ESTABLISHED) {
+ if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+ (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+ !(tp->t_flags & TF_FASTOPEN)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
+ } else if (inp->inp_flags & INP_DROPPED) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
+ goto findpcb;
}
goto relocked;
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
#ifdef MAC
@@ -1048,9 +1048,7 @@
/*
* When the socket is accepting connections (the INPCB is in LISTEN
* state) we look into the SYN cache if this is a new connection
- * attempt or the completion of a previous one. Because listen
- * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
- * held in this case.
+ * attempt or the completion of a previous one.
*/
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
@@ -1057,8 +1055,6 @@
KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
"tp not listening", __func__));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
bzero(&inc, sizeof(inc));
#ifdef INET6
if (isipv6) {
@@ -1081,6 +1077,8 @@
* socket appended to the listen queue in SYN_RECEIVED state.
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1101,6 +1099,9 @@
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
if (so == NULL) {
/*
* We completed the 3-way handshake
@@ -1132,7 +1133,11 @@
*/
INP_WUNLOCK(inp); /* listen socket */
inp = sotoinpcb(so);
- INP_WLOCK(inp); /* new connection */
+ /*
+ * New connection inpcb is already locked by
+ * syncache_expand().
+ */
+ INP_WLOCK_ASSERT(inp);
tp = intotcpcb(inp);
KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
("%s: ", __func__));
@@ -1358,13 +1363,31 @@
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, TO_SYN);
- syncache_add(&inc, &to, th, inp, &so, m);
+#ifdef TCP_RFC7413
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ goto new_tfo_socket;
+#else
+ syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
/*
* Entry added to syncache and mbuf consumed.
- * Everything already unlocked by syncache_add().
+ * Only the listen socket is unlocked by syncache_add().
*/
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return;
+ } else if (tp->t_state == TCPS_LISTEN) {
+ /*
+ * When a listen socket is torn down the SO_ACCEPTCONN
+ * flag is removed first while connections are drained
+ * from the accept queue in an unlock/lock cycle of the
+ * ACCEPT_LOCK, opening a race condition allowing a SYN
+ * attempt to go through unhandled.
+ */
+ goto dropunlock;
}
#ifdef TCP_SIGNATURE
@@ -1386,6 +1409,8 @@
}
#endif
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
/*
* Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
@@ -1396,8 +1421,10 @@
return;
dropwithreset:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1417,8 +1444,11 @@
goto drop;
dropunlock:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (m != NULL)
+ TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1440,15 +1470,82 @@
m_freem(m);
}
+/*
+ * Automatic sizing of receive socket buffer. Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product). Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent. This allows us to be much
+ * more aggressive in scaling the receive socket buffer. For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later. Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ * 1. Application has not set receive buffer size with
+ * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ * 2. the number of bytes received during the time it takes
+ * one timestamp to be reflected back to us (the RTT);
+ * 3. received bytes per RTT is within seven eighths of the
+ * current socket buffer size;
+ * 4. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+int
+tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int tlen)
+{
+ int newsize = 0;
+
+ if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
+ tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
+ TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
+ (tp->t_srtt >> TCP_RTT_SHIFT)) {
+ if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+ so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
+ newsize = min(so->so_rcv.sb_hiwat +
+ V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+ }
+ TCP_PROBE6(receive__autoresize, NULL, tp, mtod(m, const char *),
+ tp, th, newsize);
+
+ /* Start over with next RTT. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+ } else {
+ tp->rfbuf_cnt += tlen; /* add up */
+ }
+
+ return (newsize);
+}
+
static void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
{
- int thflags, acked, ourfinisacked, needoutput = 0;
+ int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win;
u_long tiwin;
+ char *s;
+ struct in_conninfo *inc;
+ struct mbuf *mfree;
struct tcpopt to;
+ int tfo_syn;
#ifdef TCPDEBUG
/*
@@ -1460,30 +1557,25 @@
short ostate = 0;
#endif
thflags = th->th_flags;
+ inc = &tp->t_inpcb->inp_inc;
tp->sackhint.last_sack_ack = 0;
+ sack_changed = 0;
/*
* If this is either a state-changing packet or current state isn't
* established, we require a write lock on tcbinfo. Otherwise, we
- * allow either a read lock or a write lock, as we may have acquired
- * a write lock due to a race.
- *
- * Require a global write lock for SYN/FIN/RST segments or
- * non-established connections; otherwise accept either a read or
- * write lock, as we may have conservatively acquired a write lock in
- * certain cases in tcp_input() (is this still true?). Currently we
- * will never enter with no lock, so we try to drop it quickly in the
- * common pure ack/pure data cases.
+ * allow the tcbinfo to be either locked or unlocked, as the
+ * caller may have unnecessarily acquired a write lock due to a race.
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
@@ -1555,7 +1647,6 @@
if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
to.to_tsecr = 0;
}
-
/*
* Process options only when we get SYN/ACK back. The SYN case
* for incoming connections is handled in tcp_syncache.
@@ -1587,6 +1678,25 @@
}
/*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session and vice versa.
+ */
+ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+ if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+
+ /*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
* no control flags, is in-sequence, the window didn't
@@ -1634,8 +1744,8 @@
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1738,8 +1848,8 @@
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -1765,59 +1875,7 @@
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- /*
- * Automatic sizing of receive socket buffer. Often the send
- * buffer size is not optimally adjusted to the actual network
- * conditions at hand (delay bandwidth product). Setting the
- * buffer size too small limits throughput on links with high
- * bandwidth and high delay (eg. trans-continental/oceanic links).
- *
- * On the receive side the socket buffer memory is only rarely
- * used to any significant extent. This allows us to be much
- * more aggressive in scaling the receive socket buffer. For
- * the case that the buffer space is actually used to a large
- * extent and we run out of kernel memory we can simply drop
- * the new segments; TCP on the sender will just retransmit it
- * later. Setting the buffer size too big may only consume too
- * much kernel memory if the application doesn't read() from
- * the socket or packet loss or reordering makes use of the
- * reassembly queue.
- *
- * The criteria to step up the receive buffer one notch are:
- * 1. the number of bytes received during the time it takes
- * one timestamp to be reflected back to us (the RTT);
- * 2. received bytes per RTT is within seven eighth of the
- * current socket buffer size;
- * 3. receive buffer size has not hit maximal automatic size;
- *
- * This algorithm does one step per RTT at most and only if
- * we receive a bulk stream w/o packet losses or reorderings.
- * Shrinking the buffer during idle times is not necessary as
- * it doesn't consume any memory when idle.
- *
- * TODO: Only step up if the application is actually serving
- * the buffer to better manage the socket buffer resources.
- */
- if (V_tcp_do_autorcvbuf &&
- to.to_tsecr &&
- (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
- if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
- to.to_tsecr - tp->rfbuf_ts < hz) {
- if (tp->rfbuf_cnt >
- (so->so_rcv.sb_hiwat / 8 * 7) &&
- so->so_rcv.sb_hiwat <
- V_tcp_autorcvbuf_max) {
- newsize =
- min(so->so_rcv.sb_hiwat +
- V_tcp_autorcvbuf_inc,
- V_tcp_autorcvbuf_max);
- }
- /* Start over with next RTT. */
- tp->rfbuf_ts = 0;
- tp->rfbuf_cnt = 0;
- } else
- tp->rfbuf_cnt += tlen; /* add up */
- }
+ newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
/* Add data to socket buffer. */
SOCKBUF_LOCK(&so->so_rcv);
@@ -1837,7 +1895,7 @@
}
/* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
- if (DELAY_ACK(tp)) {
+ if (DELAY_ACK(tp, tlen)) {
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
@@ -1858,10 +1916,6 @@
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
- /* Reset receive buffer auto scaling when not in bulk receive mode. */
- tp->rfbuf_ts = 0;
- tp->rfbuf_cnt = 0;
-
switch (tp->t_state) {
/*
@@ -1875,6 +1929,28 @@
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN) {
+ /*
+ * When a TFO connection is in SYN_RECEIVED, the
+ * only valid packets are the initial SYN, a
+ * retransmit/copy of the initial SYN (possibly with
+ * a subset of the original data), a valid ACK, a
+ * FIN, or a RST.
+ */
+ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ } else if (thflags & TH_SYN) {
+ /* non-initial SYN is ignored */
+ if ((tcp_timer_active(tp, TT_DELACK) ||
+ tcp_timer_active(tp, TT_REXMT)))
+ goto drop;
+ } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+ goto drop;
+ }
+ }
+#endif
break;
/*
@@ -1898,8 +1974,11 @@
rstreason = BANDLIM_UNLIMITED;
goto dropwithreset;
}
- if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
+ TCP_PROBE5(connect__refused, NULL, tp,
+ mtod(m, const char *), tp, th);
tp = tcp_drop(tp, ECONNREFUSED);
+ }
if (thflags & TH_RST)
goto drop;
if (!(thflags & TH_SYN))
@@ -1925,7 +2004,7 @@
* If there's data, delay ACK; if there's also a FIN
* ACKNOW will be turned on later.
*/
- if (DELAY_ACK(tp) && tlen != 0)
+ if (DELAY_ACK(tp, tlen) && tlen != 0)
tcp_timer_activate(tp, TT_DELACK,
tcp_delacktime);
else
@@ -1944,11 +2023,13 @@
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
thflags &= ~TH_SYN;
} else {
- tp->t_state = TCPS_ESTABLISHED;
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(connect__established, NULL, tp,
+ mtod(m, const char *), tp, th);
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP,
TP_KEEPIDLE(tp));
@@ -1966,12 +2047,12 @@
*/
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tcp_timer_activate(tp, TT_REXMT, 0);
- tp->t_state = TCPS_SYN_RECEIVED;
+ tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2099,12 +2180,12 @@
case TCPS_CLOSE_WAIT:
so->so_error = ECONNRESET;
close:
- KASSERT(ti_locked == TI_WLOCKED,
+ KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_RST 1 ti_locked %d",
ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- tp->t_state = TCPS_CLOSED;
+ tcp_state_change(tp, TCPS_CLOSED);
TCPSTAT_INC(tcps_drops);
tp = tcp_close(tp);
break;
@@ -2111,10 +2192,10 @@
case TCPS_CLOSING:
case TCPS_LAST_ACK:
- KASSERT(ti_locked == TI_WLOCKED,
+ KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_RST 2 ti_locked %d",
ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
break;
@@ -2168,11 +2249,7 @@
todrop = tp->rcv_nxt - th->th_seq;
if (todrop > 0) {
- /*
- * If this is a duplicate SYN for our current connection,
- * advance over it and pretend and it's not a SYN.
- */
- if (thflags & TH_SYN && th->th_seq == tp->irs) {
+ if (thflags & TH_SYN) {
thflags &= ~TH_SYN;
th->th_seq++;
if (th->th_urp > 1)
@@ -2222,15 +2299,14 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- char *s;
-
- KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
- log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
- "was closed, sending RST and removing tcpcb\n",
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
+ "after socket was closed, "
+ "sending RST and removing tcpcb\n",
s, __func__, tcpstates[tp->t_state], tlen);
free(s, M_TCPLOG);
}
@@ -2299,9 +2375,9 @@
* error and we send an RST and drop the connection.
*/
if (thflags & TH_SYN) {
- KASSERT(ti_locked == TI_WLOCKED,
+ KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_drop(tp, ECONNRESET);
rstreason = BANDLIM_UNLIMITED;
@@ -2315,9 +2391,16 @@
*/
if ((thflags & TH_ACK) == 0) {
if (tp->t_state == TCPS_SYN_RECEIVED ||
- (tp->t_flags & TF_NEEDSYN))
+ (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+ if (tp->t_state == TCPS_SYN_RECEIVED &&
+ tp->t_flags & TF_FASTOPEN) {
+ tp->snd_wnd = tiwin;
+ cc_conn_init(tp);
+ }
+#endif
goto step6;
- else if (tp->t_flags & TF_ACKNOW)
+ } else if (tp->t_flags & TF_ACKNOW)
goto dropafterack;
else
goto drop;
@@ -2350,11 +2433,33 @@
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
} else {
- tp->t_state = TCPS_ESTABLISHED;
- cc_conn_init(tp);
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(accept__established, NULL, tp,
+ mtod(m, const char *), tp, th);
+#ifdef TCP_RFC7413
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+
+ /*
+ * Account for the ACK of our SYN prior to
+ * regular ACK processing below.
+ */
+ tp->snd_una++;
+ }
+ /*
+ * TFO connections call cc_conn_init() during SYN
+ * processing. Calling it again here for such
+ * connections is not harmless as it would undo the
+ * snd_cwnd reduction that occurs when a TFO SYN|ACK
+ * is retransmitted.
+ */
+ if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+ cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
@@ -2388,13 +2493,21 @@
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
!TAILQ_EMPTY(&tp->snd_holes)))
- tcp_sack_doack(tp, &to, th->th_ack);
+ sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
+ else
+ /*
+ * Reset the value so that previous (valid) value
+ * from the last ack with SACK doesn't get used.
+ */
+ tp->sackhint.sacked_bytes = 0;
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
hhook_run_tcp_est_in(tp, th, &to);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
- if (tlen == 0 && tiwin == tp->snd_wnd) {
+ if (tlen == 0 &&
+ (tiwin == tp->snd_wnd ||
+ (tp->t_flags & TF_SACK_PERMIT))) {
TCPSTAT_INC(tcps_rcvdupack);
/*
* If we have outstanding data (other than
@@ -2423,8 +2536,20 @@
* When using TCP ECN, notify the peer that
* we reduced the cwnd.
*/
- if (!tcp_timer_active(tp, TT_REXMT) ||
- th->th_ack != tp->snd_una)
+ /*
+ * Following 2 kinds of acks should not affect
+ * dupack counting:
+ * 1) Old acks
+ * 2) Acks with SACK but without any new SACK
+ * information in them. These could result from
+ * any anomaly in the network like a switch
+ * duplicating packets or a possible DoS attack.
+ */
+ if (th->th_ack != tp->snd_una ||
+ ((tp->t_flags & TF_SACK_PERMIT) &&
+ !sack_changed))
+ break;
+ else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
IN_FASTRECOVERY(tp->t_flags)) {
@@ -2439,8 +2564,12 @@
* we have less than 1/2 the original window's
* worth of data in flight.
*/
- awnd = (tp->snd_nxt - tp->snd_fack) +
- tp->sackhint.sack_bytes_rexmit;
+ if (V_tcp_do_rfc6675_pipe)
+ awnd = tcp_compute_pipe(tp);
+ else
+ awnd = (tp->snd_nxt - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit;
+
if (awnd < tp->snd_ssthresh) {
tp->snd_cwnd += tp->t_maxseg;
if (tp->snd_cwnd > tp->snd_ssthresh)
@@ -2518,10 +2647,21 @@
tp->snd_nxt = onxt;
goto drop;
} else if (V_tcp_do_rfc3042) {
+ /*
+ * Process first and second duplicate
+ * ACKs. Each indicates a segment
+ * leaving the network, creating room
+ * for more. Make sure we can send a
+ * packet on reception of each duplicate
+ * ACK by increasing snd_cwnd by one
+ * segment. Restore the original
+ * snd_cwnd after packet transmission.
+ */
cc_ack_received(tp, th, CC_DUPACK);
u_long oldcwnd = tp->snd_cwnd;
tcp_seq oldsndmax = tp->snd_max;
u_int sent;
+ int avail;
KASSERT(tp->t_dupacks == 1 ||
tp->t_dupacks == 2,
@@ -2543,7 +2683,17 @@
*/
break;
}
- (void) tcp_output(tp);
+ /*
+ * Only call tcp_output when there
+ * is new data available to be sent.
+ * Otherwise we would send pure ACKs.
+ */
+ SOCKBUF_LOCK(&so->so_snd);
+ avail = so->so_snd.sb_cc -
+ (tp->snd_nxt - tp->snd_una);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (avail > 0)
+ (void) tcp_output(tp);
sent = tp->snd_max - oldsndmax;
if (sent > tp->t_maxseg) {
KASSERT((tp->t_dupacks == 2 &&
@@ -2558,9 +2708,20 @@
tp->snd_cwnd = oldcwnd;
goto drop;
}
- } else
- tp->t_dupacks = 0;
+ }
break;
+ } else {
+ /*
+ * This ack is advancing the left edge, reset the
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ /*
+ * If this ack also has new SACK info, increment the
+ * counter as per rfc6675.
+ */
+ if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+ tp->t_dupacks++;
}
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
@@ -2579,7 +2740,6 @@
} else
cc_post_recovery(tp, th);
}
- tp->t_dupacks = 0;
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@@ -2606,6 +2766,9 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
acked = BYTES_THIS_ACK(tp, th);
+ KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
+ "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
+ tp->snd_una, th->th_ack, tp, m));
TCPSTAT_INC(tcps_rcvackpack);
TCPSTAT_ADD(tcps_rcvackbyte, acked);
@@ -2675,16 +2838,24 @@
SOCKBUF_LOCK(&so->so_snd);
if (acked > so->so_snd.sb_cc) {
- tp->snd_wnd -= so->so_snd.sb_cc;
- sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
+ if (tp->snd_wnd >= so->so_snd.sb_cc)
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ else
+ tp->snd_wnd = 0;
+ mfree = sbcut_locked(&so->so_snd,
+ (int)so->so_snd.sb_cc);
ourfinisacked = 1;
} else {
- sbdrop_locked(&so->so_snd, acked);
- tp->snd_wnd -= acked;
+ mfree = sbcut_locked(&so->so_snd, acked);
+ if (tp->snd_wnd >= (u_long) acked)
+ tp->snd_wnd -= acked;
+ else
+ tp->snd_wnd = 0;
ourfinisacked = 0;
}
/* NB: sowwakeup_locked() does an implicit unlock. */
sowwakeup_locked(so);
+ m_freem(mfree);
/* Detect una wraparound. */
if (!IN_RECOVERY(tp->t_flags) &&
SEQ_GT(tp->snd_una, tp->snd_recover) &&
@@ -2730,7 +2901,7 @@
tcp_finwait2_timeout :
TP_MAXIDLE(tp)));
}
- tp->t_state = TCPS_FIN_WAIT_2;
+ tcp_state_change(tp, TCPS_FIN_WAIT_2);
}
break;
@@ -2742,9 +2913,9 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -2758,7 +2929,7 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -2862,9 +3033,12 @@
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags & TH_FIN)) &&
+ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags & TF_FASTOPEN));
+ if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
+
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -2880,8 +3054,9 @@
*/
if (th->th_seq == tp->rcv_nxt &&
LIST_EMPTY(&tp->t_segq) &&
- TCPS_HAVEESTABLISHED(tp->t_state)) {
- if (DELAY_ACK(tp))
+ (TCPS_HAVEESTABLISHED(tp->t_state) ||
+ tfo_syn)) {
+ if (DELAY_ACK(tp, tlen) || tfo_syn)
tp->t_flags |= TF_DELACK;
else
tp->t_flags |= TF_ACKNOW;
@@ -2956,7 +3131,7 @@
tp->t_starttime = ticks;
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
- tp->t_state = TCPS_CLOSE_WAIT;
+ tcp_state_change(tp, TCPS_CLOSE_WAIT);
break;
/*
@@ -2964,7 +3139,7 @@
* enter the CLOSING state.
*/
case TCPS_FIN_WAIT_1:
- tp->t_state = TCPS_CLOSING;
+ tcp_state_change(tp, TCPS_CLOSING);
break;
/*
@@ -2973,18 +3148,18 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -3039,8 +3214,8 @@
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3050,8 +3225,8 @@
return;
dropwithreset:
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3062,8 +3237,8 @@
return;
drop:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -3234,6 +3409,21 @@
to->to_sacks = cp + 2;
TCPSTAT_INC(tcps_sack_rcv_blocks);
break;
+#ifdef TCP_RFC7413
+ case TCPOPT_FAST_OPEN:
+ /*
+ * Reject the option unless its length is exactly
+ * TCPOLEN_FAST_OPEN_EMPTY or lies within
+ * [TCPOLEN_FAST_OPEN_MIN, TCPOLEN_FAST_OPEN_MAX].
+ * The bounds must be OR-ed: optlen can never be both
+ * below MIN and above MAX, so an AND-chain never fires.
+ */
+ if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+ ((optlen < TCPOLEN_FAST_OPEN_MIN) ||
+ (optlen > TCPOLEN_FAST_OPEN_MAX)))
+ continue;
+ /* The option is only honoured when parsing a SYN. */
+ if (!(flags & TO_SYN))
+ continue;
+ if (!V_tcp_fastopen_enabled)
+ continue;
+ to->to_flags |= TOF_FASTOPEN;
+ /* Payload length and pointer skip the first two option bytes. */
+ to->to_tfo_len = optlen - 2;
+ to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+ break;
+#endif
default:
continue;
}
@@ -3359,10 +3549,8 @@
/*
* Determine a reasonable value for maxseg size.
* If the route is known, check route for mtu.
- * If none, use an mss that can be handled on the outgoing
- * interface without forcing IP to fragment; if bigger than
- * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
- * to utilize large mbufs. If no route is found, route has no mtu,
+ * If none, use an mss that can be handled on the outgoing interface
+ * without forcing IP to fragment. If no route is found, route has no mtu,
* or the destination isn't local, use a default, hopefully conservative
* size (usually 512 or the default IP max size, but no more than the mtu
* of the interface), as we can't discover anything about intervening
@@ -3383,7 +3571,7 @@
*/
void
tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
- struct hc_metrics_lite *metricptr, int *mtuflags)
+ struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
{
int mss = 0;
u_long maxmtu = 0;
@@ -3410,7 +3598,7 @@
/* Initialize. */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
}
#endif
@@ -3419,7 +3607,7 @@
#endif
#ifdef INET
{
- maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
}
#endif
@@ -3543,13 +3731,6 @@
(tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
mss -= TCPOLEN_TSTAMP_APPA;
-#if (MCLBYTES & (MCLBYTES - 1)) == 0
- if (mss > MCLBYTES)
- mss &= ~(MCLBYTES-1);
-#else
- if (mss > MCLBYTES)
- mss = mss / MCLBYTES * MCLBYTES;
-#endif
tp->t_maxseg = mss;
}
@@ -3561,12 +3742,13 @@
struct inpcb *inp;
struct socket *so;
struct hc_metrics_lite metrics;
- int mtuflags = 0;
+ struct tcp_ifcap cap;
KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
-
- tcp_mss_update(tp, offer, -1, &metrics, &mtuflags);
+ bzero(&cap, sizeof(cap));
+ tcp_mss_update(tp, offer, -1, &metrics, &cap);
+
mss = tp->t_maxseg;
inp = tp->t_inpcb;
@@ -3579,7 +3761,7 @@
*/
so = inp->inp_socket;
SOCKBUF_LOCK(&so->so_snd);
- if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+ if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
bufsize = metrics.rmx_sendpipe;
else
bufsize = so->so_snd.sb_hiwat;
@@ -3596,7 +3778,7 @@
tp->t_maxseg = mss;
SOCKBUF_LOCK(&so->so_rcv);
- if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+ if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
bufsize = metrics.rmx_recvpipe;
else
bufsize = so->so_rcv.sb_hiwat;
@@ -3610,8 +3792,12 @@
SOCKBUF_UNLOCK(&so->so_rcv);
/* Check the interface for TSO capabilities. */
- if (mtuflags & CSUM_TSO)
+ if (cap.ifcap & CSUM_TSO) {
tp->t_flags |= TF_TSO;
+ tp->t_tsomax = cap.tsomax;
+ tp->t_tsomaxsegcount = cap.tsomaxsegcount;
+ tp->t_tsomaxsegsize = cap.tsomaxsegsize;
+ }
}
/*
@@ -3694,3 +3880,11 @@
tp->snd_cwnd = 0;
tp->snd_cwnd += tp->t_maxseg;
}
+
+int
+tcp_compute_pipe(struct tcpcb *tp)
+{
+ return (tp->snd_max - tp->snd_una +
+ tp->sackhint.sack_bytes_rexmit -
+ tp->sackhint.sacked_bytes);
+}
Modified: trunk/sys/netinet/tcp_lro.c
===================================================================
--- trunk/sys/netinet/tcp_lro.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_lro.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_lro.c 247470 2013-02-28 16:32:36Z gallatin $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_lro.c 305189 2016-09-01 08:01:13Z sephe $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -41,6 +41,7 @@
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -54,6 +55,7 @@
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
+#include <netinet/tcp_var.h>
#include <netinet6/ip6_var.h>
@@ -68,6 +70,14 @@
#define TCP_LRO_INVALID_CSUM 0x0000
#endif
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "TCP LRO");
+
+static unsigned tcp_lro_entries = LRO_ENTRIES;
+SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
+ CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
+ "default number of LRO entries");
+
int
tcp_lro_init(struct lro_ctrl *lc)
{
@@ -82,7 +92,7 @@
SLIST_INIT(&lc->lro_active);
error = 0;
- for (i = 0; i < LRO_ENTRIES; i++) {
+ for (i = 0; i < tcp_lro_entries; i++) {
le = (struct lro_entry *)malloc(sizeof(*le), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (le == NULL) {
@@ -195,6 +205,25 @@
#endif
void
+tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
+{
+ struct lro_entry *le, *le_tmp;
+ struct timeval tv;
+
+ if (SLIST_EMPTY(&lc->lro_active))
+ return;
+
+ getmicrotime(&tv);
+ timevalsub(&tv, timeout);
+ SLIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) {
+ if (timevalcmp(&tv, &le->mtime, >=)) {
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ }
+ }
+}
+
+void
tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
{
@@ -364,6 +393,7 @@
tcp_seq seq;
int error, ip_len, l;
uint16_t eh_type, tcp_data_len;
+ int force_flush = 0;
/* We expect a contiguous header [eh, ip, tcp]. */
@@ -430,10 +460,17 @@
* Check TCP header constraints.
*/
/* Ensure no bits set besides ACK or PSH. */
- if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return (TCP_LRO_CANNOT);
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+ if (th->th_flags & TH_SYN)
+ return (TCP_LRO_CANNOT);
+ /*
+ * Make sure that previously seen segments/ACKs are delivered
+ * before this segment, e.g. FIN.
+ */
+ force_flush = 1;
+ }
- /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */
+ /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
/* XXX-BZ Ideally we'd flush on PUSH? */
/*
@@ -447,8 +484,13 @@
ts_ptr = (uint32_t *)(th + 1);
if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
(*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
- return (TCP_LRO_CANNOT);
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * Make sure that previously seen segments/ACKs are delivered
+ * before this segment.
+ */
+ force_flush = 1;
+ }
/* If the driver did not pass in the checksum, set it now. */
if (csum == 0x0000)
@@ -482,6 +524,13 @@
#endif
}
+ if (force_flush) {
+ /* Timestamps mismatch; this is a FIN, etc */
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ return (TCP_LRO_CANNOT);
+ }
+
/* Flush now if appending will result in overflow. */
if (le->p_len > (65535 - tcp_data_len)) {
SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
@@ -544,19 +593,29 @@
if (le->p_len > (65535 - lc->ifp->if_mtu)) {
SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
tcp_lro_flush(lc, le);
- }
+ } else
+ getmicrotime(&le->mtime);
return (0);
}
+ if (force_flush) {
+ /*
+ * Nothing to flush, but this segment can not be further
+ * aggregated/delayed.
+ */
+ return (TCP_LRO_CANNOT);
+ }
+
/* Try to find an empty slot. */
if (SLIST_EMPTY(&lc->lro_free))
- return (TCP_LRO_CANNOT);
+ return (TCP_LRO_NO_ENTRIES);
/* Start a new segment chain. */
le = SLIST_FIRST(&lc->lro_free);
SLIST_REMOVE_HEAD(&lc->lro_free, next);
SLIST_INSERT_HEAD(&lc->lro_active, le, next);
+ getmicrotime(&le->mtime);
/* Start filling in details. */
switch (eh_type) {
Modified: trunk/sys/netinet/tcp_lro.h
===================================================================
--- trunk/sys/netinet/tcp_lro.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_lro.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -25,12 +25,14 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet/tcp_lro.h 238230 2012-07-08 11:24:22Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_lro.h 301949 2016-06-16 04:21:27Z sephe $
*/
#ifndef _TCP_LRO_H_
#define _TCP_LRO_H_
+#include <sys/time.h>
+
struct lro_entry
{
SLIST_ENTRY(lro_entry) next;
@@ -60,6 +62,7 @@
uint32_t tsecr;
uint16_t window;
uint16_t timestamp; /* flag, not a TCP hdr field. */
+ struct timeval mtime;
};
SLIST_HEAD(lro_head, lro_entry);
@@ -84,9 +87,11 @@
int tcp_lro_init(struct lro_ctrl *);
void tcp_lro_free(struct lro_ctrl *);
+void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
+#define TCP_LRO_NO_ENTRIES -2
#define TCP_LRO_CANNOT -1
#define TCP_LRO_NOT_SUPPORTED 1
Modified: trunk/sys/netinet/tcp_offload.c
===================================================================
--- trunk/sys/netinet/tcp_offload.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_offload.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,146 +1,177 @@
/* $MidnightBSD$ */
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_offload.c 196019 2009-08-01 19:26:27Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_offload.c 237263 2012-06-19 07:34:13Z np $");
+#include "opt_inet.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
#include <net/route.h>
-#include <net/vnet.h>
-
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
-uint32_t toedev_registration_count;
+int registered_toedevs;
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
- struct toedev *tdev;
+ struct toedev *tod;
struct rtentry *rt;
- int error;
+ int error = EOPNOTSUPP;
- if (toedev_registration_count == 0)
- return (EINVAL);
-
- /*
- * Look up the route used for the connection to
- * determine if it uses an interface capable of
- * offloading the connection.
- */
- rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
- if (rt)
+ INP_WLOCK_ASSERT(sotoinpcb(so));
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: called with sa_family %d", __func__, nam->sa_family));
+
+ if (registered_toedevs == 0)
+ return (error);
+
+ rt = rtalloc1(nam, 0, 0);
+ if (rt)
RT_UNLOCK(rt);
- else
+ else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
- if ((ifp->if_capenable & IFCAP_TOE) == 0) {
- error = EINVAL;
- goto fail;
- }
-
- tdev = TOEDEV(ifp);
- if (tdev == NULL) {
- error = EPERM;
- goto fail;
- }
-
- if (tdev->tod_can_offload(tdev, so) == 0) {
- error = EPERM;
- goto fail;
- }
-
- return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+ if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+ goto done;
+ if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+ goto done;
+
+ tod = TOEDEV(ifp);
+ if (tod != NULL)
+ error = tod->tod_connect(tod, so, rt, nam);
+done:
RTFREE(rt);
return (error);
}
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
-/*
- * This file contains code as a short-term staging area before it is moved in
- * to sys/netinet/tcp_offload.c
- */
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
+
void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
{
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
}
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
{
+ struct toedev *tod = tp->tod;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_close(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ tod->tod_input(tod, tp, m);
}
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
{
+ struct toedev *tod = tp->tod;
+ int error, flags;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_drop(tp, error);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ flags = tcp_outflags[tp->t_state];
+
+ if (flags & TH_RST) {
+ /* XXX: avoid repeated calls like we do for FIN */
+ error = tod->tod_send_rst(tod, tp);
+ } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+ (tp->t_flags & TF_SENTFIN) == 0) {
+ error = tod->tod_send_fin(tod, tp);
+ if (error == 0)
+ tp->t_flags |= TF_SENTFIN;
+ } else
+ error = tod->tod_output(tod, tp);
+
+ return (error);
}
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_rcvd(tod, tp);
+}
+
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
+}
+
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_pcb_detach(tod, tp);
+}
Modified: trunk/sys/netinet/tcp_offload.h
===================================================================
--- trunk/sys/netinet/tcp_offload.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_offload.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,31 +1,31 @@
/* $MidnightBSD$ */
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
+ * $FreeBSD: stable/10/sys/netinet/tcp_offload.h 237263 2012-06-19 07:34:13Z np $
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: stable/9/sys/netinet/tcp_offload.h 218909 2011-02-21 09:01:34Z brucec $
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@@ -35,321 +35,15 @@
#error "no user-serviceable parts inside"
#endif
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the
- * the entire connection from set up to teardown, with some provision
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- * - tells the driver that new data may have been added to the
- * socket's send buffer - the driver should not fail if the
- * buffer is in fact unchanged
- * - the driver is responsible for providing credits (bytes in the send window)
- * back to the socket by calling sbdrop() as segments are acknowledged.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_rcvd
- * - returns credits to the driver and triggers window updates
- * to the peer (a credit as used here is a byte in the peer's receive window)
- * - the driver is expected to determine how many bytes have been
- * consumed and credit that back to the card so that it can grow
- * the window again by maintaining its own state between invocations.
- * - In principle this could be used to shrink the window as well as
- * grow the window, although it is not used for that now.
- * - this function needs to correctly handle being called any number of
- * times without any bytes being consumed from the receive buffer.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_disconnect
- * - tells the driver to send FIN to peer
- * - driver is expected to send the remaining data and then do a clean half close
- * - disconnect implies at least half-close so only send, reset, and detach
- * are legal
- * - the driver is expected to handle transition through the shutdown
- * state machine and allow the stack to support SO_LINGER.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_reset
- * - closes the connection and sends a RST to peer
- * - driver is expectd to trigger an RST and detach the toepcb
- * - no further calls are legal after reset
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * The following fields in the tcpcb are expected to be referenced by the driver:
- * + iss
- * + rcv_nxt
- * + rcv_wnd
- * + snd_isn
- * + snd_max
- * + snd_nxt
- * + snd_una
- * + t_flags
- * + t_inpcb
- * + t_maxseg
- * + t_toe
- *
- * The following fields in the inpcb are expected to be referenced by the driver:
- * + inp_lport
- * + inp_fport
- * + inp_laddr
- * + inp_fport
- * + inp_socket
- * + inp_ip_tos
- *
- * The following fields in the socket are expected to be referenced by the
- * driver:
- * + so_comp
- * + so_error
- * + so_linger
- * + so_options
- * + so_rcv
- * + so_snd
- * + so_state
- * + so_timeo
- *
- * These functions all return 0 on success and can return the following errors
- * as appropriate:
- * + EPERM:
- * + ENOBUFS: memory allocation failed
- * + EMSGSIZE: MTU changed during the call
- * + EHOSTDOWN:
- * + EHOSTUNREACH:
- * + ENETDOWN:
- * * ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- * - tells driver that the socket is going away so disconnect
- * the toepcb and free appropriate resources
- * - allows the driver to cleanly handle the case of connection state
- * outliving the socket
- * - no further calls are legal after detach
- * - the driver is expected to provide its own synchronization between
- * detach and receiving new data.
- *
- * + tu_syncache_event
- * - even if it is not actually needed, the driver is expected to
- * call syncache_add for the initial SYN and then syncache_expand
- * for the SYN,ACK
- * - tells driver that a connection either has not been added or has
- * been dropped from the syncache
- * - the driver is expected to maintain state that lives outside the
- * software stack so the syncache needs to be able to notify the
- * toe driver that the software stack is not going to create a connection
- * for a received SYN
- * - The driver is responsible for any synchronization required between
- * the syncache dropping an entry and the driver processing the SYN,ACK.
- *
- */
-struct toe_usrreqs {
- int (*tu_send)(struct tcpcb *tp);
- int (*tu_rcvd)(struct tcpcb *tp);
- int (*tu_disconnect)(struct tcpcb *tp);
- int (*tu_reset)(struct tcpcb *tp);
- void (*tu_detach)(struct tcpcb *tp);
- void (*tu_syncache_event)(int event, void *toep);
-};
+extern int registered_toedevs;
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
- u_int64_t to_flags; /* see tcpopt in tcp_var.h */
- u_int16_t to_mss; /* maximum segment size */
- u_int8_t to_wscale; /* window scaling */
+int tcp_offload_connect(struct socket *, struct sockaddr *);
+void tcp_offload_listen_start(struct tcpcb *);
+void tcp_offload_listen_stop(struct tcpcb *);
+void tcp_offload_input(struct tcpcb *, struct mbuf *);
+int tcp_offload_output(struct tcpcb *);
+void tcp_offload_rcvd(struct tcpcb *);
+void tcp_offload_ctloutput(struct tcpcb *, int, int);
+void tcp_offload_detach(struct tcpcb *);
- u_int8_t _pad1; /* explicit pad for 64bit alignment */
- u_int32_t _pad2; /* explicit pad for 64bit alignment */
- u_int64_t _pad3[4]; /* TBD */
-};
-
-#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
-#define TOE_SC_DROP 2 /* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
-
-EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
-EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
-
-/*
- * Check if the socket can be offloaded by the following steps:
- * - determine the egress interface
- * - check the interface for TOE capability and TOE is enabled
- * - check if the device has resources to offload the connection
- */
-int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
-
-/*
- * The tcp_output_* routines are wrappers around the toe_usrreqs calls
- * which trigger packet transmission. In the non-offloaded case they
- * translate to tcp_output. The tcp_offload_* routines notify TOE
- * of specific events. I the non-offloaded case they are no-ops.
- *
- * Listen is a special case because it is a 1 to many relationship
- * and there can be more than one offload driver in the system.
- */
-
-/*
- * Connection is offloaded
- */
-#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
-
-/*
- * hackish way of allowing this file to also be included by TOE
- * which needs to be kept ignorant of socket implementation details
- */
-#ifdef _SYS_SOCKETVAR_H_
-/*
- * The socket has not been marked as "do not offload"
- */
-#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
-
-static __inline int
-tcp_output_connect(struct socket *so, struct sockaddr *nam)
-{
- struct tcpcb *tp = sototcpcb(so);
- int error;
-
- /*
- * If offload has been disabled for this socket or the
- * connection cannot be offloaded just call tcp_output
- * to start the TCP state machine.
- */
-#ifndef TCP_OFFLOAD_DISABLE
- if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
-#endif
- error = tcp_output(tp);
- return (error);
-}
-
-static __inline int
-tcp_output_send(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_send(tp));
#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_rcvd(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_rcvd(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_disconnect(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_disconnect(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_reset(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_reset(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline void
-tcp_offload_detach(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- tp->t_tu->tu_detach(tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_open(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
- EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_close(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
-#endif
-}
-#undef SO_OFFLOADABLE
-#endif /* _SYS_SOCKETVAR_H_ */
-#undef tp_offload
-
-void tcp_offload_twstart(struct tcpcb *tp);
-struct tcpcb *tcp_offload_close(struct tcpcb *tp);
-struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
-
-#endif /* _NETINET_TCP_OFFLOAD_H_ */
Modified: trunk/sys/netinet/tcp_output.c
===================================================================
--- trunk/sys/netinet/tcp_output.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_output.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,11 +31,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_output.c 240307 2012-09-10 11:43:28Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_output.c 317375 2017-04-24 16:31:28Z smh $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include "opt_tcpdebug.h"
#include <sys/param.h>
@@ -47,6 +48,7 @@
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -57,6 +59,7 @@
#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
@@ -67,6 +70,9 @@
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -76,6 +82,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -90,16 +99,6 @@
&VNET_NAME(path_mtu_discovery), 1,
"Enable Path MTU Discovery");
-VNET_DEFINE(int, ss_fltsz) = 1;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
- &VNET_NAME(ss_fltsz), 1,
- "Slow start flight size");
-
-VNET_DEFINE(int, ss_fltsz_local) = 4;
-SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize,
- CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1,
- "Slow start flight size for local networks");
-
VNET_DEFINE(int, tcp_do_tso) = 1;
#define V_tcp_do_tso VNET(tcp_do_tso)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
@@ -106,6 +105,11 @@
&VNET_NAME(tcp_do_tso), 0,
"Enable TCP Segmentation Offload");
+VNET_DEFINE(int, tcp_sendspace) = 1024*32;
+#define V_tcp_sendspace VNET(tcp_sendspace)
+SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+ &VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
+
VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
@@ -124,6 +128,16 @@
&VNET_NAME(tcp_autosndbuf_max), 0,
"Max size of automatic send buffer");
+/*
+ * Make sure that either retransmit or persist timer is set for SYN, FIN and
+ * non-ACK.
+ */
+#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \
+ KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
+ tcp_timer_active((tp), TT_REXMT) || \
+ tcp_timer_active((tp), TT_PERSIST), \
+ ("neither rexmt nor persist timer is set"))
+
static void inline hhook_run_tcp_est_out(struct tcpcb *tp,
struct tcphdr *th, struct tcpopt *to,
long len, int tso);
@@ -130,7 +144,7 @@
static void inline cc_after_idle(struct tcpcb *tp);
/*
- * Wrapper for the TCP established ouput helper hook.
+ * Wrapper for the TCP established output helper hook.
*/
static void inline
hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
@@ -197,7 +211,23 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return (tcp_offload_output(tp));
+#endif
+
+#ifdef TCP_RFC7413
/*
+ * For TFO connections in SYN_RECEIVED, only allow the initial
+ * SYN|ACK and those sent by the retransmit timer.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */
+ (tp->snd_nxt != tp->snd_una)) /* not a retransmit */
+ return (0);
+#endif
+ /*
* Determine length of data that should be transmitted,
* and flags that will be used.
* If there is some data or critical controls (SYN, RST)
@@ -382,6 +412,15 @@
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+ /*
+ * When sending additional segments following a TFO SYN|ACK,
+ * do not include the SYN bit.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ flags &= ~TH_SYN;
+#endif
off--, len++;
}
@@ -395,7 +434,18 @@
flags &= ~TH_FIN;
}
- if (len < 0) {
+#ifdef TCP_RFC7413
+ /*
+ * When retransmitting SYN|ACK on a passively-created TFO socket,
+ * don't include data, as the presence of data may have caused the
+ * original SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+ (flags & TH_RST)))
+ len = 0;
+#endif
+ if (len <= 0) {
/*
* If FIN has been sent but not acked,
* but we haven't been called to retransmit,
@@ -405,9 +455,16 @@
* to (closed) window, and set the persist timer
* if it isn't already going. If the window didn't
* close completely, just wait for an ACK.
+ *
+ * We also do a general check here to ensure that
+ * we will set the persist timer when we have data
+ * to send, but a 0-byte window. This makes sure
+ * the persist timer is set even if the packet
+ * hits one of the "goto send" lines below.
*/
len = 0;
- if (sendwin == 0) {
+ if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
+ (off < (int) so->so_snd.sb_cc)) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
@@ -543,19 +600,39 @@
}
/*
- * Compare available window to amount of window
- * known to peer (as advertised window less
- * next expected input). If the difference is at least two
- * max size segments, or at least 50% of the maximum possible
- * window, then want to send a window update to peer.
- * Skip this if the connection is in T/TCP half-open state.
- * Don't send pure window updates when the peer has closed
- * the connection and won't ever send more data.
+ * Sending of standalone window updates.
+ *
+ * Window updates are important when we close our window due to a
+ * full socket buffer and are opening it again after the application
+ * reads data from it. Once the window has opened again and the
+ * remote end starts to send again the ACK clock takes over and
+ * provides the most current window information.
+ *
+ * We must avoid the silly window syndrome whereby every read
+ * from the receive buffer, no matter how small, causes a window
+ * update to be sent. We also should avoid sending a flurry of
+ * window updates when the socket buffer had queued a lot of data
+ * and the application is doing small reads.
+ *
+ * Prevent a flurry of pointless window updates by only sending
+ * an update when we can increase the advertised window by more
+ * than 1/4th of the socket buffer capacity. When the buffer is
+ * getting full or is very small be more aggressive and send an
+ * update whenever we can increase by two mss sized segments.
+ * In all other situations the ACK's to new incoming data will
+ * carry further window increases.
+ *
+ * Don't send an independent window update if a delayed
+ * ACK is pending (it will get piggy-backed on it) or the
+ * remote side already has done a half-close and won't send
+ * more data. Skip this if the connection is in T/TCP
+ * half-open state.
*/
if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+ !(tp->t_flags & TF_DELACK) &&
!TCPS_HAVERCVDFIN(tp->t_state)) {
/*
- * "adv" is the amount we can increase the window,
+ * "adv" is the amount we could increase the window,
* taking into account that we are limited by
* TCP_MAXWIN << tp->rcv_scale.
*/
@@ -575,10 +652,12 @@
*/
if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
goto dontupdate;
- if (adv >= (long) (2 * tp->t_maxseg))
+
+ if (adv >= (long)(2 * tp->t_maxseg) &&
+ (adv >= (long)(so->so_rcv.sb_hiwat / 4) ||
+ recwin <= (long)(so->so_rcv.sb_hiwat / 8) ||
+ so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
goto send;
- if (2 * adv >= (long) so->so_rcv.sb_hiwat)
- goto send;
}
dontupdate:
@@ -649,6 +728,12 @@
send:
SOCKBUF_LOCK_ASSERT(&so->so_snd);
+ if (len > 0) {
+ if (len >= tp->t_maxseg)
+ tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+ }
/*
* Before ESTABLISHED, force sending of initial options
* unless TCP set not to do any options.
@@ -671,13 +756,29 @@
* segments. Options for SYN-ACK segments are handled in TCP
* syncache.
*/
+ to.to_flags = 0;
if ((tp->t_flags & TF_NOOPT) == 0) {
- to.to_flags = 0;
/* Maximum segment size. */
if (flags & TH_SYN) {
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+ /*
+ * Only include the TFO option on the first
+ * transmission of the SYN|ACK on a
+ * passively-created TFO socket, as the presence of
+ * the TFO option may have caused the original
+ * SYN|ACK to have been dropped by a middlebox.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_rxtshift == 0)) {
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+ to.to_flags |= TOF_FASTOPEN;
+ }
+#endif
}
/* Window scaling. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -690,11 +791,13 @@
to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
- /* Set receive buffer autosizing timestamp. */
- if (tp->rfbuf_ts == 0 &&
- (so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = tcp_ts_getticks();
}
+
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = tcp_ts_getticks();
+
/* Selective ACK's. */
if (tp->t_flags & TF_SACK_PERMIT) {
if (flags & TH_SYN)
@@ -741,27 +844,122 @@
flags &= ~TH_FIN;
if (tso) {
+ u_int if_hw_tsomax;
+ u_int if_hw_tsomaxsegcount;
+ u_int if_hw_tsomaxsegsize;
+ struct mbuf *mb;
+ u_int moff;
+ int max_len;
+
+ /* extract TSO information */
+ if_hw_tsomax = tp->t_tsomax;
+ if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
+ if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
+
+ /*
+ * Limit a TSO burst to prevent it from
+ * overflowing or exceeding the maximum length
+ * allowed by the network interface:
+ */
KASSERT(ipoptlen == 0,
("%s: TSO can't do IP options", __func__));
/*
- * Limit a burst to IP_MAXPACKET minus IP,
- * TCP and options length to keep ip->ip_len
- * from overflowing.
+ * Check if we should limit by maximum payload
+ * length:
*/
- if (len > IP_MAXPACKET - hdrlen) {
- len = IP_MAXPACKET - hdrlen;
- sendalot = 1;
+ if (if_hw_tsomax != 0) {
+ /* compute maximum TSO length */
+ max_len = (if_hw_tsomax - hdrlen -
+ max_linkhdr);
+ if (max_len <= 0) {
+ len = 0;
+ } else if (len > max_len) {
+ sendalot = 1;
+ len = max_len;
+ }
}
/*
+ * Check if we should limit by maximum segment
+ * size and count:
+ */
+ if (if_hw_tsomaxsegcount != 0 &&
+ if_hw_tsomaxsegsize != 0) {
+ /*
+ * Subtract one segment for the LINK
+ * and TCP/IP headers mbuf that will
+ * be prepended to this mbuf chain
+ * after the code in this section
+ * limits the number of mbufs in the
+ * chain to if_hw_tsomaxsegcount.
+ */
+ if_hw_tsomaxsegcount -= 1;
+ max_len = 0;
+ mb = sbsndmbuf(&so->so_snd, off, &moff);
+
+ while (mb != NULL && max_len < len) {
+ u_int mlen;
+ u_int frags;
+
+ /*
+ * Get length of mbuf fragment
+ * and how many hardware frags,
+ * rounded up, it would use:
+ */
+ mlen = (mb->m_len - moff);
+ frags = howmany(mlen,
+ if_hw_tsomaxsegsize);
+
+ /* Handle special case: Zero Length Mbuf */
+ if (frags == 0)
+ frags = 1;
+
+ /*
+ * Check if the fragment limit
+ * will be reached or exceeded:
+ */
+ if (frags >= if_hw_tsomaxsegcount) {
+ max_len += min(mlen,
+ if_hw_tsomaxsegcount *
+ if_hw_tsomaxsegsize);
+ break;
+ }
+ max_len += mlen;
+ if_hw_tsomaxsegcount -= frags;
+ moff = 0;
+ mb = mb->m_next;
+ }
+ if (max_len <= 0) {
+ len = 0;
+ } else if (len > max_len) {
+ sendalot = 1;
+ len = max_len;
+ }
+ }
+
+ /*
* Prevent the last segment from being
- * fractional unless the send sockbuf can
- * be emptied.
+ * fractional unless the send sockbuf can be
+ * emptied:
*/
- if (sendalot && off + len < so->so_snd.sb_cc) {
- len -= len % (tp->t_maxopd - optlen);
+ max_len = (tp->t_maxopd - optlen);
+ if ((off + len) < so->so_snd.sb_cc) {
+ moff = len % max_len;
+ if (moff != 0) {
+ len -= moff;
+ sendalot = 1;
+ }
+ }
+
+ /*
+ * In case there are too many small fragments
+ * don't use TSO:
+ */
+ if (len <= max_len) {
+ len = max_len;
sendalot = 1;
+ tso = 0;
}
/*
@@ -818,23 +1016,20 @@
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
}
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+#ifdef INET6
+ if (MHLEN < hdrlen + max_linkhdr)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+#endif
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+
if (m == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
-#ifdef INET6
- if (MHLEN < hdrlen + max_linkhdr) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- SOCKBUF_UNLOCK(&so->so_snd);
- m_freem(m);
- error = ENOBUFS;
- goto out;
- }
- }
-#endif
+
m->m_data += max_linkhdr;
m->m_len = hdrlen;
@@ -854,6 +1049,7 @@
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
}
@@ -864,7 +1060,7 @@
* give data to the user when a buffer fills or
* a PUSH comes in.)
*/
- if (off + len == so->so_snd.sb_cc)
+ if ((off + len == so->so_snd.sb_cc) && !(flags & TH_SYN))
flags |= TH_PUSH;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
@@ -878,9 +1074,10 @@
else
TCPSTAT_INC(tcps_sndwinup);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
+ sack_rxmit = 0;
goto out;
}
#ifdef INET6
@@ -925,7 +1122,7 @@
* resend those bits a number of times as per
* RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
if (tp->t_rxtshift >= 1) {
if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
flags |= TH_ECE|TH_CWR;
@@ -1042,7 +1239,7 @@
tp->snd_up = tp->snd_una; /* drag it along */
#ifdef TCP_SIGNATURE
- if (tp->t_flags & TF_SIGNATURE) {
+ if (to.to_flags & TOF_SIGNATURE) {
int sigoff = to.to_signature - opt;
tcp_signature_compute(m, 0, len, optlen,
(u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
@@ -1058,8 +1255,8 @@
#ifdef INET6
if (isipv6) {
/*
- * ip6_plen is not need to be filled now, and will be filled
- * in ip6_output.
+ * There is no need to fill in ip6_plen right now.
+ * It will be filled later by ip6_output.
*/
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
@@ -1103,75 +1300,6 @@
__func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
#endif
- /*
- * In transmit state, time the transmission and arrange for
- * the retransmit. In persist state, just set snd_max.
- */
- if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !tcp_timer_active(tp, TT_PERSIST)) {
- tcp_seq startseq = tp->snd_nxt;
-
- /*
- * Advance snd_nxt over sequence space of this segment.
- */
- if (flags & (TH_SYN|TH_FIN)) {
- if (flags & TH_SYN)
- tp->snd_nxt++;
- if (flags & TH_FIN) {
- tp->snd_nxt++;
- tp->t_flags |= TF_SENTFIN;
- }
- }
- if (sack_rxmit)
- goto timer;
- tp->snd_nxt += len;
- if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
- tp->snd_max = tp->snd_nxt;
- /*
- * Time this transmission if not a retransmission and
- * not currently timing anything.
- */
- if (tp->t_rtttime == 0) {
- tp->t_rtttime = ticks;
- tp->t_rtseq = startseq;
- TCPSTAT_INC(tcps_segstimed);
- }
- }
-
- /*
- * Set retransmit timer if not currently set,
- * and not doing a pure ack or a keep-alive probe.
- * Initial value for retransmit timer is smoothed
- * round-trip time + 2 * round-trip time variance.
- * Initialize shift counter which is used for backoff
- * of retransmit time.
- */
-timer:
- if (!tcp_timer_active(tp, TT_REXMT) &&
- ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
- (tp->snd_nxt != tp->snd_una))) {
- if (tcp_timer_active(tp, TT_PERSIST)) {
- tcp_timer_activate(tp, TT_PERSIST, 0);
- tp->t_rxtshift = 0;
- }
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
- }
- } else {
- /*
- * Persist case, update snd_max but since we are in
- * persist mode (no window) we do not update snd_nxt.
- */
- int xlen = len;
- if (flags & TH_SYN)
- ++xlen;
- if (flags & TH_FIN) {
- ++xlen;
- tp->t_flags |= TF_SENTFIN;
- }
- if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
- tp->snd_max = tp->snd_nxt + len;
- }
-
/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
hhook_run_tcp_est_out(tp, th, &to, len, tso);
@@ -1219,6 +1347,23 @@
*/
ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
+ /*
+ * Set the packet size here for the benefit of DTrace probes.
+ * ip6_output() will set it properly; it's supposed to include
+ * the option header lengths as well.
+ */
+ ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
+
+ if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ else
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+
+ if (tp->t_state == TCPS_SYN_SENT)
+ TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
+
+ TCP_PROBE5(send, NULL, tp, ip6, tp, th);
+
/* TODO: IPv6 IP6TOS_ECT bit on */
error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
@@ -1225,7 +1370,7 @@
NULL, NULL, tp->t_inpcb);
if (error == EMSGSIZE && ro.ro_rt != NULL)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
}
#endif /* INET6 */
@@ -1237,7 +1382,7 @@
struct route ro;
bzero(&ro, sizeof(ro));
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
#ifdef INET6
if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
@@ -1250,18 +1395,122 @@
*
* NB: Don't set DF on small MTU/MSS to have a safe fallback.
*/
- if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
- ip->ip_off |= IP_DF;
+ if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
+ ip->ip_off |= htons(IP_DF);
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ } else {
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ }
+ if (tp->t_state == TCPS_SYN_SENT)
+ TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
+
+ TCP_PROBE5(send, NULL, tp, ip, tp, th);
+
error = ip_output(m, tp->t_inpcb->inp_options, &ro,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
tp->t_inpcb);
if (error == EMSGSIZE && ro.ro_rt != NULL)
- mtu = ro.ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
}
#endif /* INET */
+
+out:
+ /*
+ * In transmit state, time the transmission and arrange for
+ * the retransmit. In persist state, just set snd_max.
+ */
+ if ((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_seq startseq = tp->snd_nxt;
+
+ /*
+ * Advance snd_nxt over sequence space of this segment.
+ */
+ if (flags & (TH_SYN|TH_FIN)) {
+ if (flags & TH_SYN)
+ tp->snd_nxt++;
+ if (flags & TH_FIN) {
+ tp->snd_nxt++;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ }
+ if (sack_rxmit)
+ goto timer;
+ tp->snd_nxt += len;
+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_max = tp->snd_nxt;
+ /*
+ * Time this transmission if not a retransmission and
+ * not currently timing anything.
+ */
+ if (tp->t_rtttime == 0) {
+ tp->t_rtttime = ticks;
+ tp->t_rtseq = startseq;
+ TCPSTAT_INC(tcps_segstimed);
+ }
+ }
+
+ /*
+ * Set retransmit timer if not currently set,
+ * and not doing a pure ack or a keep-alive probe.
+ * Initial value for retransmit timer is smoothed
+ * round-trip time + 2 * round-trip time variance.
+ * Initialize shift counter which is used for backoff
+ * of retransmit time.
+ */
+timer:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
+ (tp->snd_nxt != tp->snd_una))) {
+ if (tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
+ tp->t_rxtshift = 0;
+ }
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ } else if (len == 0 && so->so_snd.sb_cc &&
+ !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ /*
+ * Avoid a situation where we do not set persist timer
+ * after a zero window condition. For example:
+ * 1) A -> B: packet with enough data to fill the window
+ * 2) B -> A: ACK for #1 + new data (0 window
+ * advertisement)
+ * 3) A -> B: ACK for #2, 0 len packet
+ *
+ * In this case, A will not activate the persist timer,
+ * because it chose to send a packet. Unless tcp_output
+ * is called for some other reason (delayed ack timer,
+ * another input packet from B, socket syscall), A will
+ * not send zero window probes.
+ *
+ * So, if you send a 0-length packet, but there is data
+ * in the socket buffer, and neither the rexmt nor
+ * persist timer is already set, then activate the
+ * persist timer.
+ */
+ tp->t_rxtshift = 0;
+ tcp_setpersist(tp);
+ }
+ } else {
+ /*
+ * Persist case, update snd_max but since we are in
+ * persist mode (no window) we do not update snd_nxt.
+ */
+ int xlen = len;
+ if (flags & TH_SYN)
+ ++xlen;
+ if (flags & TH_FIN) {
+ ++xlen;
+ tp->t_flags |= TF_SENTFIN;
+ }
+ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
+ tp->snd_max = tp->snd_nxt + len;
+ }
+
if (error) {
/*
@@ -1289,7 +1538,6 @@
} else
tp->snd_nxt -= len;
}
-out:
SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */
switch (error) {
case EPERM:
@@ -1296,9 +1544,7 @@
tp->t_softerror = error;
return (error);
case ENOBUFS:
- if (!tcp_timer_active(tp, TT_REXMT) &&
- !tcp_timer_active(tp, TT_PERSIST))
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ TCP_XMIT_TIMER_ASSERT(tp, len, flags);
tp->snd_cwnd = tp->t_maxseg;
return (0);
case EMSGSIZE:
@@ -1373,7 +1619,7 @@
* Start/restart persistance timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
- TCPTV_PERSMIN, TCPTV_PERSMAX);
+ tcp_persmin, tcp_persmax);
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
@@ -1461,6 +1707,7 @@
bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
optp += sizeof(to->to_tsecr);
break;
+#ifdef TCP_SIGNATURE
case TOF_SIGNATURE:
{
int siglen = TCPOLEN_SIGNATURE - 2;
@@ -1479,6 +1726,7 @@
*optp++ = 0;
break;
}
+#endif
case TOF_SACK:
{
int sackblks = 0;
@@ -1509,6 +1757,25 @@
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
+#ifdef TCP_RFC7413
+ case TOF_FASTOPEN:
+ {
+ int total_len;
+
+ /* XXX is there any point to aligning this option? */
+ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+ if (TCP_MAXOLEN - optlen < total_len)
+ continue;
+ *optp++ = TCPOPT_FAST_OPEN;
+ *optp++ = total_len;
+ if (to->to_tfo_len > 0) {
+ bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+ optp += to->to_tfo_len;
+ }
+ optlen += total_len;
+ break;
+ }
+#endif
default:
panic("%s: unknown TCP option type", __func__);
break;
Modified: trunk/sys/netinet/tcp_reass.c
===================================================================
--- trunk/sys/netinet/tcp_reass.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_reass.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_reass.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_reass.c 285976 2015-07-28 19:58:44Z delphij $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -75,35 +75,27 @@
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-static int tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS);
static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS);
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_maxseg) = 0;
-#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg)
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, maxsegments,
- CTLTYPE_INT | CTLFLAG_RDTUN,
- &VNET_NAME(tcp_reass_maxseg), 0, &tcp_reass_sysctl_maxseg, "I",
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
"Global maximum number of TCP Segments in Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_qsize) = 0;
-#define V_tcp_reass_qsize VNET(tcp_reass_qsize)
-SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
- CTLTYPE_INT | CTLFLAG_RD,
- &VNET_NAME(tcp_reass_qsize), 0, &tcp_reass_sysctl_qsize, "I",
+SYSCTL_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments,
+ (CTLTYPE_INT | CTLFLAG_RD), NULL, 0, &tcp_reass_sysctl_qsize, "I",
"Global number of TCP Segments currently in Reassembly Queue");
-static VNET_DEFINE(int, tcp_reass_overflows) = 0;
-#define V_tcp_reass_overflows VNET(tcp_reass_overflows)
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
- CTLTYPE_INT | CTLFLAG_RD,
- &VNET_NAME(tcp_reass_overflows), 0,
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
+ CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
"Global number of TCP Segment Reassembly Queue Overflows");
-static VNET_DEFINE(uma_zone_t, tcp_reass_zone);
-#define V_tcp_reass_zone VNET(tcp_reass_zone)
+static uma_zone_t tcp_reass_zone;
/* Initialize TCP reassembly queue */
static void
@@ -110,34 +102,29 @@
tcp_reass_zone_change(void *tag)
{
- V_tcp_reass_maxseg = nmbclusters / 16;
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ /* Set the zone limit and read back the effective value. */
+ tcp_reass_maxseg = nmbclusters / 16;
+ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+ tcp_reass_maxseg);
}
void
-tcp_reass_init(void)
+tcp_reass_global_init(void)
{
- V_tcp_reass_maxseg = nmbclusters / 16;
+ tcp_reass_maxseg = nmbclusters / 16;
TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &V_tcp_reass_maxseg);
- V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ /* Set the zone limit and read back the effective value. */
+ tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone,
+ tcp_reass_maxseg);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}
-#ifdef VIMAGE
void
-tcp_reass_destroy(void)
-{
-
- uma_zdestroy(V_tcp_reass_zone);
-}
-#endif
-
-void
tcp_reass_flush(struct tcpcb *tp)
{
struct tseg_qent *qe;
@@ -147,7 +134,7 @@
while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(qe, tqe_q);
m_freem(qe->tqe_m);
- uma_zfree(V_tcp_reass_zone, qe);
+ uma_zfree(tcp_reass_zone, qe);
tp->t_segqlen--;
}
@@ -157,17 +144,12 @@
}
static int
-tcp_reass_sysctl_maxseg(SYSCTL_HANDLER_ARGS)
+tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
{
- V_tcp_reass_maxseg = uma_zone_get_max(V_tcp_reass_zone);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
-}
+ int qsize;
-static int
-tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
-{
- V_tcp_reass_qsize = uma_zone_get_cur(V_tcp_reass_zone);
- return (sysctl_handle_int(oidp, arg1, arg2, req));
+ qsize = uma_zone_get_cur(tcp_reass_zone);
+ return (sysctl_handle_int(oidp, &qsize, 0, req));
}
int
@@ -212,9 +194,9 @@
* Investigate why and re-evaluate the below limit after the behaviour
* is understood.
*/
- if (th->th_seq != tp->rcv_nxt &&
+ if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
- V_tcp_reass_overflows++;
+ tcp_reass_overflows++;
TCPSTAT_INC(tcps_rcvmemdrop);
m_freem(m);
*tlenp = 0;
@@ -233,9 +215,9 @@
* Use a temporary structure on the stack for the missing segment
* when the zone is exhausted. Otherwise we may get stuck.
*/
- te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT);
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
- if (th->th_seq != tp->rcv_nxt) {
+ if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
TCPSTAT_INC(tcps_rcvmemdrop);
m_freem(m);
*tlenp = 0;
@@ -283,7 +265,8 @@
TCPSTAT_INC(tcps_rcvduppack);
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
m_freem(m);
- uma_zfree(V_tcp_reass_zone, te);
+ if (te != &tqs)
+ uma_zfree(tcp_reass_zone, te);
tp->t_segqlen--;
/*
* Try to present any queued data
@@ -320,7 +303,7 @@
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- uma_zfree(V_tcp_reass_zone, q);
+ uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
}
@@ -359,7 +342,7 @@
else
sbappendstream_locked(&so->so_rcv, q->tqe_m);
if (q != &tqs)
- uma_zfree(V_tcp_reass_zone, q);
+ uma_zfree(tcp_reass_zone, q);
tp->t_segqlen--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
Modified: trunk/sys/netinet/tcp_sack.c
===================================================================
--- trunk/sys/netinet/tcp_sack.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_sack.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -72,7 +72,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_sack.c 220105 2011-03-28 19:03:56Z weongyo $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_sack.c 293710 2016-01-11 23:34:29Z hiren $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -345,17 +345,22 @@
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
+ * Returns 1 if incoming ACK has previously unknown SACK information,
+ * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
+ * to that (i.e. left edge moving) would also be considered a change in SACK
+ * information which is slightly different than rfc6675.
*/
-void
+int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
- int i, j, num_sack_blks;
+ int i, j, num_sack_blks, sack_changed;
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
+ sack_changed = 0;
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
@@ -369,6 +374,7 @@
* received new blocks from the other side.
*/
if (to->to_flags & TOF_SACK) {
+ tp->sackhint.sacked_bytes = 0; /* reset */
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK),
&sack, sizeof(sack));
@@ -379,8 +385,11 @@
SEQ_GT(sack.start, th_ack) &&
SEQ_LT(sack.start, tp->snd_max) &&
SEQ_GT(sack.end, tp->snd_una) &&
- SEQ_LEQ(sack.end, tp->snd_max))
+ SEQ_LEQ(sack.end, tp->snd_max)) {
sack_blocks[num_sack_blks++] = sack;
+ tp->sackhint.sacked_bytes +=
+ (sack.end-sack.start);
+ }
}
}
/*
@@ -388,7 +397,7 @@
* received.
*/
if (num_sack_blks == 0)
- return;
+ return (sack_changed);
/*
* Sort the SACK blocks so we can update the scoreboard with just one
@@ -439,6 +448,7 @@
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
+ sack_changed = 1;
} else {
/*
* We failed to add a new hole based on the current
@@ -455,9 +465,11 @@
SEQ_LT(tp->snd_fack, sblkp->end))
tp->snd_fack = sblkp->end;
}
- } else if (SEQ_LT(tp->snd_fack, sblkp->end))
+ } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
/* fack is advanced. */
tp->snd_fack = sblkp->end;
+ sack_changed = 1;
+ }
/* We must have at least one SACK hole in scoreboard. */
KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
("SACK scoreboard must not be empty"));
@@ -486,6 +498,7 @@
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
+ sack_changed = 1;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -541,6 +554,7 @@
else
sblkp--;
}
+ return (sack_changed);
}
/*
Modified: trunk/sys/netinet/tcp_seq.h
===================================================================
--- trunk/sys/netinet/tcp_seq.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_seq.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
- * $FreeBSD: stable/9/sys/netinet/tcp_seq.h 235657 2012-05-19 18:32:31Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_seq.h 231767 2012-02-15 16:09:56Z bz $
*/
#ifndef _NETINET_TCP_SEQ_H_
Modified: trunk/sys/netinet/tcp_subr.c
===================================================================
--- trunk/sys/netinet/tcp_subr.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_subr.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_subr.c 238247 2012-07-08 14:21:36Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_subr.c 314667 2017-03-04 13:03:31Z avg $");
#include "opt_compat.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include "opt_tcpdebug.h"
#include <sys/param.h>
@@ -54,6 +55,7 @@
#endif
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
@@ -67,6 +69,7 @@
#include <netinet/cc.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -81,12 +84,14 @@
#include <netinet6/nd6.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -97,6 +102,9 @@
#ifdef INET6
#include <netinet6/ip6protosw.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -173,7 +181,7 @@
VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
&VNET_NAME(tcp_minmss), 0,
- "Minmum TCP Maximum Segment Size");
+ "Minimum TCP Maximum Segment Size");
VNET_DEFINE(int, tcp_do_rfc1323) = 1;
SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
@@ -226,6 +234,7 @@
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
+static void tcp_timer_discard(struct tcpcb *, uint32_t);
/*
* Target size of TCP PCB hash tables. Must be a power of two.
@@ -234,7 +243,7 @@
* variable net.inet.tcp.tcbhashsize
*/
#ifndef TCBHASHSIZE
-#define TCBHASHSIZE 512
+#define TCBHASHSIZE 0
#endif
/*
@@ -281,11 +290,35 @@
return (0);
}
+/*
+ * Round a requested bucket count up to a power of two for the tcp_inpcb
+ * hash.  The result is the smallest power of two strictly greater than
+ * 'size', so an exact power of two is doubled (512 -> 1024).
+ *
+ * The result is capped at the largest power of two that fits in an int,
+ * and a zero or negative request yields 1.  No shift in here can overflow.
+ */
+static int
+maketcp_hashsize(int size)
+{
+	/* Largest power of two representable in a non-negative int. */
+	const int maxhash = (int)(1U << (sizeof(int) * 8 - 2));
+	int hashsize;
+
+	/*
+	 * Double until we pass 'size'.  Stopping at maxhash takes the place
+	 * of the former "1 << fls(size)" followed by an after-the-fact
+	 * overflow test: that shift was undefined for sizes with bit 30 set
+	 * (and for negative sizes), so the test could not be relied upon.
+	 */
+	hashsize = 1;
+	while (hashsize <= size && hashsize < maxhash)
+		hashsize <<= 1;
+	return (hashsize);
+}
+
void
tcp_init(void)
{
+ const char *tcbhash_tuneable;
int hashsize;
+ tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
+
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
&V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
@@ -294,10 +327,43 @@
printf("%s: WARNING: unable to register helper hook\n", __func__);
hashsize = TCBHASHSIZE;
- TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
+ TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
+ if (hashsize == 0) {
+ /*
+ * Auto tune the hash size based on maxsockets.
+ * A perfect hash would have a 1:1 mapping
+ * (hashsize = maxsockets) however it's been
+ * suggested that O(2) average is better.
+ */
+ hashsize = maketcp_hashsize(maxsockets / 4);
+ /*
+ * Our historical default is 512,
+ * do not autotune lower than this.
+ */
+ if (hashsize < 512)
+ hashsize = 512;
+ if (bootverbose && IS_DEFAULT_VNET(curvnet))
+ printf("%s: %s auto tuned to %d\n", __func__,
+ tcbhash_tuneable, hashsize);
+ }
+ /*
+ * We require a hashsize to be a power of two.
+ * Previously if it was not a power of two we would just reset it
+ * back to 512, which could be a nasty surprise if you did not notice
+ * the error message.
+ * Instead what we do is clip it to the closest power of two lower
+ * than the specified hash value.
+ */
if (!powerof2(hashsize)) {
- printf("WARNING: TCB hash size not a power of 2\n");
- hashsize = 512; /* safe default */
+ int oldhashsize = hashsize;
+
+ hashsize = maketcp_hashsize(hashsize);
+ /* prevent absurdly low value */
+ if (hashsize < 16)
+ hashsize = 16;
+ printf("%s: WARNING: TCB hash size not a power of 2, "
+ "clipped from %d to %d.\n", __func__, oldhashsize,
+ hashsize);
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
"tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
@@ -309,11 +375,11 @@
V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_tcpcb_zone, maxsockets);
+ uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
tcp_tw_init();
syncache_init();
tcp_hc_init();
- tcp_reass_init();
TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
@@ -323,6 +389,8 @@
if (!IS_DEFAULT_VNET(curvnet))
return;
+ tcp_reass_global_init();
+
/* XXX virtualize those bellow? */
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;
@@ -333,6 +401,8 @@
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
+ tcp_persmin = TCPTV_PERSMIN;
+ tcp_persmax = TCPTV_PERSMAX;
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
@@ -363,6 +433,10 @@
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
+
+#ifdef TCP_RFC7413
+ tcp_fastopen_init();
+#endif
}
#ifdef VIMAGE
@@ -370,7 +444,9 @@
tcp_destroy(void)
{
- tcp_reass_destroy();
+#ifdef TCP_RFC7413
+ tcp_fastopen_destroy();
+#endif
tcp_hc_destroy();
syncache_destroy();
tcp_tw_destroy();
@@ -481,16 +557,18 @@
tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
tcp_seq ack, tcp_seq seq, int flags)
{
- int tlen;
- int win = 0;
+ struct tcpopt to;
+ struct inpcb *inp;
struct ip *ip;
+ struct mbuf *optm;
struct tcphdr *nth;
+ u_char *optp;
#ifdef INET6
struct ip6_hdr *ip6;
int isipv6;
#endif /* INET6 */
- int ipflags = 0;
- struct inpcb *inp;
+ int optlen, tlen, win;
+ bool incl_opts;
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
@@ -507,6 +585,8 @@
} else
inp = NULL;
+ incl_opts = false;
+ win = 0;
if (tp != NULL) {
if (!(flags & TH_RST)) {
win = sbspace(&inp->inp_socket->so_rcv);
@@ -513,12 +593,13 @@
if (win > (long)TCP_MAXWIN << tp->rcv_scale)
win = (long)TCP_MAXWIN << tp->rcv_scale;
}
+ if ((tp->t_flags & TF_NOOPT) == 0)
+ incl_opts = true;
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- tlen = 0;
m->m_data += max_linkhdr;
#ifdef INET6
if (isipv6) {
@@ -528,25 +609,61 @@
nth = (struct tcphdr *)(ip6 + 1);
} else
#endif /* INET6 */
- {
- bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
- ip = mtod(m, struct ip *);
- nth = (struct tcphdr *)(ip + 1);
- }
+ {
+ bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+ ip = mtod(m, struct ip *);
+ nth = (struct tcphdr *)(ip + 1);
+ }
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
flags = TH_ACK;
+ } else if (!M_WRITABLE(m)) {
+ struct mbuf *n;
+
+ /* Can't reuse 'm', allocate a new mbuf. */
+ n = m_gethdr(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ m_freem(m);
+ return;
+ }
+
+ if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
+ m_freem(m);
+ m_freem(n);
+ return;
+ }
+
+ n->m_data += max_linkhdr;
+ /* m_len is set later */
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+ if (isipv6) {
+ bcopy((caddr_t)ip6, mtod(n, caddr_t),
+ sizeof(struct ip6_hdr));
+ ip6 = mtod(n, struct ip6_hdr *);
+ xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+ nth = (struct tcphdr *)(ip6 + 1);
+ } else
+#endif /* INET6 */
+ {
+ bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip));
+ ip = mtod(n, struct ip *);
+ xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+ nth = (struct tcphdr *)(ip + 1);
+ }
+ bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+ xchg(nth->th_dport, nth->th_sport, uint16_t);
+ th = nth;
+ m_freem(m);
+ m = n;
} else {
/*
* reuse the mbuf.
- * XXX MRT We inherrit the FIB, which is lucky.
+ * XXX MRT We inherit the FIB, which is lucky.
*/
m_freem(m->m_next);
m->m_next = NULL;
m->m_data = (caddr_t)ipgen;
- m_addr_changed(m);
/* m_len is set later */
- tlen = 0;
-#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
#ifdef INET6
if (isipv6) {
xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
@@ -553,10 +670,10 @@
nth = (struct tcphdr *)(ip6 + 1);
} else
#endif /* INET6 */
- {
- xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
- nth = (struct tcphdr *)(ip + 1);
- }
+ {
+ xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
+ nth = (struct tcphdr *)(ip + 1);
+ }
if (th != nth) {
/*
* this is usually a case when an extension header
@@ -569,13 +686,65 @@
xchg(nth->th_dport, nth->th_sport, uint16_t);
#undef xchg
}
+ tlen = 0;
#ifdef INET6
+ if (isipv6)
+ tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+#endif
+#if defined(INET) && defined(INET6)
+ else
+#endif
+#ifdef INET
+ tlen = sizeof (struct tcpiphdr);
+#endif
+#ifdef INVARIANTS
+ m->m_len = 0;
+ KASSERT(M_TRAILINGSPACE(m) >= tlen,
+ ("Not enough trailing space for message (m=%p, need=%d, have=%ld)",
+ m, tlen, (long)M_TRAILINGSPACE(m)));
+#endif
+ m->m_len = tlen;
+ to.to_flags = 0;
+ if (incl_opts) {
+ /* Make sure we have room. */
+ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
+ m->m_next = m_get(M_NOWAIT, MT_DATA);
+ if (m->m_next) {
+ optp = mtod(m->m_next, u_char *);
+ optm = m->m_next;
+ } else
+ incl_opts = false;
+ } else {
+ optp = (u_char *) (nth + 1);
+ optm = m;
+ }
+ }
+ if (incl_opts) {
+ /* Timestamps. */
+ if (tp->t_flags & TF_RCVD_TSTMP) {
+ to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ }
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+ if (tp->t_flags & TF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+
+ /* Add the options. */
+ tlen += optlen = tcp_addoptions(&to, optp);
+
+ /* Update m_len in the correct mbuf. */
+ optm->m_len += optlen;
+ } else
+ optlen = 0;
+#ifdef INET6
if (isipv6) {
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_TCP;
- ip6->ip6_plen = 0; /* Set in ip6_output(). */
- tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+ ip6->ip6_plen = htons(tlen - sizeof(*ip6));
}
#endif
#if defined(INET) && defined(INET6)
@@ -583,14 +752,12 @@
#endif
#ifdef INET
{
- tlen += sizeof (struct tcpiphdr);
- ip->ip_len = tlen;
+ ip->ip_len = htons(tlen);
ip->ip_ttl = V_ip_defttl;
if (V_path_mtu_discovery)
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
}
#endif
- m->m_len = tlen;
m->m_pkthdr.len = tlen;
m->m_pkthdr.rcvif = NULL;
#ifdef MAC
@@ -612,7 +779,7 @@
nth->th_seq = htonl(seq);
nth->th_ack = htonl(ack);
nth->th_x2 = 0;
- nth->th_off = sizeof (struct tcphdr) >> 2;
+ nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
nth->th_flags = flags;
if (tp != NULL)
nth->th_win = htons((u_short) (win >> tp->rcv_scale));
@@ -620,6 +787,13 @@
nth->th_win = htons((u_short)win);
nth->th_urp = 0;
+#ifdef TCP_SIGNATURE
+ if (to.to_flags & TOF_SIGNATURE) {
+ tcp_signature_compute(m, 0, 0, optlen, to.to_signature,
+ IPSEC_DIR_OUTBOUND);
+ }
+#endif
+
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
@@ -644,15 +818,20 @@
if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
#endif
+ if (flags & TH_RST)
+ TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *),
+ tp, nth);
+
+ TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth);
#ifdef INET6
if (isipv6)
- (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+ (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
#endif /* INET6 */
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
- (void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
+ (void) ip_output(m, NULL, NULL, 0, NULL, inp);
#endif
}
@@ -713,11 +892,11 @@
V_tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
- callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_rexmt, 1);
+ callout_init(&tp->t_timers->tt_persist, 1);
+ callout_init(&tp->t_timers->tt_keep, 1);
+ callout_init(&tp->t_timers->tt_2msl, 1);
+ callout_init(&tp->t_timers->tt_delack, 1);
if (V_tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
@@ -724,8 +903,14 @@
if (V_tcp_do_sack)
tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
- tp->t_inpcb = inp; /* XXX */
/*
+ * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
+ * is called.
+ */
+ in_pcbref(inp); /* Reference for tcpcb */
+ tp->t_inpcb = inp;
+
+ /*
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
* rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
* reasonable initial retransmit time.
@@ -772,7 +957,7 @@
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
/*
* New connections already part way through being initialised
* with the CC algo we're removing will not race with this code
@@ -802,7 +987,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
@@ -820,12 +1005,12 @@
{
struct socket *so = tp->t_inpcb->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (TCPS_HAVERCVDSYN(tp->t_state)) {
- tp->t_state = TCPS_CLOSED;
- (void) tcp_output_reset(tp);
+ tcp_state_change(tp, TCPS_CLOSED);
+ (void) tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -843,6 +1028,7 @@
#ifdef INET6
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif /* INET6 */
+ int released;
INP_WLOCK_ASSERT(inp);
@@ -850,22 +1036,15 @@
* Make sure that all of our timers are stopped before we delete the
* PCB.
*
- * XXXRW: Really, we would like to use callout_drain() here in order
- * to avoid races experienced in tcp_timer.c where a timer is already
- * executing at this point. However, we can't, both because we're
- * running in a context where we can't sleep, and also because we
- * hold locks required by the timers. What we instead need to do is
- * test to see if callout_drain() is required, and if so, defer some
- * portion of the remainder of tcp_discardcb() to an asynchronous
- * context that can callout_drain() and then continue. Some care
- * will be required to ensure that no further processing takes place
- * on the tcpcb, even though it hasn't been freed (a flag?).
+ * If stopping a timer fails, we schedule a discard function in same
+ * callout, and the last discard function called will take care of
+ * deleting the tcpcb.
*/
- callout_stop(&tp->t_timers->tt_rexmt);
- callout_stop(&tp->t_timers->tt_persist);
- callout_stop(&tp->t_timers->tt_keep);
- callout_stop(&tp->t_timers->tt_2msl);
- callout_stop(&tp->t_timers->tt_delack);
+ tcp_timer_stop(tp, TT_REXMT);
+ tcp_timer_stop(tp, TT_PERSIST);
+ tcp_timer_stop(tp, TT_KEEP);
+ tcp_timer_stop(tp, TT_2MSL);
+ tcp_timer_stop(tp, TT_DELACK);
/*
* If we got enough samples through the srtt filter,
@@ -902,14 +1081,14 @@
ssthresh = 2;
ssthresh *= (u_long)(tp->t_maxseg +
#ifdef INET6
- (isipv6 ? sizeof (struct ip6_hdr) +
- sizeof (struct tcphdr) :
+ (isipv6 ? sizeof (struct ip6_hdr) +
+ sizeof (struct tcphdr) :
#endif
- sizeof (struct tcpiphdr)
+ sizeof (struct tcpiphdr)
#ifdef INET6
- )
+ )
#endif
- );
+ );
} else
ssthresh = 0;
metrics.rmx_ssthresh = ssthresh;
@@ -925,8 +1104,12 @@
/* free the reassembly queue, if any */
tcp_reass_flush(tp);
+
+#ifdef TCP_OFFLOAD
/* Disconnect offload device, if any. */
- tcp_offload_detach(tp);
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_detach(tp);
+#endif
tcp_free_sackholes(tp);
@@ -938,10 +1121,82 @@
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;
- tp->t_inpcb = NULL;
- uma_zfree(V_tcpcb_zone, tp);
+ if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+ /* We own the last reference on tcpcb, let's free it. */
+ tp->t_inpcb = NULL;
+ uma_zfree(V_tcpcb_zone, tp);
+ released = in_pcbrele_wlocked(inp);
+ KASSERT(!released, ("%s: inp %p should not have been released "
+ "here", __func__, inp));
+ }
}
+/*
+ * Discard entry point for the 2MSL timer: hands xtp, taken to be the
+ * struct tcpcb pointer, to tcp_timer_discard() tagged with TT_2MSL.
+ */
+void
+tcp_timer_2msl_discard(void *xtp)
+{
+	struct tcpcb *tp = xtp;
+
+	tcp_timer_discard(tp, TT_2MSL);
+}
+
+/*
+ * Discard entry point for the keepalive timer: hands xtp, taken to be the
+ * struct tcpcb pointer, to tcp_timer_discard() tagged with TT_KEEP.
+ */
+void
+tcp_timer_keep_discard(void *xtp)
+{
+	struct tcpcb *tp = xtp;
+
+	tcp_timer_discard(tp, TT_KEEP);
+}
+
+/*
+ * Discard entry point for the persist timer: hands xtp, taken to be the
+ * struct tcpcb pointer, to tcp_timer_discard() tagged with TT_PERSIST.
+ */
+void
+tcp_timer_persist_discard(void *xtp)
+{
+	struct tcpcb *tp = xtp;
+
+	tcp_timer_discard(tp, TT_PERSIST);
+}
+
+/*
+ * Discard entry point for the retransmit timer: hands xtp, taken to be the
+ * struct tcpcb pointer, to tcp_timer_discard() tagged with TT_REXMT.
+ */
+void
+tcp_timer_rexmt_discard(void *xtp)
+{
+	struct tcpcb *tp = xtp;
+
+	tcp_timer_discard(tp, TT_REXMT);
+}
+
+/*
+ * Discard entry point for the delayed-ACK timer: hands xtp, taken to be the
+ * struct tcpcb pointer, to tcp_timer_discard() tagged with TT_DELACK.
+ */
+void
+tcp_timer_delack_discard(void *xtp)
+{
+	struct tcpcb *tp = xtp;
+
+	tcp_timer_discard(tp, TT_DELACK);
+}
+
+/*
+ * Common body of the per-timer discard handlers.  Clears 'timer_type' from
+ * the tcpcb's timer flags and, once no TT_MASK bits remain, frees the tcpcb
+ * and drops the inpcb reference it held.
+ *
+ * Takes the tcbinfo read lock and the inpcb write lock itself and lets go
+ * of both before returning.  Defined static to agree with the prototype
+ * near the top of the file.
+ */
+static void
+tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
+{
+	struct inpcb *inp;
+	int inp_freed;
+
+	CURVNET_SET(tp->t_vnet);
+	INP_INFO_RLOCK(&V_tcbinfo);
+	inp = tp->t_inpcb;
+	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
+		__func__, tp));
+	INP_WLOCK(inp);
+	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
+		("%s: tcpcb has to be stopped here", __func__));
+	KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
+		("%s: discard callout should be running", __func__));
+	tp->t_timers->tt_flags &= ~timer_type;
+	inp_freed = 0;
+	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+		/* We own the last reference on this tcpcb, let's free it. */
+		tp->t_inpcb = NULL;
+		uma_zfree(V_tcpcb_zone, tp);
+		inp_freed = in_pcbrele_wlocked(inp);
+	}
+	/*
+	 * NOTE(review): a non-zero in_pcbrele_wlocked() result is taken to
+	 * mean the inpcb (and its lock) is gone, so it must not be unlocked;
+	 * this mirrors the separate early-return path this code used to have.
+	 */
+	if (!inp_freed)
+		INP_WUNLOCK(inp);
+	INP_INFO_RUNLOCK(&V_tcbinfo);
+	CURVNET_RESTORE();
+}
+
/*
* Attempt to close a TCP control block, marking it as dropped, and freeing
* the socket if we hold the only reference.
@@ -952,12 +1207,24 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
- /* Notify any offload devices of listener close */
+#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
- tcp_offload_listen_close(tp);
+ tcp_offload_listen_stop(tp);
+#endif
+#ifdef TCP_RFC7413
+ /*
+ * This releases the TFO pending counter resource for TFO listen
+ * sockets as well as passively-created TFO sockets that transition
+ * from SYN_RECEIVED to CLOSED.
+ */
+ if (tp->t_tfo_pending) {
+ tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+ tp->t_tfo_pending = NULL;
+ }
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@@ -997,9 +1264,9 @@
* XXX: The "Net/3" implementation doesn't imply that the TCP
* reassembly queue should be flushed, but in a situation
* where we're really low on mbufs, this is potentially
- * usefull.
+ * useful.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_flags & INP_TIMEWAIT)
continue;
@@ -1010,7 +1277,7 @@
}
INP_WUNLOCK(inpb);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -1029,7 +1296,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1093,10 +1360,10 @@
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
gencnt = V_tcbinfo.ipi_gencnt;
n = V_tcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
m = syncache_pcbcount();
@@ -1118,10 +1385,8 @@
return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return (ENOMEM);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
@@ -1146,7 +1411,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
n = i;
error = 0;
@@ -1184,7 +1449,7 @@
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
@@ -1191,7 +1456,7 @@
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (!error) {
/*
@@ -1201,11 +1466,11 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
@@ -1366,7 +1631,7 @@
- offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
@@ -1393,12 +1658,11 @@
/*
* If no alternative MTU was
* proposed, try the next smaller
- * one. ip->ip_len has already
- * been swapped in icmp_input().
+ * one.
*/
if (!mtu)
- mtu = ip_next_mtu(ip->ip_len,
- 1);
+ mtu = ip_next_mtu(
+ ntohs(ip->ip_len), 1);
if (mtu < V_tcp_minmss
+ sizeof(struct tcpiphdr))
mtu = V_tcp_minmss
@@ -1427,7 +1691,7 @@
inc.inc_laddr = ip->ip_src;
syncache_unreach(&inc, th);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
}
@@ -1500,9 +1764,9 @@
inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
inc.inc_flags |= INC_ISIPV6;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
syncache_unreach(&inc, &th);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
0, cmd, NULL, notify);
@@ -1635,7 +1899,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1696,7 +1960,7 @@
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output_send(tp);
+ tcp_output(tp);
return (inp);
}
@@ -1708,7 +1972,7 @@
* tcp_mss_update to get the peer/interface MTU.
*/
u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1727,16 +1991,20 @@
}
if (sro.ro_rt != NULL) {
ifp = sro.ro_rt->rt_ifp;
- if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
+ if (sro.ro_rt->rt_mtu == 0)
maxmtu = ifp->if_mtu;
else
- maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+ maxmtu = min(sro.ro_rt->rt_mtu, ifp->if_mtu);
/* Report additional interface capabilities. */
- if (flags != NULL) {
+ if (cap != NULL) {
if (ifp->if_capenable & IFCAP_TSO4 &&
- ifp->if_hwassist & CSUM_TSO)
- *flags |= CSUM_TSO;
+ ifp->if_hwassist & CSUM_TSO) {
+ cap->ifcap |= CSUM_TSO;
+ cap->tsomax = ifp->if_hw_tsomax;
+ cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+ cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+ }
}
RTFREE(sro.ro_rt);
}
@@ -1746,7 +2014,7 @@
#ifdef INET6
u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1763,17 +2031,21 @@
}
if (sro6.ro_rt != NULL) {
ifp = sro6.ro_rt->rt_ifp;
- if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
+ if (sro6.ro_rt->rt_mtu == 0)
maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
else
- maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
+ maxmtu = min(sro6.ro_rt->rt_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
/* Report additional interface capabilities. */
- if (flags != NULL) {
+ if (cap != NULL) {
if (ifp->if_capenable & IFCAP_TSO6 &&
- ifp->if_hwassist & CSUM_TSO)
- *flags |= CSUM_TSO;
+ ifp->if_hwassist & CSUM_TSO) {
+ cap->ifcap |= CSUM_TSO;
+ cap->tsomax = ifp->if_hw_tsomax;
+ cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
+ cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
+ }
}
RTFREE(sro6.ro_rt);
}
@@ -1796,9 +2068,10 @@
#endif
struct tcphdr *th;
- if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL))
+ if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) ||
+ (!key_havesp(IPSEC_DIR_OUTBOUND)))
return (0);
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (!m)
return (0);
@@ -2150,7 +2423,7 @@
default:
return (EINVAL);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -2189,7 +2462,7 @@
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -2312,3 +2585,19 @@
panic("%s: string too long", __func__);
return (s);
}
+
+/*
+ * Move a connection to a new TCP state and fire the state__change probe
+ * with the state being left, so that transitions can be followed with
+ * DTrace.  Not meant for t_state initializations that do not correspond to
+ * a real TCP state transition.
+ */
+void
+tcp_state_change(struct tcpcb *tp, int newstate)
+{
+#ifdef KDTRACE_HOOKS
+	/*
+	 * Only the probe below reads this; presumably the probe macro
+	 * expands to nothing when KDTRACE_HOOKS is not defined.
+	 */
+	int oldstate = tp->t_state;
+#endif
+
+	tp->t_state = newstate;
+	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, oldstate);
+}
Modified: trunk/sys/netinet/tcp_syncache.c
===================================================================
--- trunk/sys/netinet/tcp_syncache.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_syncache.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,13 +1,13 @@
/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 McAfee, Inc.
- * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
+ * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jonathan Lemon
* and McAfee Research, the Security Research Division of McAfee, Inc. under
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
- * DARPA CHATS research program.
+ * DARPA CHATS research program. [2001 McAfee, Inc.]
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_syncache.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_syncache.c 324520 2017-10-11 06:28:46Z sephe $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -48,7 +48,6 @@
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/md5.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/random.h>
#include <sys/socket.h>
@@ -56,6 +55,9 @@
#include <sys/syslog.h>
#include <sys/ucred.h>
+#include <sys/md5.h>
+#include <crypto/siphash/siphash.h>
+
#include <vm/uma.h>
#include <net/if.h>
@@ -77,15 +79,20 @@
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/toecore.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -111,28 +118,34 @@
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-#ifdef TCP_OFFLOAD_DISABLE
-#define TOEPCB_ISSET(sc) (0)
-#else
-#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#ifdef TCP_OFFLOAD
+#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
-struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
-static int syncache_respond(struct syncache *);
+static int syncache_respond(struct syncache *, const struct mbuf *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
+static int syncache_sysctl_count(SYSCTL_HANDLER_ARGS);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
int docallout);
static void syncache_timer(void *);
-static void syncookie_generate(struct syncache_head *, struct syncache *,
- u_int32_t *);
+
+static uint32_t syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
+ uint8_t *, uintptr_t);
+static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *);
static struct syncache
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
- struct syncache *, struct tcpopt *, struct tcphdr *,
+ struct syncache *, struct tcphdr *, struct tcpopt *,
struct socket *);
+static void syncookie_reseed(void *);
+#ifdef INVARIANTS
+static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso);
+#endif
/*
* Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
@@ -159,8 +172,8 @@
&VNET_NAME(tcp_syncache.cache_limit), 0,
"Overall entry limit for syncache");
-SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
- &VNET_NAME(tcp_syncache.cache_count), 0,
+SYSCTL_VNET_PROC(_net_inet_tcp_syncache, OID_AUTO, count, (CTLTYPE_UINT|CTLFLAG_RD),
+ NULL, 0, &syncache_sysctl_count, "IU",
"Current number of entries in syncache");
SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
@@ -226,7 +239,6 @@
{
int i;
- V_tcp_syncache.cache_count = 0;
V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
@@ -253,11 +265,12 @@
V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
+#ifdef VIMAGE
+ V_tcp_syncache.vnet = curvnet;
+#endif
+
/* Initialize the hash buckets. */
for (i = 0; i < V_tcp_syncache.hashsize; i++) {
-#ifdef VIMAGE
- V_tcp_syncache.hashbase[i].sch_vnet = curvnet;
-#endif
TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
NULL, MTX_DEF);
@@ -264,12 +277,23 @@
callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
&V_tcp_syncache.hashbase[i].sch_mtx, 0);
V_tcp_syncache.hashbase[i].sch_length = 0;
+ V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
+ V_tcp_syncache.hashbase[i].sch_last_overflow =
+ -(SYNCOOKIE_LIFETIME + 1);
}
/* Create the syncache entry zone. */
V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
+ V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
+ V_tcp_syncache.cache_limit);
+
+ /* Start the SYN cookie reseeder callout. */
+ callout_init(&V_tcp_syncache.secret.reseed, 1);
+ arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
+ arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
+ callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
+ syncookie_reseed, &V_tcp_syncache);
}
#ifdef VIMAGE
@@ -297,15 +321,26 @@
mtx_destroy(&sch->sch_mtx);
}
- KASSERT(V_tcp_syncache.cache_count == 0, ("%s: cache_count %d not 0",
- __func__, V_tcp_syncache.cache_count));
+ KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
+ ("%s: cache_count not 0", __func__));
/* Free the allocated global resources. */
uma_zdestroy(V_tcp_syncache.zone);
free(V_tcp_syncache.hashbase, M_SYNCACHE);
+
+ callout_drain(&V_tcp_syncache.secret.reseed);
}
#endif
+static int
+syncache_sysctl_count(SYSCTL_HANDLER_ARGS)
+{
+ int count;
+
+ count = uma_zone_get_cur(V_tcp_syncache.zone);
+ return (sysctl_handle_int(oidp, &count, 0, req));
+}
+
/*
* Inserts a syncache entry into the specified bucket row.
* Locks and unlocks the syncache_head autonomously.
@@ -325,6 +360,7 @@
KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
("sch->sch_length incorrect"));
sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
+ sch->sch_last_overflow = time_uptime;
syncache_drop(sc2, sch);
TCPSTAT_INC(tcps_sc_bucketoverflow);
}
@@ -333,6 +369,14 @@
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_added(tod, sc->sc_todctx);
+ }
+#endif
+
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@@ -340,7 +384,6 @@
SCH_UNLOCK(sch);
- V_tcp_syncache.cache_count++;
TCPSTAT_INC(tcps_sc_added);
}
@@ -357,12 +400,15 @@
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
-#endif
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
+
syncache_free(sc);
- V_tcp_syncache.cache_count--;
}
/*
@@ -372,7 +418,7 @@
syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
{
sc->sc_rxttime = ticks +
- TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+ TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
sc->sc_rxmits++;
if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
sch->sch_nextc = sc->sc_rxttime;
@@ -395,7 +441,7 @@
int tick = ticks;
char *s;
- CURVNET_SET(sch->sch_vnet);
+ CURVNET_SET(sch->sch_sc->vnet);
/* NB: syncache_head has already been locked by the callout. */
SCH_LOCK_ASSERT(sch);
@@ -438,7 +484,7 @@
free(s, M_TCPLOG);
}
- (void) syncache_respond(sc);
+ (void) syncache_respond(sc, NULL);
TCPSTAT_INC(tcps_sc_retransmitted);
syncache_timeout(sc, sch, 0);
}
@@ -452,7 +498,7 @@
* Find an entry in the syncache.
* Returns always with locked syncache_head plus a matching entry or NULL.
*/
-struct syncache *
+static struct syncache *
syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
struct syncache *sc;
@@ -623,6 +669,8 @@
/*
* Build a new TCP socket structure from a syncache entry.
+ *
+ * On success return the newly created socket with its underlying inp locked.
*/
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
@@ -633,7 +681,7 @@
int error;
char *s;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Ok, create the full blown connection, and set things up
@@ -641,7 +689,7 @@
* connection when the SYN arrived. If we can't create
* the connection, abort it.
*/
- so = sonewconn(lso, SS_ISCONNECTED);
+ so = sonewconn(lso, 0);
if (so == NULL) {
/*
* Drop the connection; we will either send a RST or
@@ -664,6 +712,15 @@
inp = sotoinpcb(so);
inp->inp_inc.inc_fibnum = so->so_fibnum;
INP_WLOCK(inp);
+ /*
+ * Exclusive pcbinfo lock is not required in syncache socket case even
+ * if two inpcb locks can be acquired simultaneously:
+ * - the inpcb in LISTEN state,
+ * - the newly created inp.
+ *
+ * In this case, an inp cannot be at same time in LISTEN state and
+ * just created by an accept() call.
+ */
INP_HASH_WLOCK(&V_tcbinfo);
/* Insert new socket into PCB hash list. */
@@ -681,6 +738,15 @@
#endif
/*
+ * If there's an mbuf and it has a flowid, then let's initialise the
+ * inp with that particular flowid.
+ */
+ if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+ inp->inp_flowid = m->m_pkthdr.flowid;
+ inp->inp_flowtype = M_HASHTYPE_GET(m);
+ }
+
+ /*
* Install in the reservation hash table for now, but don't yet
* install a connection group since the full 4-tuple isn't yet
* configured.
@@ -795,7 +861,7 @@
#endif /* INET */
INP_HASH_WUNLOCK(&V_tcbinfo);
tp = intotcpcb(inp);
- tp->t_state = TCPS_SYN_RECEIVED;
+ tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
tcp_rcvseqinit(tp);
@@ -841,13 +907,26 @@
tcp_mss(tp, sc->sc_peer_mss);
/*
- * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
+ * If the SYN,ACK was retransmitted, indicate that CWND is to be
+ * limited to one segment in cc_conn_init().
* NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
*/
if (sc->sc_rxmits > 1)
- tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_cwnd = 1;
+#ifdef TCP_OFFLOAD
/*
+ * Allow a TOE driver to install its hooks. Note that we hold the
+ * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
+ * new connection before the TOE driver has done its thing.
+ */
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_offload_socket(tod, sc->sc_todctx, so);
+ }
+#endif
+ /*
* Copy and activate timers.
*/
tp->t_keepinit = sototcpcb(lso)->t_keepinit;
@@ -856,8 +935,6 @@
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- INP_WUNLOCK(inp);
-
TCPSTAT_INC(tcps_accepts);
return (so);
@@ -875,6 +952,9 @@
* in the syncache, and if its there, we pull it out of
* the cache and turn it into a full-blown connection in
* the SYN-RECEIVED state.
+ *
+ * On syncache_socket() success the newly created socket
+ * has its underlying inp locked.
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -889,20 +969,33 @@
* Global TCP locks are held because we manipulate the PCB lists
* and create a new socket.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
+
+#ifdef INVARIANTS
+ /*
+ * Test code for syncookies comparing the syncache stored
+ * values with the reconstructed values from the cookie.
+ */
+ if (sc != NULL)
+ syncookie_cmp(inc, sch, sc, th, to, *lsop);
+#endif
+
if (sc == NULL) {
/*
* There is no syncache entry, so see if this ACK is
* a returning syncookie. To do this, first:
- * A. See if this socket has had a syncache entry dropped in
- * the past. We don't want to accept a bogus syncookie
- * if we've never received a SYN.
- * B. check that the syncookie is valid. If it is, then
+ * A. Check if syncookies are used in case of syncache
+ * overflows
+ * B. See if this socket has had a syncache entry dropped in
+ * the recent past. We don't want to accept a bogus
+ * syncookie if we've never received a SYN or accept it
+ * twice.
+ * C. check that the syncookie is valid. If it is, then
* cobble up a fake syncache entry, and return.
*/
if (!V_tcp_syncookies) {
@@ -913,8 +1006,17 @@
s, __func__);
goto failed;
}
+ if (!V_tcp_syncookiesonly &&
+ sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected (no syncache entry)\n",
+ s, __func__);
+ goto failed;
+ }
bzero(&scs, sizeof(scs));
- sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+ sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
SCH_UNLOCK(sch);
if (sc == NULL) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
@@ -927,7 +1029,13 @@
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
- V_tcp_syncache.cache_count--;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
SCH_UNLOCK(sch);
}
@@ -935,7 +1043,7 @@
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
- if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+ if (th->th_ack != sc->sc_iss + 1) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -946,9 +1054,8 @@
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
- if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
- !TOEPCB_ISSET(sc)) {
+ if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -955,6 +1062,10 @@
goto failed;
}
+ /*
+ * If timestamps were not negotiated during SYN/ACK they
+ * must not appear on any segment during this session.
+ */
if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
@@ -961,12 +1072,27 @@
"segment rejected\n", s, __func__);
goto failed;
}
+
/*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session.
+ * XXXAO: This is only informational as there have been unverified
+ * reports of non-compliant stacks.
+ */
+ if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ s = NULL;
+ }
+ }
+
+ /*
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
- if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
- !TOEPCB_ISSET(sc)) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@@ -994,24 +1120,38 @@
return (0);
}
-int
-tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m)
+#ifdef TCP_RFC7413
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+ uint64_t response_cookie)
{
- struct tcpopt to;
- int rc;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+ unsigned int *pending_counter;
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- rc = syncache_expand(inc, &to, th, lsop, m);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- return (rc);
+ pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+ *lsop = syncache_socket(sc, *lsop, m);
+ if (*lsop == NULL) {
+ TCPSTAT_INC(tcps_sc_aborted);
+ atomic_subtract_int(pending_counter, 1);
+ } else {
+ inp = sotoinpcb(*lsop);
+ tp = intotcpcb(inp);
+ tp->t_flags |= TF_FASTOPEN;
+ tp->t_tfo_cookie = response_cookie;
+ tp->snd_max = tp->iss;
+ tp->snd_nxt = tp->iss;
+ tp->t_tfo_pending = pending_counter;
+ TCPSTAT_INC(tcps_sc_completed);
+ }
}
+#endif /* TCP_RFC7413 */
/*
* Given a LISTEN socket and an inbound SYN request, add
@@ -1025,11 +1165,18 @@
* DoS attack, an attacker could send data which would eventually
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set. In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
*/
-static void
-_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m,
- struct toe_usrreqs *tu, void *toepcb)
+int
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ void *todctx)
{
struct tcpcb *tp;
struct socket *so;
@@ -1036,10 +1183,10 @@
struct syncache *sc = NULL;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
- u_int32_t flowtmp;
u_int ltflags;
int win, sb_hiwat, ip_ttl, ip_tos;
char *s;
+ int rv = 0;
#ifdef INET6
int autoflowlabel = 0;
#endif
@@ -1048,8 +1195,12 @@
#endif
struct syncache scs;
struct ucred *cred;
+#ifdef TCP_RFC7413
+ uint64_t tfo_response_cookie;
+ int tfo_cookie_valid = 0;
+ int tfo_response_cookie_valid = 0;
+#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp); /* listen socket */
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
("%s: unexpected tcp flags", __func__));
@@ -1073,6 +1224,29 @@
sb_hiwat = so->so_rcv.sb_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
+#ifdef TCP_RFC7413
+ if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+ /*
+ * Limit the number of pending TFO connections to
+ * approximately half of the queue limit. This prevents TFO
+ * SYN floods from starving the service by filling the
+ * listen queue with bogus TFO connections.
+ */
+ if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
+ (so->so_qlimit / 2)) {
+ int result;
+
+ result = tcp_fastopen_check_cookie(inc,
+ to->to_tfo_cookie, to->to_tfo_len,
+ &tfo_response_cookie);
+ tfo_cookie_valid = (result > 0);
+ tfo_response_cookie_valid = (result >= 0);
+ } else
+ atomic_subtract_int(tp->t_tfo_pending, 1);
+ }
+#endif
+
/* By the time we drop the lock these should no longer be used. */
so = NULL;
tp = NULL;
@@ -1080,13 +1254,14 @@
#ifdef MAC
if (mac_syncache_init(&maclabel) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
goto done;
} else
mac_syncache_create(maclabel, inp);
#endif
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+#ifdef TCP_RFC7413
+ if (!tfo_cookie_valid)
+#endif
+ INP_WUNLOCK(inp);
/*
* Remember the IP options, if any.
@@ -1115,11 +1290,10 @@
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
- sc->sc_toepcb);
-#endif
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid)
+ INP_WUNLOCK(inp);
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1152,7 +1326,7 @@
s, __func__);
free(s, M_TCPLOG);
}
- if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+ if (syncache_respond(sc, m) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1162,6 +1336,14 @@
goto done;
}
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ goto skip_alloc;
+ }
+#endif
+
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
/*
@@ -1170,8 +1352,10 @@
* entry and insert the new one.
*/
TCPSTAT_INC(tcps_sc_zonefail);
- if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+ if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
+ sch->sch_last_overflow = time_uptime;
syncache_drop(sc, sch);
+ }
sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
if (V_tcp_syncookies) {
@@ -1185,7 +1369,13 @@
}
}
}
-
+
+#ifdef TCP_RFC7413
+skip_alloc:
+ if (!tfo_cookie_valid && tfo_response_cookie_valid)
+ sc->sc_tfo_cookie = &tfo_response_cookie;
+#endif
+
/*
* Fill in the syncache values.
*/
@@ -1203,9 +1393,9 @@
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
-#ifndef TCP_OFFLOAD_DISABLE
- sc->sc_tu = tu;
- sc->sc_toepcb = toepcb;
+#ifdef TCP_OFFLOAD
+ sc->sc_tod = tod;
+ sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@@ -1282,25 +1472,32 @@
if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
sc->sc_flags |= SCF_ECN;
- if (V_tcp_syncookies) {
- syncookie_generate(sch, sc, &flowtmp);
+ if (V_tcp_syncookies)
+ sc->sc_iss = syncookie_generate(sch, sc);
#ifdef INET6
- if (autoflowlabel)
- sc->sc_flowlabel = flowtmp;
+ if (autoflowlabel) {
+ if (V_tcp_syncookies)
+ sc->sc_flowlabel = sc->sc_iss;
+ else
+ sc->sc_flowlabel = ip6_randomflowlabel();
+ sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
+ }
#endif
- } else {
-#ifdef INET6
- if (autoflowlabel)
- sc->sc_flowlabel =
- (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
-#endif
- }
SCH_UNLOCK(sch);
+#ifdef TCP_RFC7413
+ if (tfo_cookie_valid) {
+ syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
+ /* INP_WUNLOCK(inp) will be performed by the caller */
+ rv = 1;
+ goto tfo_done;
+ }
+#endif
+
/*
* Do a standard 3-way handshake.
*/
- if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+ if (syncache_respond(sc, m) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1314,6 +1511,13 @@
}
done:
+ if (m) {
+ *lsop = NULL;
+ m_freem(m);
+ }
+#ifdef TCP_RFC7413
+tfo_done:
+#endif
if (cred != NULL)
crfree(cred);
#ifdef MAC
@@ -1320,15 +1524,15 @@
if (sc == &scs)
mac_syncache_destroy(&maclabel);
#endif
- if (m) {
-
- *lsop = NULL;
- m_freem(m);
- }
+ return (rv);
}
+/*
+ * Send SYN|ACK to the peer. Either in response to the peer's SYN,
+ * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
+ */
static int
-syncache_respond(struct syncache *sc)
+syncache_respond(struct syncache *sc, const struct mbuf *m0)
{
struct ip *ip = NULL;
struct mbuf *m;
@@ -1348,9 +1552,7 @@
tlen = hlen + sizeof(struct tcphdr);
/* Determine MSS we advertize to other end of connection. */
- mssopt = tcp_mssopt(&sc->sc_inc);
- if (sc->sc_peer_mss)
- mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss);
+ mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss);
/* XXX: Assume that the entire packet will fit in a header mbuf. */
KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
@@ -1357,7 +1559,7 @@
("syncache: mbuf too small"));
/* Create the IP+TCP header from scratch. */
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
#ifdef MAC
@@ -1391,7 +1593,7 @@
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(struct ip) >> 2;
- ip->ip_len = tlen;
+ ip->ip_len = htons(tlen);
ip->ip_id = 0;
ip->ip_off = 0;
ip->ip_sum = 0;
@@ -1409,7 +1611,7 @@
* 2) the SCF_UNREACH flag has been set
*/
if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
th = (struct tcphdr *)(ip + 1);
}
@@ -1452,6 +1654,16 @@
if (sc->sc_flags & SCF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif
+
+#ifdef TCP_RFC7413
+ if (sc->sc_tfo_cookie) {
+ to.to_flags |= TOF_FASTOPEN;
+ to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+ to.to_tfo_cookie = sc->sc_tfo_cookie;
+ /* don't send cookie again when retransmitting response */
+ sc->sc_tfo_cookie = NULL;
+ }
+#endif
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
/* Adjust headers by option size. */
@@ -1469,12 +1681,21 @@
ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
else
#endif
- ip->ip_len += optlen;
+ ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
} else
optlen = 0;
M_SETFIB(m, sc->sc_inc.inc_fibnum);
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ /*
+ * If we have peer's SYN and it has a flowid, then let's assign it to
+ * our SYN|ACK. ip6_output() and ip_output() will not assign flowid
+ * to SYN|ACK due to lack of inp here.
+ */
+ if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) {
+ m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
+ M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
+ }
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
@@ -1481,6 +1702,15 @@
th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
#endif
@@ -1492,6 +1722,15 @@
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
@@ -1498,239 +1737,261 @@
return (error);
}
-void
-syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m)
-{
- _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
-}
-
-void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
- struct toe_usrreqs *tu, void *toepcb)
-{
- struct tcpopt to;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
-
- _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
-}
-
/*
- * The purpose of SYN cookies is to avoid keeping track of all SYN's we
- * receive and to be able to handle SYN floods from bogus source addresses
- * (where we will never receive any reply). SYN floods try to exhaust all
- * our memory and available slots in the SYN cache table to cause a denial
- * of service to legitimate users of the local host.
+ * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
+ * that exceed the capacity of the syncache by avoiding the storage of any
+ * of the SYNs we receive. Syncookies defend against blind SYN flooding
+ * attacks where the attacker does not have access to our responses.
*
- * The idea of SYN cookies is to encode and include all necessary information
- * about the connection setup state within the SYN-ACK we send back and thus
- * to get along without keeping any local state until the ACK to the SYN-ACK
- * arrives (if ever). Everything we need to know should be available from
- * the information we encoded in the SYN-ACK.
+ * Syncookies encode and include all necessary information about the
+ * connection setup within the SYN|ACK that we send back. That way we
+ * can avoid keeping any local state until the ACK to our SYN|ACK returns
+ * (if ever). Normally the syncache and syncookies are running in parallel
+ * with the latter taking over when the former is exhausted. When a matching
+ * syncache entry is found the syncookie is ignored.
*
- * More information about the theory behind SYN cookies and its first
- * discussion and specification can be found at:
- * http://cr.yp.to/syncookies.html (overview)
- * http://cr.yp.to/syncookies/archive (gory details)
+ * The only reliable information persisting the 3WHS is our initial sequence
+ * number ISS of 32 bits. Syncookies embed a cryptographically sufficient
+ * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
+ * of our SYN|ACK. The MAC can be recomputed when the ACK to our SYN|ACK
+ * returns and signifies a legitimate connection if it matches the ACK.
*
- * This implementation extends the orginal idea and first implementation
- * of FreeBSD by using not only the initial sequence number field to store
- * information but also the timestamp field if present. This way we can
- * keep track of the entire state we need to know to recreate the session in
- * its original form. Almost all TCP speakers implement RFC1323 timestamps
- * these days. For those that do not we still have to live with the known
- * shortcomings of the ISN only SYN cookies.
+ * The available space of 32 bits to store the hash and to encode the SYN
+ * option information is very tight and we should have at least 24 bits for
+ * the MAC to keep the number of guesses by blind spoofing reasonably high.
*
- * Cookie layers:
+ * SYN option information we have to encode to fully restore a connection:
+ * MSS: is important to choose an optimal segment size to avoid IP level
+ * fragmentation along the path. The common MSS values can be encoded
+ * in a 3-bit table. Uncommon values are captured by the next lower value
+ * in the table leading to a slight increase in packetization overhead.
+ * WSCALE: is necessary to allow large windows to be used for high delay-
+ * bandwidth product links. Not scaling the window when it was initially
+ * negotiated is bad for performance as lack of scaling further decreases
+ * the apparent available send window. We only need to encode the WSCALE
+ * we received from the remote end. Our end can be recalculated at any
+ * time. The common WSCALE values can be encoded in a 3-bit table.
+ * Uncommon values are captured by the next lower value in the table
+ * making us under-estimate the available window size halving our
+ * theoretically possible maximum throughput for that connection.
+ * SACK: Greatly assists in packet loss recovery and requires 1 bit.
+ * TIMESTAMP and SIGNATURE are not encoded because they are permanent options
+ * that are included in all segments on a connection. We enable them when
+ * the ACK has them.
*
- * Initial sequence number we send:
- * 31|................................|0
- * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
- * D = MD5 Digest (first dword)
- * M = MSS index
- * R = Rotation of secret
- * P = Odd or Even secret
+ * Security of syncookies and attack vectors:
*
- * The MD5 Digest is computed with over following parameters:
- * a) randomly rotated secret
- * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
- * c) the received initial sequence number from remote host
- * d) the rotation offset and odd/even bit
+ * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
+ * together with the global secret to make it unique per connection attempt.
+ * Thus any change of any of those parameters results in a different MAC output
+ * in an unpredictable way unless a collision is encountered. 24 bits of the
+ * MAC are embedded into the ISS.
*
- * Timestamp we send:
- * 31|................................|0
- * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
- * D = MD5 Digest (third dword) (only as filler)
- * S = Requested send window scale
- * R = Requested receive window scale
- * A = SACK allowed
- * 5 = TCP-MD5 enabled (not implemented yet)
- * XORed with MD5 Digest (forth dword)
+ * To prevent replay attacks two rotating global secrets are updated with a
+ * new random value every 15 seconds. The life-time of a syncookie is thus
+ * 15-30 seconds.
*
- * The timestamp isn't cryptographically secure and doesn't need to be.
- * The double use of the MD5 digest dwords ties it to a specific remote/
- * local host/port, remote initial sequence number and our local time
- * limited secret. A received timestamp is reverted (XORed) and then
- * the contained MD5 dword is compared to the computed one to ensure the
- * timestamp belongs to the SYN-ACK we sent. The other parameters may
- * have been tampered with but this isn't different from supplying bogus
- * values in the SYN in the first place.
+ * Vector 1: Attacking the secret. This requires finding a weakness in the
+ * MAC itself or the way it is used here. The attacker can do a chosen plain
+ * text attack by varying and testing all the parameters under his control.
+ * The strength depends on the size and randomness of the secret, and the
+ * cryptographic security of the MAC function. Due to the constant updating
+ * of the secret the attacker has at most 29.999 seconds to find the secret
+ * and launch spoofed connections. After that he has to start all over again.
*
- * Some problems with SYN cookies remain however:
- * Consider the problem of a recreated (and retransmitted) cookie. If the
- * original SYN was accepted, the connection is established. The second
- * SYN is inflight, and if it arrives with an ISN that falls within the
- * receive window, the connection is killed.
+ * Vector 2: Collision attack on the MAC of a single ACK. With a 24 bit MAC
+ * size an average of 4,823 attempts are required for a 50% chance of success
+ * to spoof a single syncookie (birthday collision paradox). However the
+ * attacker is blind and doesn't know if one of his attempts succeeded unless
+ * he has a side channel to infer success from. A single connection setup
+ * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
+ * This many attempts are required for each one blind spoofed connection. For
+ * every additional spoofed connection he has to launch another N attempts.
+ * Thus for a sustained rate 100 spoofed connections per second approximately
+ * 1,800,000 packets per second would have to be sent.
*
- * Notes:
- * A heuristic to determine when to accept syn cookies is not necessary.
- * An ACK flood would cause the syncookie verification to be attempted,
- * but a SYN flood causes syncookies to be generated. Both are of equal
- * cost, so there's no point in trying to optimize the ACK flood case.
- * Also, if you don't process certain ACKs for some reason, then all someone
- * would have to do is launch a SYN and ACK flood at the same time, which
- * would stop cookie verification and defeat the entire purpose of syncookies.
+ * NB: The MAC function should be fast so that it doesn't become a CPU
+ * exhaustion attack vector itself.
+ *
+ * References:
+ * RFC4987 TCP SYN Flooding Attacks and Common Mitigations
+ * SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (details)
+ *
+ *
+ * Schematic construction of a syncookie enabled Initial Sequence Number:
+ * 0 1 2 3
+ * 12345678901234567890123456789012
+ * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
+ *
+ * x 24 MAC (truncated)
+ * W 3 Send Window Scale index
+ * M 3 MSS index
+ * S 1 SACK permitted
+ * P 1 Odd/even secret
*/
-static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
-static void
-syncookie_generate(struct syncache_head *sch, struct syncache *sc,
- u_int32_t *flowlabel)
+/*
+ * Distribution and probability of certain MSS values. Those in between are
+ * rounded down to the next lower one.
+ * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
+ * .2% .3% 5% 7% 7% 20% 15% 45%
+ */
+static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
+
+/*
+ * Distribution and probability of certain WSCALE values. We have to map the
+ * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
+ * bits based on prevalence of certain values. Values without an exact
+ * match are rounded down to the next lower one letting us under-estimate
+ * the true available window. At the moment this would happen only for the
+ * very uncommon values 3, 5 and those above 8 (more than 16MB socket buffer
+ * and window size). The absence of the WSCALE option (no scaling in either
+ * direction) is encoded with index zero.
+ * [WSCALE values histograms, Allman, 2012]
+ * X 10 10 35 5 6 14 10% by host
+ * X 11 4 5 5 18 49 3% by connections
+ */
+static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
+
+/*
+ * Compute the MAC for the SYN cookie. SIPHASH-2-4 is chosen for its speed
+ * and good cryptographic properties.
+ */
+static uint32_t
+syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
+ uint8_t *secbits, uintptr_t secmod)
{
- MD5_CTX ctx;
- u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
- u_int32_t data;
- u_int32_t *secbits;
- u_int off, pmss, mss;
- int i;
+ SIPHASH_CTX ctx;
+ uint32_t siphash[2];
+ SipHash24_Init(&ctx);
+ SipHash_SetKey(&ctx, secbits);
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+ SipHash_Update(&ctx, &inc->inc_laddr, sizeof(inc->inc_laddr));
+ break;
+#endif
+#ifdef INET6
+ case INC_ISIPV6:
+ SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+ SipHash_Update(&ctx, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
+ break;
+#endif
+ }
+ SipHash_Update(&ctx, &inc->inc_fport, sizeof(inc->inc_fport));
+ SipHash_Update(&ctx, &inc->inc_lport, sizeof(inc->inc_lport));
+ SipHash_Update(&ctx, &irs, sizeof(irs));
+ SipHash_Update(&ctx, &flags, sizeof(flags));
+ SipHash_Update(&ctx, &secmod, sizeof(secmod));
+ SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+ return (siphash[0] ^ siphash[1]);
+}
+
+static tcp_seq
+syncookie_generate(struct syncache_head *sch, struct syncache *sc)
+{
+ u_int i, secbit, wscale;
+ uint32_t iss, hash;
+ uint8_t *secbits;
+ union syncookie cookie;
+
SCH_LOCK_ASSERT(sch);
- /* Which of the two secrets to use. */
- secbits = sch->sch_oddeven ?
- sch->sch_secbits_odd : sch->sch_secbits_even;
+ cookie.cookie = 0;
- /* Reseed secret if too old. */
- if (sch->sch_reseed < time_uptime) {
- sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */
- secbits = sch->sch_oddeven ?
- sch->sch_secbits_odd : sch->sch_secbits_even;
- for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
- secbits[i] = arc4random();
- sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
+ /* Map our computed MSS into the 3-bit index. */
+ for (i = sizeof(tcp_sc_msstab) / sizeof(*tcp_sc_msstab) - 1;
+ tcp_sc_msstab[i] > sc->sc_peer_mss && i > 0;
+ i--)
+ ;
+ cookie.flags.mss_idx = i;
+
+ /*
+ * Map the send window scale into the 3-bit index but only if
+ * the wscale option was received.
+ */
+ if (sc->sc_flags & SCF_WINSCALE) {
+ wscale = sc->sc_requested_s_scale;
+ for (i = sizeof(tcp_sc_wstab) / sizeof(*tcp_sc_wstab) - 1;
+ tcp_sc_wstab[i] > wscale && i > 0;
+ i--)
+ ;
+ cookie.flags.wscale_idx = i;
}
- /* Secret rotation offset. */
- off = sc->sc_iss & 0x7; /* iss was randomized before */
+ /* Can we do SACK? */
+ if (sc->sc_flags & SCF_SACK)
+ cookie.flags.sack_ok = 1;
- /* Maximum segment size calculation. */
- pmss =
- max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), V_tcp_minmss);
- for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
- if (tcp_sc_msstab[mss] <= pmss)
- break;
+ /* Which of the two secrets to use. */
+ secbit = sch->sch_sc->secret.oddeven & 0x1;
+ cookie.flags.odd_even = secbit;
- /* Fold parameters and MD5 digest into the ISN we will send. */
- data = sch->sch_oddeven;/* odd or even secret, 1 bit */
- data |= off << 1; /* secret offset, derived from iss, 3 bits */
- data |= mss << 4; /* mss, 3 bits */
+ secbits = sch->sch_sc->secret.key[secbit];
+ hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
+ (uintptr_t)sch);
- MD5Init(&ctx);
- MD5Update(&ctx, ((u_int8_t *)secbits) + off,
- SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
- MD5Update(&ctx, secbits, off);
- MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
- MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
- MD5Update(&ctx, &data, sizeof(data));
- MD5Final((u_int8_t *)&md5_buffer, &ctx);
+ /*
+ * Put the flags into the hash and XOR them to get better ISS number
+ * variance. This doesn't enhance the cryptographic strength and is
+ * done to prevent the 8 cookie bits from showing up directly on the
+ * wire.
+ */
+ iss = hash & ~0xff;
+ iss |= cookie.cookie ^ (hash >> 24);
- data |= (md5_buffer[0] << 7);
- sc->sc_iss = data;
-
-#ifdef INET6
- *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
-#endif
-
- /* Additional parameters are stored in the timestamp if present. */
+ /* Randomize the timestamp. */
if (sc->sc_flags & SCF_TIMESTAMP) {
- data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
- data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
- data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */
- data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */
- data |= md5_buffer[2] << 10; /* more digest bits */
- data ^= md5_buffer[3];
- sc->sc_ts = data;
- sc->sc_tsoff = data - tcp_ts_getticks(); /* after XOR */
+ sc->sc_ts = arc4random();
+ sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
}
TCPSTAT_INC(tcps_sc_sendcookie);
+ return (iss);
}
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
- struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
- struct socket *so)
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso)
{
- MD5_CTX ctx;
- u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
- u_int32_t data = 0;
- u_int32_t *secbits;
+ uint32_t hash;
+ uint8_t *secbits;
tcp_seq ack, seq;
- int off, mss, wnd, flags;
+ int wnd, wscale = 0;
+ union syncookie cookie;
SCH_LOCK_ASSERT(sch);
/*
- * Pull information out of SYN-ACK/ACK and
- * revert sequence number advances.
+ * Pull information out of SYN-ACK/ACK and revert sequence number
+ * advances.
*/
ack = th->th_ack - 1;
seq = th->th_seq - 1;
- off = (ack >> 1) & 0x7;
- mss = (ack >> 4) & 0x7;
- flags = ack & 0x7f;
- /* Which of the two secrets to use. */
- secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
-
/*
- * The secret wasn't updated for the lifetime of a syncookie,
- * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+ * Unpack the flags containing enough information to restore the
+ * connection.
*/
- if (sch->sch_reseed + SYNCOOKIE_LIFETIME < time_uptime) {
- return (NULL);
- }
+ cookie.cookie = (ack & 0xff) ^ (ack >> 24);
- /* Recompute the digest so we can compare it. */
- MD5Init(&ctx);
- MD5Update(&ctx, ((u_int8_t *)secbits) + off,
- SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
- MD5Update(&ctx, secbits, off);
- MD5Update(&ctx, inc, sizeof(*inc));
- MD5Update(&ctx, &seq, sizeof(seq));
- MD5Update(&ctx, &flags, sizeof(flags));
- MD5Final((u_int8_t *)&md5_buffer, &ctx);
+ /* Which of the two secrets to use. */
+ secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
- /* Does the digest part of or ACK'ed ISS match? */
- if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
+ hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
+
+ /* The recomputed hash matches the ACK if this was a genuine cookie. */
+ if ((ack & ~0xff) != (hash & ~0xff))
return (NULL);
- /* Does the digest part of our reflected timestamp match? */
- if (to->to_flags & TOF_TS) {
- data = md5_buffer[3] ^ to->to_tsecr;
- if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
- return (NULL);
- }
-
/* Fill in the syncache values. */
+ sc->sc_flags = 0;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
sc->sc_ipopts = NULL;
@@ -1737,52 +1998,125 @@
sc->sc_irs = seq;
sc->sc_iss = ack;
+ switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+ case 0:
+ sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
+ sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
+ break;
+#endif
#ifdef INET6
- if (inc->inc_flags & INC_ISIPV6) {
- if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL)
- sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
- } else
+ case INC_ISIPV6:
+ if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
+ sc->sc_flowlabel = sc->sc_iss & IPV6_FLOWLABEL_MASK;
+ break;
#endif
- {
- sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
- sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
}
- /* Additional parameters that were encoded in the timestamp. */
- if (data) {
+ sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
+
+ /* We can simply recompute receive window scale we sent earlier. */
+ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
+ wscale++;
+
+ /* Only use wscale if it was enabled in the original SYN. */
+ if (cookie.flags.wscale_idx > 0) {
+ sc->sc_requested_r_scale = wscale;
+ sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
+ sc->sc_flags |= SCF_WINSCALE;
+ }
+
+ wnd = sbspace(&lso->so_rcv);
+ wnd = imax(wnd, 0);
+ wnd = imin(wnd, TCP_MAXWIN);
+ sc->sc_wnd = wnd;
+
+ if (cookie.flags.sack_ok)
+ sc->sc_flags |= SCF_SACK;
+
+ if (to->to_flags & TOF_TS) {
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
sc->sc_ts = to->to_tsecr;
sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
- sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
- sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
- sc->sc_requested_s_scale = min((data >> 2) & 0xf,
- TCP_MAX_WINSHIFT);
- sc->sc_requested_r_scale = min((data >> 6) & 0xf,
- TCP_MAX_WINSHIFT);
- if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
- sc->sc_flags |= SCF_WINSCALE;
- } else
- sc->sc_flags |= SCF_NOOPT;
+ }
- wnd = sbspace(&so->so_rcv);
- wnd = imax(wnd, 0);
- wnd = imin(wnd, TCP_MAXWIN);
- sc->sc_wnd = wnd;
+ if (to->to_flags & TOF_SIGNATURE)
+ sc->sc_flags |= SCF_SIGNATURE;
sc->sc_rxmits = 0;
- sc->sc_peer_mss = tcp_sc_msstab[mss];
TCPSTAT_INC(tcps_sc_recvcookie);
return (sc);
}
+#ifdef INVARIANTS
+static int
+syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
+ struct socket *lso)
+{
+ struct syncache scs, *scx;
+ char *s;
+
+ bzero(&scs, sizeof(scs));
+ scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
+
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
+ return (0);
+
+ if (scx != NULL) {
+ if (sc->sc_peer_mss != scx->sc_peer_mss)
+ log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
+ s, __func__, sc->sc_peer_mss, scx->sc_peer_mss);
+
+ if (sc->sc_requested_r_scale != scx->sc_requested_r_scale)
+ log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
+ s, __func__, sc->sc_requested_r_scale,
+ scx->sc_requested_r_scale);
+
+ if (sc->sc_requested_s_scale != scx->sc_requested_s_scale)
+ log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
+ s, __func__, sc->sc_requested_s_scale,
+ scx->sc_requested_s_scale);
+
+ if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK))
+ log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__);
+ }
+
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ return (0);
+}
+#endif /* INVARIANTS */
+
+static void
+syncookie_reseed(void *arg)
+{
+ struct tcp_syncache *sc = arg;
+ uint8_t *secbits;
+ int secbit;
+
+ /*
+ * Reseeding the secret doesn't have to be protected by a lock.
+ * It only must be ensured that the new random values are visible
+ * to all CPUs in a SMP environment. The atomic with release
+ * semantics ensures that.
+ */
+ secbit = (sc->secret.oddeven & 0x1) ? 0 : 1;
+ secbits = sc->secret.key[secbit];
+ arc4rand(secbits, SYNCOOKIE_SECRET_SIZE, 0);
+ atomic_add_rel_int(&sc->secret.oddeven, 1);
+
+ /* Reschedule ourself. */
+ callout_schedule(&sc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
+}
+
/*
* Returns the current number of syncache entries. This number
* will probably change before you get around to calling
* syncache_pcblist.
*/
-
int
syncache_pcbcount(void)
{
Modified: trunk/sys/netinet/tcp_syncache.h
===================================================================
--- trunk/sys/netinet/tcp_syncache.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_syncache.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $
+ * $FreeBSD: stable/10/sys/netinet/tcp_syncache.h 322315 2017-08-09 13:26:12Z tuexen $
*/
#ifndef _NETINET_TCP_SYNCACHE_H_
@@ -35,8 +35,6 @@
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
-struct toeopt;
-
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@@ -44,14 +42,9 @@
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m);
-void syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
- struct tcphdr *, struct inpcb *, struct socket **,
- struct toe_usrreqs *tu, void *toepcb);
-
+int syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@@ -76,13 +69,16 @@
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
-#ifndef TCP_OFFLOAD_DISABLE
- struct toe_usrreqs *sc_tu; /* TOE operations */
- void *sc_toepcb; /* TOE protocol block */
-#endif
+#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
+ struct toedev *sc_tod; /* entry added by this TOE */
+ void *sc_todctx; /* TOE driver context */
+#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
-
+#ifdef TCP_RFC7413
+ void *sc_tfo_cookie; /* for TCP Fast Open response */
+#endif
+ void *sc_pspare; /* TCP_SIGNATURE */
u_int32_t sc_spare[2]; /* UTO */
};
@@ -98,22 +94,26 @@
#define SCF_SACK 0x80 /* send SACK option */
#define SCF_ECN 0x100 /* send ECN setup packet */
-#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
-#define SYNCOOKIE_LIFETIME 16 /* seconds */
-
struct syncache_head {
- struct vnet *sch_vnet;
struct mtx sch_mtx;
TAILQ_HEAD(sch_head, syncache) sch_bucket;
struct callout sch_timer;
int sch_nextc;
u_int sch_length;
- u_int sch_oddeven;
- u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
- u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
- u_int sch_reseed; /* time_uptime, seconds */
+ struct tcp_syncache *sch_sc;
+ time_t sch_last_overflow;
};
+#define SYNCOOKIE_SECRET_SIZE 16
+#define SYNCOOKIE_LIFETIME 15 /* seconds */
+
+struct syncookie_secret {
+ volatile u_int oddeven;
+ uint8_t key[2][SYNCOOKIE_SECRET_SIZE];
+ struct callout reseed;
+ u_int lifetime;
+};
+
struct tcp_syncache {
struct syncache_head *hashbase;
uma_zone_t zone;
@@ -120,11 +120,23 @@
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
- u_int cache_count; /* XXX: unprotected */
u_int cache_limit;
u_int rexmt_limit;
u_int hash_secret;
+ struct vnet *vnet;
+ struct syncookie_secret secret;
};
+/* Internal use for the syncookie functions. */
+union syncookie {
+ uint8_t cookie;
+ struct {
+ uint8_t odd_even:1,
+ sack_ok:1,
+ wscale_idx:3,
+ mss_idx:3;
+ } flags;
+};
+
#endif /* _KERNEL */
#endif /* !_NETINET_TCP_SYNCACHE_H_ */
Modified: trunk/sys/netinet/tcp_timer.c
===================================================================
--- trunk/sys/netinet/tcp_timer.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timer.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,8 +31,9 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_timer.c 247498 2013-02-28 21:24:10Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_timer.c 330303 2018-03-03 00:54:12Z jhb $");
+#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -63,11 +64,22 @@
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+int tcp_persmin;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
+
+int tcp_persmax;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
+
int tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
@@ -99,9 +111,10 @@
&tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
"Retransmission Timer Slop");
-static int always_keepalive = 1;
+int tcp_always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
- &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+ &tcp_always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+__strong_reference(tcp_always_keepalive, always_keepalive);
int tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
@@ -119,11 +132,59 @@
/* max idle probes */
int tcp_maxpersistidle;
-static int tcp_rexmit_drop_options = 1;
+static int tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
+#define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
+ "Path MTU Discovery Black Hole Detection Enabled");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
+#define V_tcp_pmtud_blackhole_activated \
+ VNET(tcp_pmtud_blackhole_activated)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
+#define V_tcp_pmtud_blackhole_activated_min_mss \
+ VNET(tcp_pmtud_blackhole_activated_min_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
+ "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
+#define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
+ CTLFLAG_RD,
+ &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
+ "Path MTU Discovery Black Hole Detection, Failure Count");
+
+#ifdef INET
+static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
+#define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
+ "Path MTU Discovery Black Hole Detection lowered MSS");
+#endif
+
+#ifdef INET6
+static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
+#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+ CTLFLAG_RW,
+ &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
+ "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+#endif
+
static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
@@ -144,9 +205,7 @@
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_WLOCK(&V_tcbinfo);
(void) tcp_tw_2msl_scan(0);
- INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -160,10 +219,6 @@
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
-static int tcp_timer_race;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
- 0, "Count of t_inpcb races on tcp_discardcb");
-
/*
* TCP timer processing.
*/
@@ -176,18 +231,7 @@
CURVNET_SET(tp->t_vnet);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_delack) ||
!callout_active(&tp->t_timers->tt_delack)) {
@@ -201,6 +245,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
+ ("%s: tp %p delack callout should be running", __func__, tp));
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
@@ -220,30 +268,15 @@
ostate = tp->t_state;
#endif
- /*
- * XXXRW: Does this actually happen?
- */
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
tcp_free_sackholes(tp);
if (callout_pending(&tp->t_timers->tt_2msl) ||
!callout_active(&tp->t_timers->tt_2msl)) {
INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -250,20 +283,33 @@
callout_deactivate(&tp->t_timers->tt_2msl);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
+ ("%s: tp %p 2msl callout should be running", __func__, tp));
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
- * too long, or if 2MSL time is up from TIME_WAIT, delete connection
- * control block. Otherwise, check again in a bit.
+ * too long delete connection control block. Otherwise, check
+ * again in a bit.
*
+ * If in TIME_WAIT state just ignore as this timeout is handled in
+ * tcp_tw_2msl_scan().
+ *
* If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
* there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
* Ignore fact that there were recent incoming segments.
*/
+ if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
+ return;
+ }
if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
tp->t_inpcb && tp->t_inpcb->inp_socket &&
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
@@ -270,11 +316,12 @@
TCPSTAT_INC(tcps_finwait2_drops);
tp = tcp_close(tp);
} else {
- if (tp->t_state != TCPS_TIME_WAIT &&
- ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
- callout_reset_on(&tp->t_timers->tt_2msl,
- TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
- else
+ if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+ if (!callout_reset(&tp->t_timers->tt_2msl,
+ TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
+ tp->t_timers->tt_flags &= ~TT_2MSL_RST;
+ }
+ } else
tp = tcp_close(tp);
}
@@ -285,7 +332,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -301,26 +348,14 @@
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_keep) ||
!callout_active(&tp->t_timers->tt_keep)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -327,10 +362,14 @@
callout_deactivate(&tp->t_timers->tt_keep);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
+ ("%s: tp %p keep callout should be running", __func__, tp));
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
@@ -338,7 +377,8 @@
TCPSTAT_INC(tcps_keeptimeo);
if (tp->t_state < TCPS_ESTABLISHED)
goto dropit;
- if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
+ if ((tcp_always_keepalive ||
+ inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
goto dropit;
@@ -362,11 +402,14 @@
tp->rcv_nxt, tp->snd_una - 1, 0);
free(t_template, M_TEMP);
}
- callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
- tcp_timer_keep, tp, INP_CPU(inp));
- } else
- callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
- tcp_timer_keep, tp, INP_CPU(inp));
+ if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+ tcp_timer_keep, tp)) {
+ tp->t_timers->tt_flags &= ~TT_KEEP_RST;
+ }
+ } else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+ tcp_timer_keep, tp)) {
+ tp->t_timers->tt_flags &= ~TT_KEEP_RST;
+ }
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
@@ -374,7 +417,7 @@
PRU_SLOWTIMO);
#endif
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
@@ -389,7 +432,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -404,26 +447,14 @@
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_persist) ||
!callout_active(&tp->t_timers->tt_persist)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -430,10 +461,14 @@
callout_deactivate(&tp->t_timers->tt_persist);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
+ ("%s: tp %p persist callout should be running", __func__, tp));
/*
* Persistance timer into zero window.
* Force a byte to be output, if possible.
@@ -453,6 +488,16 @@
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
}
+ /*
+ * If the user has closed the socket then drop a persisting
+ * connection after a much reduced timeout.
+ */
+ if (tp->t_state > TCPS_CLOSE_WAIT &&
+ (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+ TCPSTAT_INC(tcps_persistdrop);
+ tp = tcp_drop(tp, ETIMEDOUT);
+ goto out;
+ }
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
(void) tcp_output(tp);
@@ -465,7 +510,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -482,21 +527,10 @@
ostate = tp->t_state;
#endif
+
INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
- /*
- * XXXRW: While this assert is in fact correct, bugs in the tcpcb
- * tear-down mean we need it as a work-around for races between
- * timers and tcp_discardcb().
- *
- * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
- */
- if (inp == NULL) {
- tcp_timer_race++;
- INP_INFO_RUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
+ KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_rexmt) ||
!callout_active(&tp->t_timers->tt_rexmt)) {
@@ -512,6 +546,10 @@
CURVNET_RESTORE();
return;
}
+ KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
+ ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
+ ("%s: tp %p rexmt callout should be running", __func__, tp));
tcp_free_sackholes(tp);
/*
* Retransmission timer went off. Message has not
@@ -521,22 +559,6 @@
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
TCPSTAT_INC(tcps_timeoutdrop);
- in_pcbref(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return;
- }
tp = tcp_drop(tp, tp->t_softerror ?
tp->t_softerror : ETIMEDOUT);
@@ -545,8 +567,14 @@
}
INP_INFO_RUNLOCK(&V_tcbinfo);
headlocked = 0;
- if (tp->t_rxtshift == 1) {
+ if (tp->t_state == TCPS_SYN_SENT) {
/*
+ * If the SYN was retransmitted, indicate CWND to be
+ * limited to 1 segment in cc_conn_init().
+ */
+ tp->snd_cwnd = 1;
+ } else if (tp->t_rxtshift == 1) {
+ /*
* first retransmit; record ssthresh and cwnd so they can
* be recovered if this turns out to be a "bad" retransmit.
* A retransmit is considered "bad" if an ACK for this
@@ -571,14 +599,127 @@
} else
tp->t_flags &= ~TF_PREVVALID;
TCPSTAT_INC(tcps_rexmttimeo);
- if (tp->t_state == TCPS_SYN_SENT)
- rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
+ if ((tp->t_state == TCPS_SYN_SENT) ||
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
TCPT_RANGESET(tp->t_rxtcur, rexmt,
tp->t_rttmin, TCPTV_REXMTMAX);
+
/*
- * Disable rfc1323 if we haven't got any response to
+ * We enter the PLPMTUD path if the connection is in ESTABLISHED or
+ * FIN_WAIT_1 state. FIN_WAIT_1 is included because, if the amount of
+ * data we send is very small, we could send it in a couple of packets
+ * and proceed straight to FIN. In that case we would never catch the
+ * ESTABLISHED state.
+ */
+ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
+ || (tp->t_state == TCPS_FIN_WAIT_1))) {
+ int optlen;
+#ifdef INET6
+ int isipv6;
+#endif
+
+ /*
+ * Idea here is that at each stage of mtu probe (usually, 1448
+ * -> 1188 -> 524) should be given 2 chances to recover before
+ * further clamping down. 'tp->t_rxtshift % 2 == 0' should
+ * take care of that.
+ */
+ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
+ (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
+ (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
+ /*
+ * Enter Path MTU Black-hole Detection mechanism:
+ * - Disable Path MTU Discovery (IP "DF" bit).
+ * - Reduce MTU to lower value than what we
+ * negotiated with peer.
+ */
+ /* Record that we may have found a black hole. */
+ tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
+
+ /* Keep track of previous MSS. */
+ optlen = tp->t_maxopd - tp->t_maxseg;
+ tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+
+ /*
+ * Reduce the MSS to blackhole value or to the default
+ * in an attempt to retransmit.
+ */
+#ifdef INET6
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
+ if (isipv6 &&
+ tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else if (isipv6) {
+ /* Use the default MSS. */
+ tp->t_maxopd = V_tcp_v6mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+ /* Use the sysctl tuneable blackhole MSS. */
+ tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+ V_tcp_pmtud_blackhole_activated++;
+ } else {
+ /* Use the default MSS. */
+ tp->t_maxopd = V_tcp_mssdflt;
+ /*
+ * Disable Path MTU Discovery when we switch to
+ * minmss.
+ */
+ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+ V_tcp_pmtud_blackhole_activated_min_mss++;
+ }
+#endif
+ tp->t_maxseg = tp->t_maxopd - optlen;
+ /*
+ * Reset the slow-start flight size
+ * as it may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ } else {
+ /*
+ * If further retransmissions are still unsuccessful
+ * with a lowered MTU, maybe this isn't a blackhole and
+ * we restore the previous MSS and blackhole detection
+ * flags.
+ * The limit '6' is determined by giving each probe
+ * stage (1448, 1188, 524) 2 chances to recover.
+ */
+ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
+ (tp->t_rxtshift > 6)) {
+ tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
+ optlen = tp->t_maxopd - tp->t_maxseg;
+ tp->t_maxopd = tp->t_pmtud_saved_maxopd;
+ tp->t_maxseg = tp->t_maxopd - optlen;
+ V_tcp_pmtud_blackhole_failed++;
+ /*
+ * Reset the slow-start flight size as it
+ * may depend on the new MSS.
+ */
+ if (CC_ALGO(tp)->conn_init != NULL)
+ CC_ALGO(tp)->conn_init(tp->ccv);
+ }
+ }
+ }
+
+ /*
+ * Disable RFC1323 and SACK if we haven't got any response to
* our third SYN to work-around some broken terminal servers
* (most of which have hopefully been retired) that have bad VJ
* header compression code which trashes TCP segments containing
@@ -586,7 +727,7 @@
*/
if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
(tp->t_rxtshift == 3))
- tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
+ tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
/*
* If we backed off this far, our srtt estimate is probably bogus.
* Clobber it so we'll take the next rtt measurement as our srtt;
@@ -597,7 +738,6 @@
#ifdef INET6
if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
in6_losing(tp->t_inpcb);
- else
#endif
tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
tp->t_srtt = 0;
@@ -626,51 +766,80 @@
if (tp != NULL)
INP_WUNLOCK(inp);
if (headlocked)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
void
-tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
{
struct callout *t_callout;
- void *f_callout;
+ timeout_t *f_callout;
struct inpcb *inp = tp->t_inpcb;
int cpu = INP_CPU(inp);
+ uint32_t f_reset;
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return;
+#endif
+
+ if (tp->t_timers->tt_flags & TT_STOPPED)
+ return;
+
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;
f_callout = tcp_timer_delack;
+ f_reset = TT_DELACK_RST;
break;
case TT_REXMT:
t_callout = &tp->t_timers->tt_rexmt;
f_callout = tcp_timer_rexmt;
+ f_reset = TT_REXMT_RST;
break;
case TT_PERSIST:
t_callout = &tp->t_timers->tt_persist;
f_callout = tcp_timer_persist;
+ f_reset = TT_PERSIST_RST;
break;
case TT_KEEP:
t_callout = &tp->t_timers->tt_keep;
f_callout = tcp_timer_keep;
+ f_reset = TT_KEEP_RST;
break;
case TT_2MSL:
t_callout = &tp->t_timers->tt_2msl;
f_callout = tcp_timer_2msl;
+ f_reset = TT_2MSL_RST;
break;
default:
- panic("bad timer_type");
+ panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
- callout_stop(t_callout);
+ if ((tp->t_timers->tt_flags & timer_type) &&
+ callout_stop(t_callout) &&
+ (tp->t_timers->tt_flags & f_reset)) {
+ tp->t_timers->tt_flags &= ~(timer_type | f_reset);
+ }
} else {
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ if ((tp->t_timers->tt_flags & timer_type) == 0) {
+ tp->t_timers->tt_flags |= (timer_type | f_reset);
+ callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ } else {
+ /* Reset already running callout on the same CPU. */
+ if (!callout_reset(t_callout, delta, f_callout, tp)) {
+ /*
+ * Callout not cancelled, consider it as not
+ * properly restarted. */
+ tp->t_timers->tt_flags &= ~f_reset;
+ }
+ }
}
}
int
-tcp_timer_active(struct tcpcb *tp, int timer_type)
+tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
{
struct callout *t_callout;
@@ -691,28 +860,91 @@
t_callout = &tp->t_timers->tt_2msl;
break;
default:
- panic("bad timer_type");
+ panic("tp %p bad timer_type %#x", tp, timer_type);
}
return callout_active(t_callout);
}
+void
+tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+{
+ struct callout *t_callout;
+ timeout_t *f_callout;
+ uint32_t f_reset;
+
+ tp->t_timers->tt_flags |= TT_STOPPED;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ f_callout = tcp_timer_delack_discard;
+ f_reset = TT_DELACK_RST;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ f_callout = tcp_timer_rexmt_discard;
+ f_reset = TT_REXMT_RST;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ f_callout = tcp_timer_persist_discard;
+ f_reset = TT_PERSIST_RST;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ f_callout = tcp_timer_keep_discard;
+ f_reset = TT_KEEP_RST;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ f_callout = tcp_timer_2msl_discard;
+ f_reset = TT_2MSL_RST;
+ break;
+ default:
+ panic("tp %p bad timer_type %#x", tp, timer_type);
+ }
+
+ if (tp->t_timers->tt_flags & timer_type) {
+ if (callout_stop(t_callout) &&
+ (tp->t_timers->tt_flags & f_reset)) {
+ tp->t_timers->tt_flags &= ~(timer_type | f_reset);
+ } else {
+ /*
+ * Can't stop the callout, defer tcpcb actual deletion
+ * to the last tcp timer discard callout.
+ * The TT_STOPPED flag will ensure that no tcp timer
+ * callouts can be restarted on our behalf, and
+ * past this point currently running callouts waiting
+ * on inp lock will return right away after the
+ * classical check for callout reset/stop events:
+ * callout_pending() || !callout_active()
+ */
+ callout_reset(t_callout, 1, f_callout, tp);
+ }
+ }
+}
+
#define ticks_to_msecs(t) (1000*(t) / hz)
void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+ struct xtcp_timer *xtimer)
{
- bzero(xtimer, sizeof(struct xtcp_timer));
+ sbintime_t now;
+
+ bzero(xtimer, sizeof(*xtimer));
if (timer == NULL)
return;
+ now = getsbinuptime();
if (callout_active(&timer->tt_delack))
- xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
+ xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_rexmt))
- xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
+ xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_persist))
- xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
+ xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_keep))
- xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+ xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_2msl))
- xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+ xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}
Modified: trunk/sys/netinet/tcp_timer.h
===================================================================
--- trunk/sys/netinet/tcp_timer.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timer.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcp_timer.h 232945 2012-03-13 20:37:57Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/tcp_timer.h 330303 2018-03-03 00:54:12Z jhb $
*/
#ifndef _NETINET_TCP_TIMER_H_
@@ -79,7 +79,7 @@
#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */
-#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */
+#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */
#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */
@@ -119,11 +119,11 @@
#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */
-#define TCPTV_DELACK (hz / PR_FASTHZ / 2) /* 100ms timeout */
+#define TCPTV_DELACK ( hz/10 ) /* 100ms timeout */
#ifdef TCPTIMERS
static const char *tcptimers[] =
- { "REXMT", "PERSIST", "KEEP", "2MSL" };
+ { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
#endif
/*
@@ -147,13 +147,28 @@
struct callout tt_keep; /* keepalive */
struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
struct callout tt_delack; /* delayed ACK timer */
+ uint32_t tt_flags; /* Timers flags */
+ uint32_t tt_spare; /* TBD */
};
-#define TT_DELACK 0x01
-#define TT_REXMT 0x02
-#define TT_PERSIST 0x04
-#define TT_KEEP 0x08
-#define TT_2MSL 0x10
+/*
+ * Flags for the tt_flags field.
+ */
+#define TT_DELACK 0x0001
+#define TT_REXMT 0x0002
+#define TT_PERSIST 0x0004
+#define TT_KEEP 0x0008
+#define TT_2MSL 0x0010
+#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
+
+#define TT_DELACK_RST 0x0100
+#define TT_REXMT_RST 0x0200
+#define TT_PERSIST_RST 0x0400
+#define TT_KEEP_RST 0x0800
+#define TT_2MSL_RST 0x1000
+
+#define TT_STOPPED 0x00010000
+
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
@@ -160,6 +175,8 @@
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+extern int tcp_persmin; /* minimum persist interval */
+extern int tcp_persmax; /* maximum persist interval */
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
@@ -171,7 +188,9 @@
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
+extern int tcp_syn_backoff[];
+extern int tcp_always_keepalive;
extern int tcp_finwait2_timeout;
extern int tcp_fast_finwait2_recycle;
@@ -178,11 +197,16 @@
void tcp_timer_init(void);
void tcp_timer_2msl(void *xtp);
struct tcptw *
- tcp_tw_2msl_scan(int _reuse); /* XXX temporary */
+ tcp_tw_2msl_scan(int reuse); /* XXX temporary? */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
void tcp_timer_delack(void *xtp);
+void tcp_timer_2msl_discard(void *xtp);
+void tcp_timer_keep_discard(void *xtp);
+void tcp_timer_persist_discard(void *xtp);
+void tcp_timer_rexmt_discard(void *xtp);
+void tcp_timer_delack_discard(void *xtp);
void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
struct xtcp_timer *xtimer);
Modified: trunk/sys/netinet/tcp_timewait.c
===================================================================
--- trunk/sys/netinet/tcp_timewait.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_timewait.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_timewait.c 247658 2013-03-02 17:51:22Z flo $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_timewait.c 309108 2016-11-24 14:48:46Z jch $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -92,7 +92,7 @@
#include <security/mac/mac_framework.h>
static VNET_DEFINE(uma_zone_t, tcptw_zone);
-#define V_tcptw_zone VNET(tcptw_zone)
+#define V_tcptw_zone VNET(tcptw_zone)
static int maxtcptw;
/*
@@ -99,13 +99,34 @@
* The timed wait queue contains references to each of the TCP sessions
* currently in the TIME_WAIT state. The queue pointers, including the
* queue pointers in each tcptw structure, are protected using the global
- * tcbinfo lock, which must be held over queue iteration and modification.
+ * timewait lock, which must be held over queue iteration and modification.
+ *
+ * Rules on tcptw usage:
+ * - an inpcb is always freed _after_ its tcptw
+ * - a tcptw relies on its inpcb reference counting for memory stability
+ * - a tcptw is dereferenceable only while its inpcb is locked
*/
static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
-#define V_twq_2msl VNET(twq_2msl)
+#define V_twq_2msl VNET(twq_2msl)
+/* Global timewait lock */
+static VNET_DEFINE(struct rwlock, tw_lock);
+#define V_tw_lock VNET(tw_lock)
+
+#define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0)
+#define TW_LOCK_DESTROY(tw) rw_destroy(&(tw))
+#define TW_RLOCK(tw) rw_rlock(&(tw))
+#define TW_WLOCK(tw) rw_wlock(&(tw))
+#define TW_RUNLOCK(tw) rw_runlock(&(tw))
+#define TW_WUNLOCK(tw) rw_wunlock(&(tw))
+#define TW_LOCK_ASSERT(tw) rw_assert(&(tw), RA_LOCKED)
+#define TW_RLOCK_ASSERT(tw) rw_assert(&(tw), RA_RLOCKED)
+#define TW_WLOCK_ASSERT(tw) rw_assert(&(tw), RA_WLOCKED)
+#define TW_UNLOCK_ASSERT(tw) rw_assert(&(tw), RA_UNLOCKED)
+
static void tcp_tw_2msl_reset(struct tcptw *, int);
-static void tcp_tw_2msl_stop(struct tcptw *);
+static void tcp_tw_2msl_stop(struct tcptw *, int);
+static int tcp_twrespond(struct tcptw *, int);
static int
tcptw_auto_size(void)
@@ -172,6 +193,7 @@
else
uma_zone_set_max(V_tcptw_zone, maxtcptw);
TAILQ_INIT(&V_twq_2msl);
+ TW_LOCK_INIT(V_tw_lock, "tcptw");
}
#ifdef VIMAGE
@@ -180,11 +202,12 @@
{
struct tcptw *tw;
- INP_INFO_WLOCK(&V_tcbinfo);
- while((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
+ INP_INFO_RLOCK(&V_tcbinfo);
+ while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
}
#endif
@@ -205,9 +228,13 @@
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_reset(). */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
+ /* A dropped inp should never transition to TIME_WAIT state. */
+ KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: "
+ "(inp->inp_flags & INP_DROPPED) != 0"));
+
if (V_nolocaltimewait) {
int error = 0;
#ifdef INET6
@@ -228,8 +255,23 @@
}
}
+
+ /*
+ * For use only by DTrace. We do not reference the state
+ * after this point so modifying it in place is not a problem.
+ */
+ tcp_state_change(tp, TCPS_TIME_WAIT);
+
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
if (tw == NULL) {
+ /*
+ * Reached limit on total number of TIMEWAIT connections
+ * allowed. Remove a connection from TIMEWAIT queue in LRU
+ * fashion to make room for this connection.
+ *
+ * XXX: Check if it is possible to always have enough room
+ * in advance based on guarantees provided by uma_zalloc().
+ */
tw = tcp_tw_2msl_scan(1);
if (tw == NULL) {
tp = tcp_close(tp);
@@ -238,7 +280,12 @@
return;
}
}
+ /*
+ * The tcptw will hold a reference on its inpcb until tcp_twclose
+ * is called
+ */
tw->tw_inpcb = inp;
+ in_pcbref(inp); /* Reference from tw */
/*
* Recover last window size sent.
@@ -321,7 +368,7 @@
* Most other new OSes use semi-randomized ISN values, so we
* do not need to worry about them.
*/
-#define MS_ISN_BYTES_PER_SECOND 250000
+#define MS_ISN_BYTES_PER_SECOND 250000
/*
* Determine if the ISN we will generate has advanced beyond the last
@@ -334,7 +381,7 @@
tcp_seq new_iss = tw->iss;
tcp_seq new_irs = tw->irs;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
@@ -350,7 +397,7 @@
* looking for a pcb in the listen state. Returns 0 otherwise.
*/
int
-tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
struct mbuf *m, int tlen)
{
struct tcptw *tw;
@@ -357,8 +404,7 @@
int thflags;
tcp_seq seq;
- /* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -459,11 +505,10 @@
inp = tw->tw_inpcb;
KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* tcp_tw_2msl_stop(). */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
INP_WLOCK_ASSERT(inp);
- tw->tw_inpcb = NULL;
- tcp_tw_2msl_stop(tw);
+ tcp_tw_2msl_stop(tw, reuse);
inp->inp_ppcb = NULL;
in_pcbdrop(inp);
@@ -492,17 +537,17 @@
*/
INP_WUNLOCK(inp);
}
- } else
+ } else {
+ /*
+ * The socket has been already cleaned-up for us, only free the
+ * inpcb.
+ */
in_pcbfree(inp);
+ }
TCPSTAT_INC(tcps_closed);
- crfree(tw->tw_cred);
- tw->tw_cred = NULL;
- if (reuse)
- return;
- uma_zfree(V_tcptw_zone, tw);
}
-int
+static int
tcp_twrespond(struct tcptw *tw, int flags)
{
struct inpcb *inp = tw->tw_inpcb;
@@ -524,7 +569,7 @@
INP_WLOCK_ASSERT(inp);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
m->m_data += max_linkhdr;
@@ -595,9 +640,9 @@
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- ip->ip_len = m->m_pkthdr.len;
+ ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
error = ip_output(m, inp->inp_options, NULL,
((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, inp);
@@ -615,20 +660,43 @@
tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tw->tw_inpcb);
+
+ TW_WLOCK(V_tw_lock);
if (rearm)
TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
tw->tw_time = ticks + 2 * tcp_msl;
TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
+ TW_WUNLOCK(V_tw_lock);
}
static void
-tcp_tw_2msl_stop(struct tcptw *tw)
+tcp_tw_2msl_stop(struct tcptw *tw, int reuse)
{
+ struct ucred *cred;
+ struct inpcb *inp;
+ int released;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ TW_WLOCK(V_tw_lock);
+ inp = tw->tw_inpcb;
+ tw->tw_inpcb = NULL;
+
TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
+ cred = tw->tw_cred;
+ tw->tw_cred = NULL;
+ TW_WUNLOCK(V_tw_lock);
+
+ if (cred != NULL)
+ crfree(cred);
+
+ released = in_pcbrele_wlocked(inp);
+ KASSERT(!released, ("%s: inp should not be released here", __func__));
+
+ if (!reuse)
+ uma_zfree(V_tcptw_zone, tw);
}
struct tcptw *
@@ -635,16 +703,70 @@
tcp_tw_2msl_scan(int reuse)
{
struct tcptw *tw;
+ struct inpcb *inp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+#ifdef INVARIANTS
+ if (reuse) {
+ /*
+ * Exclusive pcbinfo lock is not required in reuse case even if
+ * two inpcb locks can be acquired simultaneously:
+ * - the inpcb transitioning to TIME_WAIT state in
+ * tcp_twstart(),
+ * - the inpcb closed by tcp_twclose().
+ *
+ * This is because only inpcbs in FIN_WAIT2 or CLOSING states can
+ * transition to TIME_WAIT state. Thus an inpcb cannot be in the
+ * TIME_WAIT list and transitioning to TIME_WAIT state at the same
+ * time.
+ */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
+
for (;;) {
+ TW_RLOCK(V_tw_lock);
tw = TAILQ_FIRST(&V_twq_2msl);
- if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
+ if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
+ TW_RUNLOCK(V_tw_lock);
break;
- INP_WLOCK(tw->tw_inpcb);
- tcp_twclose(tw, reuse);
- if (reuse)
- return (tw);
+ }
+ KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
+ __func__));
+
+ inp = tw->tw_inpcb;
+ in_pcbref(inp);
+ TW_RUNLOCK(V_tw_lock);
+
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
+
+ INP_WLOCK(inp);
+ tw = intotw(inp);
+ if (in_pcbrele_wlocked(inp)) {
+ KASSERT(tw == NULL, ("%s: held last inp "
+ "reference but tw not NULL", __func__));
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ }
+
+ if (tw == NULL) {
+ /* tcp_twclose() has already been called */
+ INP_WUNLOCK(inp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ continue;
+ }
+
+ tcp_twclose(tw, reuse);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ if (reuse)
+ return tw;
+ } else {
+ /* INP_INFO lock is busy, continue later. */
+ INP_WLOCK(inp);
+ if (!in_pcbrele_wlocked(inp))
+ INP_WUNLOCK(inp);
+ break;
+ }
}
- return (NULL);
+
+ return NULL;
}
Modified: trunk/sys/netinet/tcp_usrreq.c
===================================================================
--- trunk/sys/netinet/tcp_usrreq.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_usrreq.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_usrreq.c 241132 2012-10-02 12:57:47Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_usrreq.c 309108 2016-11-24 14:48:46Z jch $");
#include "opt_ddb.h"
#include "opt_inet.h"
@@ -59,6 +59,7 @@
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/jail.h>
+#include <sys/syslog.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -80,6 +81,9 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
@@ -88,7 +92,9 @@
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
+#endif
/*
* TCP protocol interface to socket abstraction.
@@ -161,7 +167,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
@@ -181,12 +187,43 @@
* present until timewait ends.
*
* XXXRW: Would it be cleaner to free the tcptw here?
+ *
+ * Astute question indeed; from the tcptw perspective there are
+ * three cases to consider:
+ *
+ * #1 tcp_detach is called at tcptw creation time by
+ * tcp_twstart, then do not discard the newly created tcptw
+ * and leave inpcb present until timewait ends
+ * #2 tcp_detach is called at timewait end (or reuse) by
+ * tcp_twclose, then the tcptw has already been discarded
+ * (or reused) and inpcb is freed here
+ * #3 tcp_detach() is called after timewait ends (or reuse)
+ * (e.g. by soclose), then tcptw has already been discarded
+ * (or reused) and inpcb is freed here
+ *
+ * In all three cases the tcptw should not be freed here.
*/
if (inp->inp_flags & INP_DROPPED) {
- KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
- "INP_DROPPED && tp != NULL"));
in_pcbdetach(inp);
- in_pcbfree(inp);
+ if (__predict_true(tp == NULL)) {
+ in_pcbfree(inp);
+ } else {
+ /*
+ * This case should not happen as in TIMEWAIT
+ * state the inp should not be destroyed before
+ * its tcptw. If INVARIANTS is defined, panic.
+ */
+#ifdef INVARIANTS
+ panic("%s: Panic before an inp double-free: "
+ "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
+ , __func__);
+#else
+ log(LOG_ERR, "%s: Avoid an inp double-free: "
+ "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
+ , __func__);
+#endif
+ INP_WUNLOCK(inp);
+ }
} else {
in_pcbdetach(inp);
INP_WUNLOCK(inp);
@@ -224,15 +261,20 @@
tcp_usr_detach(struct socket *so)
{
struct inpcb *inp;
+ int rlock = 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
+ INP_INFO_RLOCK(&V_tcbinfo);
+ rlock = 1;
+ }
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (rlock)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#ifdef INET
@@ -366,12 +408,19 @@
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
- tp->t_state = TCPS_LISTEN;
+ tcp_state_change(tp, TCPS_LISTEN);
solisten_proto(so, backlog);
- tcp_offload_listen_open(tp);
+#ifdef TCP_OFFLOAD
+ if ((so->so_options & SO_NO_OFFLOAD) == 0)
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
@@ -408,11 +457,19 @@
}
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
- tp->t_state = TCPS_LISTEN;
+ tcp_state_change(tp, TCPS_LISTEN);
solisten_proto(so, backlog);
+#ifdef TCP_OFFLOAD
+ if ((so->so_options & SO_NO_OFFLOAD) == 0)
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
+#ifdef TCP_RFC7413
+ if (tp->t_flags & TF_FASTOPEN)
+ tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+#endif
out:
TCPDEBUG2(PRU_LISTEN);
INP_WUNLOCK(inp);
@@ -452,15 +509,26 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ error = EADDRINUSE;
goto out;
}
+ if (inp->inp_flags & INP_DROPPED) {
+ error = ECONNREFUSED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -492,10 +560,14 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- error = EINVAL;
+ if (inp->inp_flags & INP_TIMEWAIT) {
+ error = EADDRINUSE;
goto out;
}
+ if (inp->inp_flags & INP_DROPPED) {
+ error = ECONNREFUSED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
#ifdef INET
@@ -520,7 +592,13 @@
goto out;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ error = tcp_output(tp);
goto out;
}
#endif
@@ -531,7 +609,14 @@
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (so->so_options & SO_NO_OFFLOAD) == 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -559,11 +644,13 @@
int error = 0;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
- if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ if (inp->inp_flags & INP_TIMEWAIT)
+ goto out;
+ if (inp->inp_flags & INP_DROPPED) {
error = ECONNRESET;
goto out;
}
@@ -573,7 +660,7 @@
out:
TCPDEBUG2(PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -581,13 +668,6 @@
/*
* Accept a connection. Essentially all the work is done at higher levels;
* just return the address of the peer, storing through addr.
- *
- * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
- * and is described in detail in the commit log entry for r175612. Acquiring
- * it delays an accept(2) racing with sonewconn(), which inserts the socket
- * before the inpcb address/port fields are initialized. A better fix would
- * prevent the socket from being placed in the listen queue until all fields
- * are fully initialized.
*/
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
@@ -604,7 +684,6 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -624,7 +703,6 @@
out:
TCPDEBUG2(PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0)
*nam = in_sockaddr(port, &addr);
return error;
@@ -697,7 +775,7 @@
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -710,12 +788,12 @@
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output_disconnect(tp);
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -740,7 +818,24 @@
}
tp = intotcpcb(inp);
TCPDEBUG1();
- tcp_output_rcvd(tp);
+#ifdef TCP_RFC7413
+ /*
+ * For passively-created TFO connections, don't attempt a window
+ * update while still in SYN_RECEIVED as this may trigger an early
+ * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
+ * application response data, or failing that, when the DELACK timer
+ * expires.
+ */
+ if ((tp->t_flags & TF_FASTOPEN) &&
+ (tp->t_state == TCPS_SYN_RECEIVED))
+ goto out;
+#endif
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_rcvd(tp);
+ else
+#endif
+ tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -772,7 +867,7 @@
* this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -829,7 +924,7 @@
* Close the send side of the connection after
* the data is sent.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
socantsendmore(so);
tcp_usrclosed(tp);
}
@@ -836,7 +931,7 @@
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -885,7 +980,7 @@
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
out:
@@ -893,7 +988,7 @@
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -910,7 +1005,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -932,7 +1027,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -948,7 +1043,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -971,7 +1066,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -1119,8 +1214,7 @@
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
- tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_state_change(tp, TCPS_SYN_SENT);
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1192,8 +1286,7 @@
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
- tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_state_change(tp, TCPS_SYN_SENT);
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1324,9 +1417,9 @@
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
#endif /* TCP_SIGNATURE */
+
case TCP_NODELAY:
case TCP_NOOPT:
INP_WUNLOCK(inp);
@@ -1352,6 +1445,13 @@
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
+unlock_and_done:
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
INP_WUNLOCK(inp);
break;
@@ -1370,8 +1470,7 @@
if (TCPS_HAVEESTABLISHED(tp->t_state))
error = tcp_output(tp);
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
@@ -1386,8 +1485,7 @@
tp->t_maxseg = optval;
else
error = EINVAL;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_INFO:
INP_WUNLOCK(inp);
@@ -1439,8 +1537,7 @@
}
}
CC_LIST_RUNLOCK();
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
@@ -1484,8 +1581,7 @@
TP_KEEPINIT(tp));
break;
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_KEEPCNT:
INP_WUNLOCK(inp);
@@ -1499,9 +1595,31 @@
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
TP_MAXIDLE(tp));
+ goto unlock_and_done;
+
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
INP_WUNLOCK(inp);
- break;
+ if (!V_tcp_fastopen_enabled)
+ return (EPERM);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ INP_WLOCK_RECHECK(inp);
+ if (optval) {
+ tp->t_flags |= TF_FASTOPEN;
+ if ((tp->t_state == TCPS_LISTEN) &&
+ (tp->t_tfo_pending == NULL))
+ tp->t_tfo_pending =
+ tcp_fastopen_alloc_counter();
+ } else
+ tp->t_flags &= ~TF_FASTOPEN;
+ goto unlock_and_done;
+#endif
+
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1551,6 +1669,34 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
break;
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ case TCP_KEEPINIT:
+ case TCP_KEEPCNT:
+ switch (sopt->sopt_name) {
+ case TCP_KEEPIDLE:
+ ui = tp->t_keepidle / hz;
+ break;
+ case TCP_KEEPINTVL:
+ ui = tp->t_keepintvl / hz;
+ break;
+ case TCP_KEEPINIT:
+ ui = tp->t_keepinit / hz;
+ break;
+ case TCP_KEEPCNT:
+ ui = tp->t_keepcnt;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &ui, sizeof(ui));
+ break;
+#ifdef TCP_RFC7413
+ case TCP_FASTOPEN:
+ optval = tp->t_flags & TF_FASTOPEN;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
+#endif
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1563,18 +1709,6 @@
#undef INP_WLOCK_RECHECK
/*
- * tcp_sendspace and tcp_recvspace are the default send and receive window
- * sizes, respectively. These are obsolescent (this information should
- * be set by the route).
- */
-u_long tcp_sendspace = 1024*32;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
- &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
-u_long tcp_recvspace = 1024*64;
-SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
- &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
-
-/*
* Attach TCP protocol to socket, allocating
* internet protocol control block, tcp control block,
* bufer space, and entering LISTEN state if to accept connections.
@@ -1587,16 +1721,16 @@
int error;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- error = soreserve(so, tcp_sendspace, tcp_recvspace);
+ error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
if (error)
return (error);
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
inp = sotoinpcb(so);
@@ -1612,12 +1746,12 @@
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
@@ -1635,7 +1769,7 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -1655,7 +1789,7 @@
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output_disconnect(tp);
+ tcp_output(tp);
}
}
@@ -1673,15 +1807,17 @@
tcp_usrclosed(struct tcpcb *tp)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
switch (tp->t_state) {
case TCPS_LISTEN:
- tcp_offload_listen_close(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_stop(tp);
+#endif
+ tcp_state_change(tp, TCPS_CLOSED);
/* FALLTHROUGH */
case TCPS_CLOSED:
- tp->t_state = TCPS_CLOSED;
tp = tcp_close(tp);
/*
* tcp_close() should never return NULL here as the socket is
@@ -1697,11 +1833,11 @@
break;
case TCPS_ESTABLISHED:
- tp->t_state = TCPS_FIN_WAIT_1;
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
break;
case TCPS_CLOSE_WAIT:
- tp->t_state = TCPS_LAST_ACK;
+ tcp_state_change(tp, TCPS_LAST_ACK);
break;
}
if (tp->t_state >= TCPS_FIN_WAIT_2) {
@@ -1884,6 +2020,10 @@
db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_FASTOPEN) {
+ db_printf("%sTF_FASTOPEN", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
Modified: trunk/sys/netinet/tcp_var.h
===================================================================
--- trunk/sys/netinet/tcp_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcp_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet/tcp_var.h 235051 2012-05-05 07:55:50Z glebius $
+ * $FreeBSD: stable/10/sys/netinet/tcp_var.h 317375 2017-04-24 16:31:28Z smh $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -74,7 +74,12 @@
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
int ispare; /* explicit pad for 64bit alignment */
- uint64_t _pad[2]; /* 1 sacked_bytes, 1 TBD */
+ int sacked_bytes; /*
+ * Total sacked bytes reported by the
+ * receiver via sack option
+ */
+ uint32_t _pad1[1]; /* TBD */
+ uint64_t _pad[1]; /* TBD */
};
struct tcptemp {
@@ -195,7 +200,7 @@
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
- struct toe_usrreqs *t_tu; /* offload operations vector */
+ struct toedev *tod; /* toedev handling this connection */
int t_sndrexmitpack; /* retransmit packets sent */
int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */
@@ -209,9 +214,25 @@
u_int t_keepintvl; /* interval between keepalives */
u_int t_keepcnt; /* number of keepalives before close */
- uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
- void *t_pspare2[4]; /* 4 TBD */
- uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
+ u_int t_tsomax; /* TSO total burst length limit in bytes */
+ u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
+ u_int t_flags2; /* More tcpcb flags storage */
+
+ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ void *t_pspare2[3]; /* 1 TCP_SIGNATURE, 2 TBD */
+ unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
+#else
+ void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+#endif
+#if defined(_KERNEL) && defined(TCP_RFC7413)
+ uint64_t _pad[4]; /* 4 TBD (1-2 CC/RTT?) */
+ uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
+#else
+ uint64_t _pad[5]; /* 5 TBD (1-2 CC/RTT?) */
+#endif
+ uint32_t t_tsomaxsegcount; /* TSO maximum segment count */
+ uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */
};
/*
@@ -246,6 +267,7 @@
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
+#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
@@ -283,6 +305,13 @@
#endif /* TCP_SIGNATURE */
/*
+ * Flags for PLPMTU handling, t_flags2
+ */
+#define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */
+#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
+#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
+
+/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
@@ -298,14 +327,17 @@
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_MAXOPT 0x0100
+#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
+#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
+ u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
+ u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};
@@ -325,6 +357,17 @@
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
};
+/*
+ * Used by tcp_maxmtu() to communicate interface specific features
+ * and limits at the time of connection setup.
+ */
+struct tcp_ifcap {
+ int ifcap;
+ u_int tsomax;
+ u_int tsomaxsegcount;
+ u_int tsomaxsegsize;
+};
+
#ifndef _NETINET_IN_PCB_H_
struct in_conninfo;
#endif /* _NETINET_IN_PCB_H_ */
@@ -343,6 +386,8 @@
u_int t_starttime;
int tw_time;
TAILQ_ENTRY(tcptw) tw_2msl;
+ void *tw_pspare; /* TCP_SIGNATURE */
+ u_int *tw_spare; /* TCP_SIGNATURE */
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
@@ -390,125 +435,129 @@
* but that's inconvenient at the moment.
*/
struct tcpstat {
- u_long tcps_connattempt; /* connections initiated */
- u_long tcps_accepts; /* connections accepted */
- u_long tcps_connects; /* connections established */
- u_long tcps_drops; /* connections dropped */
- u_long tcps_conndrops; /* embryonic connections dropped */
- u_long tcps_minmssdrops; /* average minmss too low drops */
- u_long tcps_closed; /* conn. closed (includes drops) */
- u_long tcps_segstimed; /* segs where we tried to get rtt */
- u_long tcps_rttupdated; /* times we succeeded */
- u_long tcps_delack; /* delayed acks sent */
- u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
- u_long tcps_rexmttimeo; /* retransmit timeouts */
- u_long tcps_persisttimeo; /* persist timeouts */
- u_long tcps_keeptimeo; /* keepalive timeouts */
- u_long tcps_keepprobe; /* keepalive probes sent */
- u_long tcps_keepdrops; /* connections dropped in keepalive */
+ uint64_t tcps_connattempt; /* connections initiated */
+ uint64_t tcps_accepts; /* connections accepted */
+ uint64_t tcps_connects; /* connections established */
+ uint64_t tcps_drops; /* connections dropped */
+ uint64_t tcps_conndrops; /* embryonic connections dropped */
+ uint64_t tcps_minmssdrops; /* average minmss too low drops */
+ uint64_t tcps_closed; /* conn. closed (includes drops) */
+ uint64_t tcps_segstimed; /* segs where we tried to get rtt */
+ uint64_t tcps_rttupdated; /* times we succeeded */
+ uint64_t tcps_delack; /* delayed acks sent */
+ uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
+ uint64_t tcps_rexmttimeo; /* retransmit timeouts */
+ uint64_t tcps_persisttimeo; /* persist timeouts */
+ uint64_t tcps_keeptimeo; /* keepalive timeouts */
+ uint64_t tcps_keepprobe; /* keepalive probes sent */
+ uint64_t tcps_keepdrops; /* connections dropped in keepalive */
- u_long tcps_sndtotal; /* total packets sent */
- u_long tcps_sndpack; /* data packets sent */
- u_long tcps_sndbyte; /* data bytes sent */
- u_long tcps_sndrexmitpack; /* data packets retransmitted */
- u_long tcps_sndrexmitbyte; /* data bytes retransmitted */
- u_long tcps_sndrexmitbad; /* unnecessary packet retransmissions */
- u_long tcps_sndacks; /* ack-only packets sent */
- u_long tcps_sndprobe; /* window probes sent */
- u_long tcps_sndurg; /* packets sent with URG only */
- u_long tcps_sndwinup; /* window update-only packets sent */
- u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
+ uint64_t tcps_sndtotal; /* total packets sent */
+ uint64_t tcps_sndpack; /* data packets sent */
+ uint64_t tcps_sndbyte; /* data bytes sent */
+ uint64_t tcps_sndrexmitpack; /* data packets retransmitted */
+ uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */
+ uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */
+ uint64_t tcps_sndacks; /* ack-only packets sent */
+ uint64_t tcps_sndprobe; /* window probes sent */
+ uint64_t tcps_sndurg; /* packets sent with URG only */
+ uint64_t tcps_sndwinup; /* window update-only packets sent */
+ uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
- u_long tcps_rcvtotal; /* total packets received */
- u_long tcps_rcvpack; /* packets received in sequence */
- u_long tcps_rcvbyte; /* bytes received in sequence */
- u_long tcps_rcvbadsum; /* packets received with ccksum errs */
- u_long tcps_rcvbadoff; /* packets received with bad offset */
- u_long tcps_rcvmemdrop; /* packets dropped for lack of memory */
- u_long tcps_rcvshort; /* packets received too short */
- u_long tcps_rcvduppack; /* duplicate-only packets received */
- u_long tcps_rcvdupbyte; /* duplicate-only bytes received */
- u_long tcps_rcvpartduppack; /* packets with some duplicate data */
- u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
- u_long tcps_rcvoopack; /* out-of-order packets received */
- u_long tcps_rcvoobyte; /* out-of-order bytes received */
- u_long tcps_rcvpackafterwin; /* packets with data after window */
- u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */
- u_long tcps_rcvafterclose; /* packets rcvd after "close" */
- u_long tcps_rcvwinprobe; /* rcvd window probe packets */
- u_long tcps_rcvdupack; /* rcvd duplicate acks */
- u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */
- u_long tcps_rcvackpack; /* rcvd ack packets */
- u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */
- u_long tcps_rcvwinupd; /* rcvd window update packets */
- u_long tcps_pawsdrop; /* segments dropped due to PAWS */
- u_long tcps_predack; /* times hdr predict ok for acks */
- u_long tcps_preddat; /* times hdr predict ok for data pkts */
- u_long tcps_pcbcachemiss;
- u_long tcps_cachedrtt; /* times cached RTT in route updated */
- u_long tcps_cachedrttvar; /* times cached rttvar updated */
- u_long tcps_cachedssthresh; /* times cached ssthresh updated */
- u_long tcps_usedrtt; /* times RTT initialized from route */
- u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */
- u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/
- u_long tcps_persistdrop; /* timeout in persist state */
- u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */
- u_long tcps_mturesent; /* resends due to MTU discovery */
- u_long tcps_listendrop; /* listen queue overflows */
- u_long tcps_badrst; /* ignored RSTs in the window */
+ uint64_t tcps_rcvtotal; /* total packets received */
+ uint64_t tcps_rcvpack; /* packets received in sequence */
+ uint64_t tcps_rcvbyte; /* bytes received in sequence */
+ uint64_t tcps_rcvbadsum; /* packets received with checksum errs */
+ uint64_t tcps_rcvbadoff; /* packets received with bad offset */
+ uint64_t tcps_rcvmemdrop; /* packets dropped for lack of memory */
+ uint64_t tcps_rcvshort; /* packets received too short */
+ uint64_t tcps_rcvduppack; /* duplicate-only packets received */
+ uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */
+ uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */
+ uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
+ uint64_t tcps_rcvoopack; /* out-of-order packets received */
+ uint64_t tcps_rcvoobyte; /* out-of-order bytes received */
+ uint64_t tcps_rcvpackafterwin; /* packets with data after window */
+ uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */
+ uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */
+ uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */
+ uint64_t tcps_rcvdupack; /* rcvd duplicate acks */
+ uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */
+ uint64_t tcps_rcvackpack; /* rcvd ack packets */
+ uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */
+ uint64_t tcps_rcvwinupd; /* rcvd window update packets */
+ uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */
+ uint64_t tcps_predack; /* times hdr predict ok for acks */
+ uint64_t tcps_preddat; /* times hdr predict ok for data pkts */
+ uint64_t tcps_pcbcachemiss;
+ uint64_t tcps_cachedrtt; /* times cached RTT in route updated */
+ uint64_t tcps_cachedrttvar; /* times cached rttvar updated */
+ uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */
+ uint64_t tcps_usedrtt; /* times RTT initialized from route */
+ uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */
+ uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/
+ uint64_t tcps_persistdrop; /* timeout in persist state */
+ uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */
+ uint64_t tcps_mturesent; /* resends due to MTU discovery */
+ uint64_t tcps_listendrop; /* listen queue overflows */
+ uint64_t tcps_badrst; /* ignored RSTs in the window */
- u_long tcps_sc_added; /* entry added to syncache */
- u_long tcps_sc_retransmitted; /* syncache entry was retransmitted */
- u_long tcps_sc_dupsyn; /* duplicate SYN packet */
- u_long tcps_sc_dropped; /* could not reply to packet */
- u_long tcps_sc_completed; /* successful extraction of entry */
- u_long tcps_sc_bucketoverflow; /* syncache per-bucket limit hit */
- u_long tcps_sc_cacheoverflow; /* syncache cache limit hit */
- u_long tcps_sc_reset; /* RST removed entry from syncache */
- u_long tcps_sc_stale; /* timed out or listen socket gone */
- u_long tcps_sc_aborted; /* syncache entry aborted */
- u_long tcps_sc_badack; /* removed due to bad ACK */
- u_long tcps_sc_unreach; /* ICMP unreachable received */
- u_long tcps_sc_zonefail; /* zalloc() failed */
- u_long tcps_sc_sendcookie; /* SYN cookie sent */
- u_long tcps_sc_recvcookie; /* SYN cookie received */
+ uint64_t tcps_sc_added; /* entry added to syncache */
+ uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */
+ uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */
+ uint64_t tcps_sc_dropped; /* could not reply to packet */
+ uint64_t tcps_sc_completed; /* successful extraction of entry */
+ uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
+ uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */
+ uint64_t tcps_sc_reset; /* RST removed entry from syncache */
+ uint64_t tcps_sc_stale; /* timed out or listen socket gone */
+ uint64_t tcps_sc_aborted; /* syncache entry aborted */
+ uint64_t tcps_sc_badack; /* removed due to bad ACK */
+ uint64_t tcps_sc_unreach; /* ICMP unreachable received */
+ uint64_t tcps_sc_zonefail; /* zalloc() failed */
+ uint64_t tcps_sc_sendcookie; /* SYN cookie sent */
+ uint64_t tcps_sc_recvcookie; /* SYN cookie received */
- u_long tcps_hc_added; /* entry added to hostcache */
- u_long tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
+ uint64_t tcps_hc_added; /* entry added to hostcache */
+ uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
- u_long tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
+ uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
/* SACK related stats */
- u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
- u_long tcps_sack_rexmits; /* SACK rexmit segments */
- u_long tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
- u_long tcps_sack_rcv_blocks; /* SACK blocks (options) received */
- u_long tcps_sack_send_blocks; /* SACK blocks (options) sent */
- u_long tcps_sack_sboverflow; /* times scoreboard overflowed */
+ uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
+ uint64_t tcps_sack_rexmits; /* SACK rexmit segments */
+ uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
+ uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
+ uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
+ uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
- u_long tcps_ecn_ce; /* ECN Congestion Experienced */
- u_long tcps_ecn_ect0; /* ECN Capable Transport */
- u_long tcps_ecn_ect1; /* ECN Capable Transport */
- u_long tcps_ecn_shs; /* ECN successful handshakes */
- u_long tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
+ uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */
+ uint64_t tcps_ecn_ect0; /* ECN Capable Transport */
+ uint64_t tcps_ecn_ect1; /* ECN Capable Transport */
+ uint64_t tcps_ecn_shs; /* ECN successful handshakes */
+ uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
/* TCP_SIGNATURE related stats */
- u_long tcps_sig_rcvgoodsig; /* Total matching signature received */
- u_long tcps_sig_rcvbadsig; /* Total bad signature received */
- u_long tcps_sig_err_buildsig; /* Mismatching signature received */
- u_long tcps_sig_err_sigopt; /* No signature expected by socket */
- u_long tcps_sig_err_nosigopt; /* No signature provided by segment */
+ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */
+ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */
+ uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */
+ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */
+ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */
- u_long _pad[12]; /* 6 UTO, 6 TBD */
+ uint64_t _pad[12]; /* 6 UTO, 6 TBD */
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define TCPSTAT_ADD(name, val) V_tcpstat.name += (val)
+#define TCPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1)
/*
@@ -516,7 +565,7 @@
*/
void kmod_tcpstat_inc(int statnum);
#define KMOD_TCPSTAT_INC(name) \
- kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(u_long))
+ kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
/*
* TCP specific helper hook point identifiers.
@@ -579,24 +628,6 @@
#define TCPCTL_MAXID 16
#define TCPCTL_FINWAIT2_TIMEOUT 17
-#define TCPCTL_NAMES { \
- { 0, 0 }, \
- { "rfc1323", CTLTYPE_INT }, \
- { "mssdflt", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "rttdflt", CTLTYPE_INT }, \
- { "keepidle", CTLTYPE_INT }, \
- { "keepintvl", CTLTYPE_INT }, \
- { "sendspace", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "keepinit", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
- { "delacktime", CTLTYPE_INT }, \
- { "v6mssdflt", CTLTYPE_INT }, \
- { "maxid", CTLTYPE_INT }, \
-}
-
-
#ifdef _KERNEL
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
@@ -606,27 +637,27 @@
VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */
VNET_DECLARE(struct inpcbinfo, tcbinfo);
-VNET_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
extern int tcp_log_in_vain;
VNET_DECLARE(int, tcp_mssdflt); /* XXX */
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_delack_enabled);
VNET_DECLARE(int, tcp_do_rfc3390);
+VNET_DECLARE(int, tcp_do_initcwnd10);
+VNET_DECLARE(int, tcp_sendspace);
+VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, path_mtu_discovery);
-VNET_DECLARE(int, ss_fltsz);
-VNET_DECLARE(int, ss_fltsz_local);
VNET_DECLARE(int, tcp_do_rfc3465);
VNET_DECLARE(int, tcp_abc_l_var);
#define V_tcb VNET(tcb)
#define V_tcbinfo VNET(tcbinfo)
-#define V_tcpstat VNET(tcpstat)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
+#define V_tcp_do_initcwnd10 VNET(tcp_do_initcwnd10)
+#define V_tcp_sendspace VNET(tcp_sendspace)
+#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
-#define V_ss_fltsz VNET(ss_fltsz)
-#define V_ss_fltsz_local VNET(ss_fltsz_local)
#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
@@ -643,6 +674,9 @@
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
+VNET_DECLARE(int, tcp_do_rfc6675_pipe);
+#define V_tcp_do_rfc6675_pipe VNET(tcp_do_rfc6675_pipe)
+
int tcp_addoptions(struct tcpopt *, u_char *);
int tcp_ccalgounload(struct cc_algo *unload_algo);
struct tcpcb *
@@ -652,7 +686,7 @@
#if 0
int tcp_twrecycleable(struct tcptw *tw);
#endif
-void tcp_twclose(struct tcptw *_tw, int _reuse);
+void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
@@ -668,16 +702,15 @@
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
-void tcp_reass_init(void);
+void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
-#ifdef VIMAGE
-void tcp_reass_destroy(void);
-#endif
void tcp_input(struct mbuf *, int);
-u_long tcp_maxmtu(struct in_conninfo *, int *);
-u_long tcp_maxmtu6(struct in_conninfo *, int *);
+int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
+ struct tcpcb *, int);
+u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
+u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
- int *);
+ struct tcp_ifcap *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
@@ -687,6 +720,7 @@
struct tcpcb *
tcp_newtcpcb(struct inpcb *);
int tcp_output(struct tcpcb *);
+void tcp_state_change(struct tcpcb *, int);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
void tcp_tw_init(void);
@@ -696,7 +730,6 @@
void tcp_tw_zone_change(void);
int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
struct mbuf *, int);
-int tcp_twrespond(struct tcptw *, int);
void tcp_setpersist(struct tcpcb *);
#ifdef TCP_SIGNATURE
int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
@@ -707,8 +740,9 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-void tcp_timer_activate(struct tcpcb *, int, u_int);
-int tcp_timer_active(struct tcpcb *, int);
+void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+int tcp_timer_active(struct tcpcb *, uint32_t);
+void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
@@ -723,11 +757,9 @@
void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
-extern u_long tcp_sendspace;
-extern u_long tcp_recvspace;
tcp_seq tcp_new_isn(struct tcpcb *);
-void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
@@ -735,10 +767,31 @@
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
-u_long tcp_seq_subtract(u_long, u_long );
+int tcp_compute_pipe(struct tcpcb *);
void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+static inline void
+tcp_fields_to_host(struct tcphdr *th)
+{
+
+ th->th_seq = ntohl(th->th_seq);
+ th->th_ack = ntohl(th->th_ack);
+ th->th_win = ntohs(th->th_win);
+ th->th_urp = ntohs(th->th_urp);
+}
+
+#ifdef TCP_SIGNATURE
+static inline void
+tcp_fields_to_net(struct tcphdr *th)
+{
+
+ th->th_seq = htonl(th->th_seq);
+ th->th_ack = htonl(th->th_ack);
+ th->th_win = htons(th->th_win);
+ th->th_urp = htons(th->th_urp);
+}
+#endif
#endif /* _KERNEL */
#endif /* _NETINET_TCP_VAR_H_ */
Modified: trunk/sys/netinet/tcpip.h
===================================================================
--- trunk/sys/netinet/tcpip.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/tcpip.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* @(#)tcpip.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/tcpip.h 139823 2005-01-07 01:45:51Z imp $
+ * $FreeBSD: stable/10/sys/netinet/tcpip.h 139823 2005-01-07 01:45:51Z imp $
*/
#ifndef _NETINET_TCPIP_H_
Added: trunk/sys/netinet/toecore.c
===================================================================
--- trunk/sys/netinet/toecore.c (rev 0)
+++ trunk/sys/netinet/toecore.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,659 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np at FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet/toecore.c 329982 2018-02-25 11:29:55Z hselasky $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/sockopt.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+
+#if defined(KLD_MODULE) || defined(INET) || defined(INET6)
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/nd6.h>
+#define TCPSTATES
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
+
+static struct mtx toedev_lock;
+static TAILQ_HEAD(, toedev) toedev_list;
+static eventhandler_tag listen_start_eh;
+static eventhandler_tag listen_stop_eh;
+static eventhandler_tag lle_event_eh;
+static eventhandler_tag route_redirect_eh;
+
+/*
+ * Default tod_connect installed by init_toedev(): ignores all arguments and
+ * reports that offloaded active open is not supported (ENOTSUP).
+ */
+static int
+toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
+ struct rtentry *rt __unused, struct sockaddr *nam __unused)
+{
+
+ return (ENOTSUP);
+}
+
+/*
+ * Default tod_listen_start installed by init_toedev(): always returns
+ * ENOTSUP.
+ */
+static int
+toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+/*
+ * Default tod_listen_stop installed by init_toedev(): always returns
+ * ENOTSUP.
+ */
+static int
+toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+/*
+ * Default tod_input installed by init_toedev(): the frame is not processed,
+ * the mbuf is simply released with m_freem().
+ */
+static void
+toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return;
+}
+
+/*
+ * Default tod_rcvd installed by init_toedev(): does nothing.
+ */
+static void
+toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+/*
+ * Default transmit routine: always returns ENOTSUP.  init_toedev() uses this
+ * one stub for tod_output, tod_send_rst and tod_send_fin, which all share
+ * the same signature.
+ */
+static int
+toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+/*
+ * Default tod_pcb_detach installed by init_toedev(): does nothing.
+ */
+static void
+toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_l2_update installed by init_toedev(): the L2 notification is
+ * ignored.
+ */
+static void
+toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct sockaddr *sa __unused, uint8_t *lladdr __unused,
+ uint16_t vtag __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_route_redirect installed by init_toedev(): does nothing.
+ */
+static void
+toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_syncache_added installed by init_toedev(): does nothing.
+ */
+static void
+toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_syncache_removed installed by init_toedev(): does nothing.
+ */
+static void
+toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_syncache_respond installed by init_toedev(): nothing is sent;
+ * the mbuf is released with m_freem() and 0 is returned.
+ */
+static int
+toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return (0);
+}
+
+/*
+ * Default tod_offload_socket installed by init_toedev(): does nothing.
+ */
+static void
+toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
+ struct socket *so __unused)
+{
+
+ return;
+}
+
+/*
+ * Default tod_ctloutput installed by init_toedev(): the socket option
+ * notification is ignored.
+ */
+static void
+toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ int sopt_dir __unused, int sopt_name __unused)
+{
+
+ return;
+}
+
+/*
+ * Inform one or more TOE devices about a listening socket.
+ *
+ * 'arg' selects the target: NULL notifies every device on toedev_list,
+ * otherwise only the toedev that 'arg' points to is notified.  Nothing is
+ * done for an inp that is dropped or in TIME_WAIT, or whose tcpcb is not in
+ * the LISTEN state.  The caller must hold the inp write lock; toedev_lock is
+ * taken here while the device list is walked.  The return value of each
+ * tod_listen_start call is ignored.
+ */
+static void
+toe_listen_start(struct inpcb *inp, void *arg)
+{
+ struct toedev *t, *tod;
+ struct tcpcb *tp;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
+ ("%s: inp is not a TCP inp", __func__));
+
+ if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
+ return;
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_LISTEN)
+ return;
+
+ t = arg;
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link) {
+ if (t == NULL || t == tod)
+ tod->tod_listen_start(tod, tp);
+ }
+ mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Handler for the tcp_offload_listen_start event (registered in
+ * toecore_load()).  Passes the tcpcb's inp to toe_listen_start() with a NULL
+ * device argument, i.e. every registered TOE device is notified.  The inp
+ * must be write-locked and the tcpcb must already be in LISTEN.
+ */
+static void
+toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ toe_listen_start(inp, NULL);
+}
+
+/*
+ * Handler for the tcp_offload_listen_stop event (registered in
+ * toecore_load()).  Calls tod_listen_stop on every device on toedev_list
+ * while holding toedev_lock; the callbacks' return values are ignored.
+ *
+ * 'inp' is only declared under INVARIANTS because its sole use is the lock
+ * assertion -- presumably INP_WLOCK_ASSERT expands to nothing otherwise
+ * (confirm against in_pcb.h).
+ */
+static void
+toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct toedev *tod;
+#ifdef INVARIANTS
+ struct inpcb *inp = tp->t_inpcb;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link)
+ tod->tod_listen_stop(tod, tp);
+ mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Fill up a freshly allocated toedev struct with reasonable defaults.
+ *
+ * tod_softc is cleared and every callback is pointed at one of the local
+ * stubs; the 'link' list glue is not touched here.  A driver is expected to
+ * overwrite the callbacks it implements after this call.
+ */
+void
+init_toedev(struct toedev *tod)
+{
+
+ tod->tod_softc = NULL;
+
+ /*
+ * Provide no-op defaults so that the kernel can call any toedev
+ * function without having to check whether the TOE driver supplied one
+ * or not.
+ */
+ tod->tod_connect = toedev_connect;
+ tod->tod_listen_start = toedev_listen_start;
+ tod->tod_listen_stop = toedev_listen_stop;
+ tod->tod_input = toedev_input;
+ tod->tod_rcvd = toedev_rcvd;
+ tod->tod_output = toedev_output;
+ tod->tod_send_rst = toedev_output; /* same stub: returns ENOTSUP */
+ tod->tod_send_fin = toedev_output; /* same stub: returns ENOTSUP */
+ tod->tod_pcb_detach = toedev_pcb_detach;
+ tod->tod_l2_update = toedev_l2_update;
+ tod->tod_route_redirect = toedev_route_redirect;
+ tod->tod_syncache_added = toedev_syncache_added;
+ tod->tod_syncache_removed = toedev_syncache_removed;
+ tod->tod_syncache_respond = toedev_syncache_respond;
+ tod->tod_offload_socket = toedev_offload_socket;
+ tod->tod_ctloutput = toedev_ctloutput;
+}
+
+/*
+ * Register an active TOE device with the system. This allows it to receive
+ * notifications from the kernel.
+ *
+ * Returns EEXIST if 'tod' is already on toedev_list.  Otherwise the device
+ * is appended to the list, registered_toedevs is incremented and 0 is
+ * returned.  After toedev_lock has been dropped, toe_listen_start() is run
+ * through inp_apply_all() with 'tod' as its argument so that only the new
+ * device is told about sockets that are already listening.
+ */
+int
+register_toedev(struct toedev *tod)
+{
+ struct toedev *t;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(t, &toedev_list, link) {
+ if (t == tod) {
+ mtx_unlock(&toedev_lock);
+ return (EEXIST);
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&toedev_list, tod, link);
+ registered_toedevs++;
+ mtx_unlock(&toedev_lock);
+
+ inp_apply_all(toe_listen_start, tod);
+
+ return (0);
+}
+
+/*
+ * Remove the TOE device from the global list of active TOE devices. It is the
+ * caller's responsibility to ensure that the TOE device is quiesced prior to
+ * this call.
+ *
+ * Returns 0 if 'tod' was found and removed (registered_toedevs is
+ * decremented), or ENODEV if it was not on the list.
+ */
+int
+unregister_toedev(struct toedev *tod)
+{
+ struct toedev *t, *t2;
+ int rc = ENODEV;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
+ if (t == tod) {
+ TAILQ_REMOVE(&toedev_list, tod, link);
+ registered_toedevs--;
+ rc = 0;
+ break;
+ }
+ }
+ KASSERT(registered_toedevs >= 0,
+ ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
+ mtx_unlock(&toedev_lock);
+ return (rc);
+}
+
+/*
+ * Thin wrapper around syncache_add() for TOE drivers.  The socket attached
+ * to 'inp' is passed by address as the listening socket, the sixth argument
+ * is NULL, and 'tod' / 'todctx' are forwarded unchanged.  The caller must
+ * hold the inp write lock.  syncache_add()'s return value (if any) is not
+ * propagated.
+ */
+void
+toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, void *tod, void *todctx)
+{
+ struct socket *lso = inp->inp_socket;
+
+ INP_WLOCK_ASSERT(inp);
+
+ syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+}
+
+/*
+ * Thin wrapper around syncache_expand() for TOE drivers: forwards inc, to,
+ * th and lsop, passes NULL as the last argument, and returns whatever
+ * syncache_expand() returns.  The tcbinfo read lock must be held.
+ */
+int
+toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
+ struct tcphdr *th, struct socket **lsop)
+{
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ return (syncache_expand(inc, to, th, lsop, NULL));
+}
+
+/*
+ * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP
+ * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
+ * in TIME_WAIT may be assassinated freeing it up for re-use.
+ *
+ * Note that the TCP header must have been run through tcp_fields_to_host() or
+ * equivalent.
+ *
+ * Returns 0 when no matching inpcb exists, or when the match is in TIME_WAIT,
+ * 'th' is non-NULL and tcp_twcheck() returns non-zero.  Returns EADDRINUSE in
+ * every other case.  A lookup for an address family that is not compiled in
+ * is treated as "no match".
+ *
+ * The lookup returns the inpcb write-locked.  It is unlocked here on the
+ * non-TIME_WAIT path; on the tcp_twcheck() path there is no explicit unlock,
+ * so presumably tcp_twcheck() releases the lock itself -- verify against
+ * tcp_timewait.c.
+ */
+int
+toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ if (inc->inc_flags & INC_ISIPV6) {
+#if defined(KLD_MODULE) || defined(INET6)
+ inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
+ inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
+ INPLOOKUP_WLOCKPCB, ifp);
+#else
+ inp = NULL;
+#endif
+ } else {
+#if defined(KLD_MODULE) || defined(INET)
+ inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
+ inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
+#else
+ inp = NULL;
+#endif
+ }
+ if (inp != NULL) {
+ INP_WLOCK_ASSERT(inp);
+
+ if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
+ if (!tcp_twcheck(inp, NULL, th, NULL, 0))
+ return (EADDRINUSE);
+ } else {
+ INP_WUNLOCK(inp);
+ return (EADDRINUSE);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Handler for the lle_event event (registered in toecore_load()).  Relays a
+ * change of a link-layer table entry to the TOE device that owns the
+ * entry's interface, through tod_l2_update.
+ *
+ * The event is ignored when the interface does not have the TOE capability
+ * enabled for the entry's address family (IFCAP_TOE4 / IFCAP_TOE6) or when
+ * TOEDEV(ifp) is NULL.  For LLENTRY_RESOLVED the driver receives a pointer
+ * to the entry's link-layer address; for any other event it receives a NULL
+ * lladdr.  vtag is 0xfff unless VLAN_TAG is defined and overwrites it.
+ * The lle must be write-locked by the caller.
+ */
+static void
+toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+ struct toedev *tod;
+ struct ifnet *ifp;
+ struct sockaddr *sa;
+ uint8_t *lladdr;
+ uint16_t vtag;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ ifp = lle->lle_tbl->llt_ifp;
+ sa = L3_ADDR(lle);
+
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
+ __func__, evt, lle, sa));
+
+ /*
+ * Not interested if the interface's TOE capability is not enabled.
+ */
+ if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
+ (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
+ return;
+
+ tod = TOEDEV(ifp);
+ if (tod == NULL)
+ return;
+
+ vtag = 0xfff;
+ if (evt != LLENTRY_RESOLVED) {
+
+ /*
+ * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
+ * this entry is going to be deleted.
+ */
+
+ lladdr = NULL;
+ } else {
+
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: %p resolved but not valid?", __func__, lle));
+
+ lladdr = (uint8_t *)&lle->ll_addr;
+#ifdef VLAN_TAG
+ VLAN_TAG(ifp, &vtag);
+#endif
+ }
+
+ tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
+}
+
+/*
+ * XXX: implement.
+ *
+ * Handler for the route_redirect_event event (registered in toecore_load()).
+ * Currently a placeholder: all arguments are ignored and no TOE device is
+ * notified.
+ */
+static void
+toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
+ struct rtentry *rt1, struct sockaddr *sa)
+{
+
+ return;
+}
+
+#ifdef INET6
+/*
+ * XXX: no checks to verify that sa is really a neighbor because we assume it is
+ * the result of a route lookup and is on-link on the given ifp.
+ *
+ * Look up the link-layer address for the IPv6 address in 'sa' on 'ifp'.
+ *
+ * Returns 0 and copies ifp->if_addrlen bytes into 'lladdr' when the entry
+ * is valid (LLE_VALID).  Returns EWOULDBLOCK when the entry exists but is
+ * not valid, or when no entry existed; in the latter case a new entry is
+ * created in the INCOMPLETE state, its timer is armed and nd6_ns_output()
+ * is called for the address.  Returns ENOMEM when the entry cannot be
+ * created.
+ *
+ * A STALE entry is moved to DELAY, which is done with the lle write-locked:
+ * if the first lookup took the shared lock, it is dropped and the lookup is
+ * restarted with LLE_EXCLUSIVE.
+ */
+static int
+toe_nd6_resolve(struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr)
+{
+ struct llentry *lle;
+ struct sockaddr_in6 *sin6 = (void *)sa;
+ int rc, flags = 0;
+
+restart:
+ IF_AFDATA_RLOCK(ifp);
+ lle = lla_lookup(LLTABLE6(ifp), flags, sa);
+ IF_AFDATA_RUNLOCK(ifp);
+ if (lle == NULL) {
+ IF_AFDATA_LOCK(ifp);
+ lle = nd6_lookup(&sin6->sin6_addr, ND6_CREATE | ND6_EXCLUSIVE,
+ ifp);
+ IF_AFDATA_UNLOCK(ifp);
+ if (lle == NULL)
+ return (ENOMEM); /* Couldn't create entry in cache. */
+ lle->ln_state = ND6_LLINFO_INCOMPLETE;
+ nd6_llinfo_settimer_locked(lle,
+ (long)ND_IFINFO(ifp)->retrans * hz / 1000);
+ LLE_WUNLOCK(lle);
+
+ nd6_ns_output(ifp, NULL, &sin6->sin6_addr, NULL, 0);
+
+ return (EWOULDBLOCK);
+ }
+
+ if (lle->ln_state == ND6_LLINFO_STALE) {
+ if ((flags & LLE_EXCLUSIVE) == 0) {
+ LLE_RUNLOCK(lle);
+ flags |= LLE_EXCLUSIVE;
+ goto restart;
+ }
+
+ LLE_WLOCK_ASSERT(lle);
+
+ lle->la_asked = 0;
+ lle->ln_state = ND6_LLINFO_DELAY;
+ nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz);
+ }
+
+ if (lle->la_flags & LLE_VALID) {
+ memcpy(lladdr, &lle->ll_addr, ifp->if_addrlen);
+ rc = 0;
+ } else
+ rc = EWOULDBLOCK;
+
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(lle);
+ else
+ LLE_RUNLOCK(lle);
+
+ return (rc);
+}
+#endif
+
+/*
+ * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means
+ * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
+ * tod_l2_update will be called later, when the entry is resolved or times out.
+ *
+ * AF_INET is resolved with arpresolve() and AF_INET6 with toe_nd6_resolve(),
+ * each only when the corresponding option is compiled in; any other address
+ * family yields EPROTONOSUPPORT.  On a 0 return *vtag is set to 0xfff when
+ * VLAN_TAG is not defined or reports a non-zero result.  'tod' is not used
+ * by this function.
+ */
+int
+toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+ uint8_t *lladdr, uint16_t *vtag)
+{
+#ifdef INET
+ struct llentry *lle;
+#endif
+ int rc;
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ rc = toe_nd6_resolve(ifp, sa, lladdr);
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+
+ if (rc == 0) {
+#ifdef VLAN_TAG
+ if (VLAN_TAG(ifp, vtag) != 0)
+#endif
+ *vtag = 0xfff;
+ }
+
+ return (rc);
+}
+
+/*
+ * Called by a TOE driver to report that an offloaded active open failed
+ * with error 'err'.  The inp must be write-locked on entry and is
+ * write-locked again on return.
+ *
+ * Nothing is done if the inp has already been dropped.  For EAGAIN the
+ * connection is taken back from the driver and handed to the host stack
+ * (see the comment below); for any other error the connection is dropped
+ * with tcp_drop(), which requires the tcbinfo read lock.
+ */
+void
+toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
+{
+
+ INP_WLOCK_ASSERT(inp);
+
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ struct tcpcb *tp = intotcpcb(inp);
+
+ KASSERT(tp->t_flags & TF_TOE,
+ ("%s: tp %p not offloaded.", __func__, tp));
+
+ if (err == EAGAIN) {
+
+ /*
+ * Temporary failure during offload, take this PCB back.
+ * Detach from the TOE driver and do the rest of what
+ * TCP's pru_connect would have done if the connection
+ * wasn't offloaded.
+ */
+
+ tod->tod_pcb_detach(tod, tp);
+ KASSERT(!(tp->t_flags & TF_TOE),
+ ("%s: tp %p still offloaded.", __func__, tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ (void) tcp_output(tp);
+ } else {
+
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ tp = tcp_drop(tp, err);
+ /*
+ * NOTE(review): a NULL return presumably means tcp_drop()
+ * released the inp lock; confirm the inp is still valid
+ * to re-lock at this point.
+ */
+ if (tp == NULL)
+ INP_WLOCK(inp); /* re-acquire */
+ }
+ }
+ INP_WLOCK_ASSERT(inp);
+}
+
+/*
+ * Module load: initialize toedev_lock and the (empty) toedev_list, then
+ * register the four event handlers this file provides (listen start/stop,
+ * lle_event, route_redirect_event), saving each tag for toecore_unload().
+ * Always returns 0.
+ */
+static int
+toecore_load(void)
+{
+
+ mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
+ TAILQ_INIT(&toedev_list);
+
+ listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+ toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
+ listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+ toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
+ lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
+ EVENTHANDLER_PRI_ANY);
+ route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
+ toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
+
+ return (0);
+}
+
+/*
+ * Module unload: refuse with EBUSY while any TOE device is still on
+ * toedev_list; otherwise deregister the four event handlers registered by
+ * toecore_load(), destroy toedev_lock and return 0.
+ *
+ * NOTE(review): the handlers are deregistered while toedev_lock is held;
+ * confirm EVENTHANDLER_DEREGISTER cannot sleep in this context.
+ */
+static int
+toecore_unload(void)
+{
+
+ mtx_lock(&toedev_lock);
+ if (!TAILQ_EMPTY(&toedev_list)) {
+ mtx_unlock(&toedev_lock);
+ return (EBUSY);
+ }
+
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
+ EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
+
+ mtx_unlock(&toedev_lock);
+ mtx_destroy(&toedev_lock);
+
+ return (0);
+}
+
+/*
+ * Module event handler: dispatches MOD_LOAD to toecore_load() and
+ * MOD_UNLOAD to toecore_unload(), returning their result.  Every other
+ * command yields EOPNOTSUPP.  'mod' and 'arg' are not used.
+ */
+static int
+toecore_mod_handler(module_t mod, int cmd, void *arg)
+{
+
+ if (cmd == MOD_LOAD)
+ return (toecore_load());
+
+ if (cmd == MOD_UNLOAD)
+ return (toecore_unload());
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Module description: name "toecore", event handler toecore_mod_handler,
+ * no extra argument.  Declared at SI_SUB_EXEC / SI_ORDER_ANY.
+ */
+static moduledata_t mod_data= {
+ "toecore",
+ toecore_mod_handler,
+ 0
+};
+
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+#endif /* defined(KLD_MODULE) || defined(INET) || defined(INET6) */
+
+MODULE_VERSION(toecore, 1);
+
Property changes on: trunk/sys/netinet/toecore.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netinet/toecore.h
===================================================================
--- trunk/sys/netinet/toecore.h (rev 0)
+++ trunk/sys/netinet/toecore.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,131 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/toecore.h 239511 2012-08-21 18:09:33Z np $
+ */
+
+#ifndef _NETINET_TOE_H_
+#define _NETINET_TOE_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct tcpopt;
+struct tcphdr;
+struct in_conninfo;
+
+/*
+ * Per-device table of TOE driver entry points, linked on the kernel's list
+ * of registered TOE devices.  init_toedev() clears tod_softc and points
+ * every callback at a stub, so the kernel may call any of them without a
+ * NULL check; a driver overrides the ones it implements.
+ */
+struct toedev {
+ TAILQ_ENTRY(toedev) link; /* glue for toedev_list */
+ void *tod_softc; /* TOE driver private data */
+
+ /*
+ * Active open. If a failure occurs, it is reported back by the driver
+ * via toe_connect_failed.
+ */
+ int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
+ struct sockaddr *);
+
+ /* Passive open. */
+ int (*tod_listen_start)(struct toedev *, struct tcpcb *);
+ int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel uses this routine to pass on any frame it receives for an
+ * offloaded connection to the TOE driver. This is an unusual event.
+ */
+ void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
+
+ /*
+ * This is called by the kernel during pru_rcvd for an offloaded TCP
+ * connection and provides an opportunity for the TOE driver to manage
+ * its rx window and credits.
+ */
+ void (*tod_rcvd)(struct toedev *, struct tcpcb *);
+
+ /*
+ * Transmit routine. The kernel calls this to have the TOE driver
+ * evaluate whether there is data to be transmitted, and transmit it.
+ */
+ int (*tod_output)(struct toedev *, struct tcpcb *);
+
+ /* Immediate teardown: send RST to peer. */
+ int (*tod_send_rst)(struct toedev *, struct tcpcb *);
+
+ /* Initiate orderly disconnect by sending FIN to the peer. */
+ int (*tod_send_fin)(struct toedev *, struct tcpcb *);
+
+ /* Called to indicate that the kernel is done with this TCP PCB. */
+ void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
+
+ /*
+ * The kernel calls this once it has information about an L2 entry that
+ * the TOE driver enquired about previously (via toe_l2_resolve).
+ */
+ void (*tod_l2_update)(struct toedev *, struct ifnet *,
+ struct sockaddr *, uint8_t *, uint16_t);
+
+ /* XXX. Route has been redirected. */
+ void (*tod_route_redirect)(struct toedev *, struct ifnet *,
+ struct rtentry *, struct rtentry *);
+
+ /* Syncache interaction. */
+ void (*tod_syncache_added)(struct toedev *, void *);
+ void (*tod_syncache_removed)(struct toedev *, void *);
+ int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
+ void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
+
+ /* TCP socket option */
+ void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
+};
+
+#include <sys/eventhandler.h>
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+void init_toedev(struct toedev *);
+int register_toedev(struct toedev *);
+int unregister_toedev(struct toedev *);
+
+/*
+ * General interface for looking up L2 information for an IP address. If an
+ * answer is not available right away then the TOE driver's tod_l2_update will
+ * be called later.
+ */
+int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
+ uint8_t *, uint16_t *);
+
+void toe_connect_failed(struct toedev *, struct inpcb *, int);
+
+void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct inpcb *, void *, void *);
+int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct socket **);
+
+int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
+#endif
Property changes on: trunk/sys/netinet/toecore.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet/udp.h
===================================================================
--- trunk/sys/netinet/udp.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)udp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/udp.h 247525 2013-03-01 03:04:57Z jhb $
+ * $FreeBSD: stable/10/sys/netinet/udp.h 246210 2013-02-01 15:32:20Z jhb $
*/
#ifndef _NETINET_UDP_H_
Modified: trunk/sys/netinet/udp_usrreq.c
===================================================================
--- trunk/sys/netinet/udp_usrreq.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp_usrreq.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -4,6 +4,7 @@
* The Regents of the University of California.
* Copyright (c) 2008 Robert N. M. Watson
* Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
@@ -37,12 +38,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet/udp_usrreq.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet/udp_usrreq.c 313558 2017-02-10 16:11:11Z vangyzen $");
#include "opt_ipfw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/domain.h>
@@ -55,6 +57,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -69,6 +72,7 @@
#include <net/route.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -85,6 +89,7 @@
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/udplite.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -96,8 +101,9 @@
#include <security/mac/mac_framework.h>
/*
- * UDP protocol implementation.
+ * UDP and UDP-Lite protocols implementation.
* Per RFC 768, August, 1980.
+ * Per RFC 3828, July, 2004.
*/
/*
@@ -137,6 +143,8 @@
VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */
VNET_DEFINE(struct inpcbinfo, udbinfo);
+VNET_DEFINE(struct inpcbhead, ulitecb);
+VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
static VNET_DEFINE(uma_zone_t, udpcb_zone);
#define V_udpcb_zone VNET(udpcb_zone)
@@ -144,11 +152,14 @@
#define UDBHASHSIZE 128
#endif
-VNET_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
-SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(udpstat), udpstat,
- "UDP statistics (struct udpstat, netinet/udp_var.h)");
+VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat); /* from udp_var.h */
+VNET_PCPUSTAT_SYSINIT(udpstat);
+SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
+ udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(udpstat);
+#endif /* VIMAGE */
#ifdef INET
static void udp_detach(struct socket *so);
static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
@@ -182,6 +193,16 @@
return (0);
}
+/*
+ * Initializer passed to in_pcbinfo_init() by udplite_init() for UDP-Lite
+ * inpcbs: sets up the inpcb lock under the names "inp" / "udpliteinp".
+ * 'size' and 'flags' are not used.  Always returns 0.
+ */
+static int
+udplite_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp;
+
+ inp = mem;
+ INP_LOCK_INIT(inp, "inp", "udpliteinp");
+ return (0);
+}
+
void
udp_init(void)
{
@@ -192,10 +213,20 @@
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_udpcb_zone, maxsockets);
+ uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
+/*
+ * Set up the UDP-Lite inpcbinfo (V_ulitecbinfo) and its inpcb list
+ * (V_ulitecb).  UDBHASHSIZE is passed for both hash-size arguments, and
+ * udplite_inpcb_init() is the per-inpcb initializer.
+ */
+void
+udplite_init(void)
+{
+
+ in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
+ UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
+ UMA_ZONE_NOFREE, IPI_HASHFIELDS_2TUPLE);
+}
+
/*
* Kernel module interface for updating udpstat. The argument is an index
* into udpstat treated as an array of u_long. While this encodes the
@@ -207,7 +238,7 @@
kmod_udpstat_inc(int statnum)
{
- (*((u_long *)&V_udpstat + statnum))++;
+ counter_u64_add(VNET(udpstat)[statnum], 1);
}
int
@@ -237,6 +268,13 @@
in_pcbinfo_destroy(&V_udbinfo);
uma_zdestroy(V_udpcb_zone);
}
+
+/*
+ * Tear down the UDP-Lite inpcbinfo set up by udplite_init().
+ */
+void
+udplite_destroy(void)
+{
+
+ in_pcbinfo_destroy(&V_ulitecbinfo);
+}
#endif
#ifdef INET
@@ -266,13 +304,11 @@
*/
up = intoudpcb(inp);
if (up->u_tun_func != NULL) {
- (*up->u_tun_func)(n, off, inp);
+ (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in,
+ up->u_tun_ctx);
return;
}
- if (n == NULL)
- return;
-
off += sizeof(struct udphdr);
#ifdef IPSEC
@@ -279,7 +315,7 @@
/* Check AH/ESP integrity. */
if (ipsec4_in_reject(n, inp)) {
m_freem(n);
- V_ipsec4stat.in_polvio++;
+ IPSECSTAT_INC(ips_in_polvio);
return;
}
#ifdef IPSEC_NAT_T
@@ -339,10 +375,13 @@
struct udphdr *uh;
struct ifnet *ifp;
struct inpcb *inp;
- int len;
+ uint16_t len, ip_len;
+ struct inpcbinfo *pcbinfo;
struct ip save_ip;
struct sockaddr_in udp_in;
struct m_tag *fwd_tag;
+ int cscov_partial;
+ uint8_t pr;
ifp = m->m_pkthdr.rcvif;
UDPSTAT_INC(udps_ipackets);
@@ -353,7 +392,7 @@
* check the checksum with options still present.
*/
if (iphlen > sizeof (struct ip)) {
- ip_stripoptions(m, (struct mbuf *)0);
+ ip_stripoptions(m);
iphlen = sizeof(struct ip);
}
@@ -362,7 +401,7 @@
*/
ip = mtod(m, struct ip *);
if (m->m_len < iphlen + sizeof(struct udphdr)) {
- if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+ if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
UDPSTAT_INC(udps_hdrops);
return;
}
@@ -369,6 +408,8 @@
ip = mtod(m, struct ip *);
}
uh = (struct udphdr *)((caddr_t)ip + iphlen);
+ pr = ip->ip_p;
+ cscov_partial = (pr == IPPROTO_UDPLITE) ? 1 : 0;
/*
* Destination port of 0 is illegal, based on RFC768.
@@ -391,13 +432,20 @@
* reflect UDP length, drop.
*/
len = ntohs((u_short)uh->uh_ulen);
- if (ip->ip_len != len) {
- if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+ ip_len = ntohs(ip->ip_len) - iphlen;
+ if (pr == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
+ /* Zero means checksum over the complete packet. */
+ if (len == 0)
+ len = ip_len;
+ cscov_partial = 0;
+ }
+ if (ip_len != len) {
+ if (len > ip_len || len < sizeof(struct udphdr)) {
UDPSTAT_INC(udps_badlen);
goto badunlocked;
}
- m_adj(m, len - ip->ip_len);
- /* ip->ip_len = len; */
+ if (pr == IPPROTO_UDP)
+ m_adj(m, len - ip_len);
}
/*
@@ -415,13 +463,14 @@
if (uh->uh_sum) {
u_short uh_sum;
- if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+ if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
+ !cscov_partial) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htonl((u_short)len +
- m->m_pkthdr.csum_data + IPPROTO_UDP));
+ m->m_pkthdr.csum_data + pr));
uh_sum ^= 0xffff;
} else {
char b[9];
@@ -428,7 +477,8 @@
bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
bzero(((struct ipovly *)ip)->ih_x1, 9);
- ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+ ((struct ipovly *)ip)->ih_len = (pr == IPPROTO_UDP) ?
+ uh->uh_ulen : htons(ip_len);
uh_sum = in_cksum(m, len + sizeof (struct ip));
bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
}
@@ -437,17 +487,28 @@
m_freem(m);
return;
}
- } else
- UDPSTAT_INC(udps_nosum);
+ } else {
+ if (pr == IPPROTO_UDP) {
+ UDPSTAT_INC(udps_nosum);
+ } else {
+ /* UDPLite requires a checksum */
+ /* XXX: What is the right UDPLite MIB counter here? */
+ m_freem(m);
+ return;
+ }
+ }
+ pcbinfo = get_inpcbinfo(pr);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
+ struct inpcbhead *pcblist;
struct ip_moptions *imo;
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK(pcbinfo);
+ pcblist = get_pcblist(pr);
last = NULL;
- LIST_FOREACH(inp, &V_udb, inp_list) {
+ LIST_FOREACH(inp, pcblist, inp_list) {
if (inp->inp_lport != uh->uh_dport)
continue;
#ifdef INET6
@@ -506,8 +567,12 @@
if (last != NULL) {
struct mbuf *n;
- n = m_copy(m, 0, M_COPYALL);
- udp_append(last, ip, n, iphlen, &udp_in);
+ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+ UDP_PROBE(receive, NULL, last, ip,
+ last, uh);
+ udp_append(last, ip, n, iphlen,
+ &udp_in);
+ }
INP_RUNLOCK(last);
}
last = inp;
@@ -533,12 +598,13 @@
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
goto badunlocked;
}
+ UDP_PROBE(receive, NULL, last, ip, last, uh);
udp_append(last, ip, m, iphlen, &udp_in);
INP_RUNLOCK(last);
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
return;
}
@@ -559,7 +625,7 @@
* Transparently forwarded. Pretend to be the destination.
* Already got one like this?
*/
- inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
if (!inp) {
/*
@@ -567,7 +633,7 @@
* Because we've rewritten the destination address,
* any hardware-generated hash is ignored.
*/
- inp = in_pcblookup(&V_udbinfo, ip->ip_src,
+ inp = in_pcblookup(pcbinfo, ip->ip_src,
uh->uh_sport, next_hop->sin_addr,
next_hop->sin_port ? htons(next_hop->sin_port) :
uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -577,18 +643,18 @@
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP_NEXTHOP;
} else
- inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
+ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_RLOCKPCB, ifp, m);
if (inp == NULL) {
if (udp_log_in_vain) {
- char buf[4*sizeof "123"];
+ char src[INET_ADDRSTRLEN];
+ char dst[INET_ADDRSTRLEN];
- strcpy(buf, inet_ntoa(ip->ip_dst));
log(LOG_INFO,
"Connection attempt to UDP %s:%d from %s:%d\n",
- buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
- ntohs(uh->uh_sport));
+ inet_ntoa_r(ip->ip_dst, dst), ntohs(uh->uh_dport),
+ inet_ntoa_r(ip->ip_src, src), ntohs(uh->uh_sport));
}
UDPSTAT_INC(udps_noport);
if (m->m_flags & (M_BCAST | M_MCAST)) {
@@ -600,7 +666,6 @@
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
goto badunlocked;
*ip = save_ip;
- ip->ip_len += iphlen;
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
return;
}
@@ -614,6 +679,18 @@
m_freem(m);
return;
}
+ if (cscov_partial) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return;
+ }
+ }
+
+ UDP_PROBE(receive, NULL, inp, ip, inp, uh);
udp_append(inp, ip, m, iphlen, &udp_in);
INP_RUNLOCK(inp);
return;
@@ -646,8 +723,9 @@
}
#ifdef INET
-void
-udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+static void
+udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
+ struct inpcbinfo *pcbinfo)
{
struct ip *ip = vip;
struct udphdr *uh;
@@ -676,7 +754,7 @@
return;
if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport,
+ inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
@@ -686,9 +764,22 @@
INP_RUNLOCK(inp);
}
} else
- in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd],
+ in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
udp_notify);
}
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+ return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
+}
+
+void
+udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+
+ return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
+}
#endif /* INET */
static int
@@ -844,16 +935,16 @@
int
udp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error = 0, optval;
struct inpcb *inp;
-#ifdef IPSEC_NAT_T
struct udpcb *up;
-#endif
+ int isudplite, error, optval;
+ error = 0;
+ isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
INP_WLOCK(inp);
- if (sopt->sopt_level != IPPROTO_UDP) {
+ if (sopt->sopt_level != so->so_proto->pr_protocol) {
#ifdef INET6
if (INP_CHECK_SOCKAF(so, AF_INET6)) {
INP_WUNLOCK(inp);
@@ -911,6 +1002,34 @@
}
INP_WUNLOCK(inp);
break;
+ case UDPLITE_SEND_CSCOV:
+ case UDPLITE_RECV_CSCOV:
+ if (!isudplite) {
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof(optval),
+ sizeof(optval));
+ if (error != 0)
+ break;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_WLOCK(inp);
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ if ((optval != 0 && optval < 8) || (optval > 65535)) {
+ INP_WUNLOCK(inp);
+ error = EINVAL;
+ break;
+ }
+ if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+ up->u_txcslen = optval;
+ else
+ up->u_rxcslen = optval;
+ INP_WUNLOCK(inp);
+ break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -928,6 +1047,22 @@
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
#endif
+ case UDPLITE_SEND_CSCOV:
+ case UDPLITE_RECV_CSCOV:
+ if (!isudplite) {
+ INP_WUNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+ up = intoudpcb(inp);
+ KASSERT(up != NULL, ("%s: up == NULL", __func__));
+ if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
+ optval = up->u_txcslen;
+ else
+ optval = up->u_rxcslen;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(optval));
+ break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -950,12 +1085,16 @@
int len = m->m_pkthdr.len;
struct in_addr faddr, laddr;
struct cmsghdr *cm;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in *sin, src;
+ int cscov_partial = 0;
int error = 0;
int ipflags;
u_short fport, lport;
int unlock_udbinfo;
u_char tos;
+ uint8_t pr;
+ uint16_t cscov = 0;
/*
* udp_output() may need to temporarily bind or connect the current
@@ -1050,12 +1189,14 @@
*
* XXXRW: Check that hash locking update here is correct.
*/
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ pcbinfo = get_inpcbinfo(pr);
sin = (struct sockaddr_in *)addr;
if (sin != NULL &&
(inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
INP_RUNLOCK(inp);
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
unlock_udbinfo = UH_WLOCKED;
} else if ((sin != NULL && (
(sin->sin_addr.s_addr == INADDR_ANY) ||
@@ -1063,7 +1204,7 @@
(inp->inp_laddr.s_addr == INADDR_ANY) ||
(inp->inp_lport == 0))) ||
(src.sin_family == AF_INET)) {
- INP_HASH_RLOCK(&V_udbinfo);
+ INP_HASH_RLOCK(pcbinfo);
unlock_udbinfo = UH_RLOCKED;
} else
unlock_udbinfo = UH_UNLOCKED;
@@ -1076,7 +1217,7 @@
laddr = inp->inp_laddr;
lport = inp->inp_lport;
if (src.sin_family == AF_INET) {
- INP_HASH_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
if ((lport == 0) ||
(laddr.s_addr == INADDR_ANY &&
src.sin_addr.s_addr == INADDR_ANY)) {
@@ -1127,7 +1268,7 @@
inp->inp_lport == 0 ||
sin->sin_addr.s_addr == INADDR_ANY ||
sin->sin_addr.s_addr == INADDR_BROADCAST) {
- INP_HASH_LOCK_ASSERT(&V_udbinfo);
+ INP_HASH_LOCK_ASSERT(pcbinfo);
error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
&lport, &faddr.s_addr, &fport, NULL,
td->td_ucred);
@@ -1142,7 +1283,7 @@
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(&V_udbinfo);
+ INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1176,7 +1317,7 @@
* link-layer headers. Immediate slide the data pointer back forward
* since we won't use that space at this layer.
*/
- M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
+ M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto release;
@@ -1191,13 +1332,31 @@
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
- ui->ui_pr = IPPROTO_UDP;
+ ui->ui_pr = pr;
ui->ui_src = laddr;
ui->ui_dst = faddr;
ui->ui_sport = lport;
ui->ui_dport = fport;
ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
+ if (pr == IPPROTO_UDPLITE) {
+ struct udpcb *up;
+ uint16_t plen;
+ up = intoudpcb(inp);
+ cscov = up->u_txcslen;
+ plen = (u_short)len + sizeof(struct udphdr);
+ if (cscov >= plen)
+ cscov = 0;
+ ui->ui_len = htons(plen);
+ ui->ui_ulen = htons(cscov);
+ /*
+ * For UDP-Lite, checksum coverage length of zero means
+ * the entire UDPLite packet is covered by the checksum.
+ */
+ cscov_partial = (cscov == 0) ? 0 : 1;
+ } else
+ ui->ui_v = IPVERSION << 4;
+
/*
* Set the Don't Fragment bit in the IP header.
*/
@@ -1205,7 +1364,7 @@
struct ip *ip;
ip = (struct ip *)&ui->ui_i;
- ip->ip_off |= IP_DF;
+ ip->ip_off |= htons(IP_DF);
}
ipflags = 0;
@@ -1223,24 +1382,35 @@
/*
* Set up checksum and output datagram.
*/
- if (V_udp_cksum) {
+ ui->ui_sum = 0;
+ if (pr == IPPROTO_UDPLITE) {
if (inp->inp_flags & INP_ONESBCAST)
faddr.s_addr = INADDR_BROADCAST;
+ if (cscov_partial) {
+ if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
+ ui->ui_sum = 0xffff;
+ } else {
+ if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
+ ui->ui_sum = 0xffff;
+ }
+ } else if (V_udp_cksum) {
+ if (inp->inp_flags & INP_ONESBCAST)
+ faddr.s_addr = INADDR_BROADCAST;
ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
- htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
+ htons((u_short)len + sizeof(struct udphdr) + pr));
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- } else
- ui->ui_sum = 0;
- ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+ }
+ ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK(&V_udbinfo);
+ INP_HASH_RUNLOCK(pcbinfo);
+ UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options, NULL, ipflags,
inp->inp_moptions, inp);
if (unlock_udbinfo == UH_WLOCKED)
@@ -1251,10 +1421,10 @@
release:
if (unlock_udbinfo == UH_WLOCKED) {
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
} else if (unlock_udbinfo == UH_RLOCKED) {
- INP_HASH_RUNLOCK(&V_udbinfo);
+ INP_HASH_RUNLOCK(pcbinfo);
INP_RUNLOCK(inp);
} else
INP_RUNLOCK(inp);
@@ -1292,7 +1462,7 @@
if (minlen > m->m_pkthdr.len)
minlen = m->m_pkthdr.len;
if ((m = m_pullup(m, minlen)) == NULL) {
- V_ipsec4stat.in_inval++;
+ IPSECSTAT_INC(ips_in_inval);
return (NULL); /* Bypass caller processing. */
}
data = mtod(m, caddr_t); /* Points to ip header. */
@@ -1332,7 +1502,7 @@
uint32_t spi;
if (payload <= sizeof(struct esp)) {
- V_ipsec4stat.in_inval++;
+ IPSECSTAT_INC(ips_in_inval);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1353,7 +1523,7 @@
tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
2 * sizeof(uint16_t), M_NOWAIT);
if (tag == NULL) {
- V_ipsec4stat.in_nomem++;
+ IPSECSTAT_INC(ips_in_nomem);
m_freem(m);
return (NULL); /* Discard. */
}
@@ -1382,7 +1552,7 @@
m_adj(m, skip);
ip = mtod(m, struct ip *);
- ip->ip_len -= skip;
+ ip->ip_len = htons(ntohs(ip->ip_len) - skip);
ip->ip_p = IPPROTO_ESP;
/*
@@ -1401,15 +1571,17 @@
udp_abort(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -1419,17 +1591,19 @@
udp_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
error = soreserve(so, udp_sendspace, udp_recvspace);
if (error)
return (error);
- INP_INFO_WLOCK(&V_udbinfo);
- error = in_pcballoc(so, &V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
+ error = in_pcballoc(so, pcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
@@ -1441,18 +1615,18 @@
if (error) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (0);
}
#endif /* INET */
int
-udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f)
+udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, void *ctx)
{
struct inpcb *inp;
struct udpcb *up;
@@ -1468,6 +1642,7 @@
return (EBUSY);
}
up->u_tun_func = f;
+ up->u_tun_ctx = ctx;
INP_WUNLOCK(inp);
return (0);
}
@@ -1477,14 +1652,16 @@
udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbbind(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
}
@@ -1493,15 +1670,17 @@
udp_close(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_close: inp == NULL"));
INP_WLOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -1511,9 +1690,11 @@
udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ struct sockaddr_in *sin;
int error;
- struct sockaddr_in *sin;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
INP_WLOCK(inp);
@@ -1527,9 +1708,9 @@
INP_WUNLOCK(inp);
return (error);
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
INP_WUNLOCK(inp);
@@ -1540,13 +1721,15 @@
udp_detach(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
("udp_detach: not disconnected"));
- INP_INFO_WLOCK(&V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
@@ -1553,7 +1736,7 @@
inp->inp_ppcb = NULL;
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
udp_discardcb(up);
}
@@ -1561,7 +1744,9 @@
udp_disconnect(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -1569,10 +1754,10 @@
INP_WUNLOCK(inp);
return (ENOTCONN);
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
Modified: trunk/sys/netinet/udp_var.h
===================================================================
--- trunk/sys/netinet/udp_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet/udp_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet/udp_var.h 234780 2012-04-29 08:50:50Z bz $
+ * $FreeBSD: stable/10/sys/netinet/udp_var.h 278812 2015-02-15 20:53:50Z marius $
*/
#ifndef _NETINET_UDP_VAR_H_
@@ -43,6 +43,7 @@
struct udphdr ui_u; /* udp header */
};
#define ui_x1 ui_i.ih_x1
+#define ui_v ui_i.ih_x1[0]
#define ui_pr ui_i.ih_pr
#define ui_len ui_i.ih_len
#define ui_src ui_i.ih_src
@@ -52,8 +53,12 @@
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum
-typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *);
+struct inpcb;
+struct mbuf;
+typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *,
+ const struct sockaddr *, void *);
+
/*
* UDP control block; one per udp.
*/
@@ -60,6 +65,9 @@
struct udpcb {
udp_tun_func_t u_tun_func; /* UDP kernel tunneling callback. */
u_int u_flags; /* Generic UDP flags. */
+ uint16_t u_rxcslen; /* Coverage for incoming datagrams. */
+ uint16_t u_txcslen; /* Coverage for outgoing datagrams. */
+ void *u_tun_ctx; /* Tunneling callback context. */
};
#define intoudpcb(ip) ((struct udpcb *)(ip)->inp_ppcb)
@@ -73,30 +81,34 @@
struct udpstat {
/* input statistics: */
- u_long udps_ipackets; /* total input packets */
- u_long udps_hdrops; /* packet shorter than header */
- u_long udps_badsum; /* checksum error */
- u_long udps_nosum; /* no checksum */
- u_long udps_badlen; /* data length larger than packet */
- u_long udps_noport; /* no socket on port */
- u_long udps_noportbcast; /* of above, arrived as broadcast */
- u_long udps_fullsock; /* not delivered, input socket full */
- u_long udpps_pcbcachemiss; /* input packets missing pcb cache */
- u_long udpps_pcbhashmiss; /* input packets not for hashed pcb */
+ uint64_t udps_ipackets; /* total input packets */
+ uint64_t udps_hdrops; /* packet shorter than header */
+ uint64_t udps_badsum; /* checksum error */
+ uint64_t udps_nosum; /* no checksum */
+ uint64_t udps_badlen; /* data length larger than packet */
+ uint64_t udps_noport; /* no socket on port */
+ uint64_t udps_noportbcast; /* of above, arrived as broadcast */
+ uint64_t udps_fullsock; /* not delivered, input socket full */
+ uint64_t udpps_pcbcachemiss; /* input packets missing pcb cache */
+ uint64_t udpps_pcbhashmiss; /* input packets not for hashed pcb */
/* output statistics: */
- u_long udps_opackets; /* total output packets */
- u_long udps_fastout; /* output packets on fast path */
+ uint64_t udps_opackets; /* total output packets */
+ uint64_t udps_fastout; /* output packets on fast path */
/* of no socket on port, arrived as multicast */
- u_long udps_noportmcast;
- u_long udps_filtermcast; /* blocked by multicast filter */
+ uint64_t udps_noportmcast;
+ uint64_t udps_filtermcast; /* blocked by multicast filter */
};
#ifdef _KERNEL
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct udpstat, udpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
-#define UDPSTAT_ADD(name, val) V_udpstat.name += (val)
+#define UDPSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct udpstat, udpstat, name, (val))
#define UDPSTAT_INC(name) UDPSTAT_ADD(name, 1)
/*
@@ -103,8 +115,8 @@
* Kernel module consumers must use this accessor macro.
*/
void kmod_udpstat_inc(int statnum);
-#define KMOD_UDPSTAT_INC(name) \
- kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(u_long))
+#define KMOD_UDPSTAT_INC(name) \
+ kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t))
#endif
/*
@@ -117,48 +129,60 @@
#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
#define UDPCTL_MAXID 6
-#define UDPCTL_NAMES { \
- { 0, 0 }, \
- { "checksum", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "maxdgram", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
-}
-
#ifdef _KERNEL
+#include <netinet/in_pcb.h>
SYSCTL_DECL(_net_inet_udp);
extern struct pr_usrreqs udp_usrreqs;
VNET_DECLARE(struct inpcbhead, udb);
VNET_DECLARE(struct inpcbinfo, udbinfo);
+VNET_DECLARE(struct inpcbhead, ulitecb);
+VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
#define V_udb VNET(udb)
#define V_udbinfo VNET(udbinfo)
+#define V_ulitecb VNET(ulitecb)
+#define V_ulitecbinfo VNET(ulitecbinfo)
extern u_long udp_sendspace;
extern u_long udp_recvspace;
VNET_DECLARE(int, udp_cksum);
-VNET_DECLARE(struct udpstat, udpstat);
VNET_DECLARE(int, udp_blackhole);
#define V_udp_cksum VNET(udp_cksum)
-#define V_udpstat VNET(udpstat)
#define V_udp_blackhole VNET(udp_blackhole)
extern int udp_log_in_vain;
-int udp_newudpcb(struct inpcb *);
-void udp_discardcb(struct udpcb *);
+static __inline struct inpcbinfo *
+get_inpcbinfo(uint8_t protocol)
+{
+ return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
+}
-void udp_ctlinput(int, struct sockaddr *, void *);
-int udp_ctloutput(struct socket *, struct sockopt *);
-void udp_init(void);
+static __inline struct inpcbhead *
+get_pcblist(uint8_t protocol)
+{
+ return (protocol == IPPROTO_UDP) ? &V_udb : &V_ulitecb;
+}
+
+int udp_newudpcb(struct inpcb *);
+void udp_discardcb(struct udpcb *);
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+void udplite_ctlinput(int, struct sockaddr *, void *);
+int udp_ctloutput(struct socket *, struct sockopt *);
+void udp_init(void);
+void udplite_init(void);
#ifdef VIMAGE
-void udp_destroy(void);
+void udp_destroy(void);
+void udplite_destroy(void);
#endif
-void udp_input(struct mbuf *, int);
+void udp_input(struct mbuf *, int);
+void udplite_input(struct mbuf *, int);
struct inpcb *udp_notify(struct inpcb *inp, int errno);
-int udp_shutdown(struct socket *so);
+int udp_shutdown(struct socket *so);
-int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f);
-#endif
+int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f,
+ void *ctx);
-#endif
+#endif /* _KERNEL */
+
+#endif /* _NETINET_UDP_VAR_H_ */
Added: trunk/sys/netinet/udplite.h
===================================================================
--- trunk/sys/netinet/udplite.h (rev 0)
+++ trunk/sys/netinet/udplite.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,39 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014, Kevin Lo
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: stable/10/sys/netinet/udplite.h 265946 2014-05-13 06:05:53Z kevlo $
+ */
+
+#ifndef _NETINET_UDPLITE_H_
+#define _NETINET_UDPLITE_H_
+
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define UDPLITE_SEND_CSCOV 2 /* Sender checksum coverage. */
+#define UDPLITE_RECV_CSCOV 4 /* Receiver checksum coverage. */
+
+#endif /* !_NETINET_UDPLITE_H_ */
Property changes on: trunk/sys/netinet/udplite.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet6/dest6.c
===================================================================
--- trunk/sys/netinet6/dest6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/dest6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/dest6.c 196019 2009-08-01 19:26:27Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/dest6.c 249294 2013-04-09 07:11:22Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
Modified: trunk/sys/netinet6/frag6.c
===================================================================
--- trunk/sys/netinet6/frag6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/frag6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/frag6.c 238479 2012-07-15 11:27:15Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/frag6.c 329158 2018-02-12 13:52:58Z ae $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -223,9 +223,8 @@
offset += sizeof(struct ip6_frag);
/*
- * XXX-BZ RFC XXXX (draft-gont-6man-ipv6-atomic-fragments)
- * Handle "atomic" fragments (offset and m bit set to 0) upfront,
- * unrelated to any reassembly. Just skip the fragment header.
+ * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
+ * upfront, unrelated to any reassembly. Just skip the fragment header.
*/
if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
/* XXX-BZ we want dedicated counters for this. */
@@ -535,6 +534,11 @@
af6 = ip6af->ip6af_down;
frag6_deq(ip6af);
while (af6 != (struct ip6asfrag *)q6) {
+ m->m_pkthdr.csum_flags &=
+ IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
+ m->m_pkthdr.csum_data +=
+ IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
+
af6dwn = af6->ip6af_down;
frag6_deq(af6);
while (t->m_next)
@@ -545,6 +549,10 @@
af6 = af6dwn;
}
+ while (m->m_pkthdr.csum_data & 0xffff0000)
+ m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
+ (m->m_pkthdr.csum_data >> 16);
+
/* adjust offset to point where the original next header starts */
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
free(ip6af, M_FTABLE);
@@ -557,36 +565,23 @@
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /* Delete frag6 header */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST */
- ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* this comes with no copy if the boundary is on cluster */
- if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
+ frag6_remque(q6);
+ V_frag6_nfrags -= q6->ip6q_nfrag;
#ifdef MAC
- mac_ip6q_destroy(q6);
+ mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
- goto dropfrag;
- }
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
+ free(q6, M_FTABLE);
+ V_frag6_nfragpackets--;
+
+ goto dropfrag;
}
/*
* Store NXT to the original.
*/
- {
- char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
- *prvnxtp = nxt;
- }
+ m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
+ (caddr_t)&nxt);
frag6_remque(q6);
V_frag6_nfrags -= q6->ip6q_nfrag;
@@ -791,3 +786,27 @@
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
+
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+{
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct mbuf *t;
+
+ /* Delete frag6 header. */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST. */
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+ offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ } else {
+ /* This comes with no copy if the boundary is on cluster. */
+ if ((t = m_split(m, offset, wait)) == NULL)
+ return (ENOMEM);
+ m_adj(t, sizeof(struct ip6_frag));
+ m_cat(m, t);
+ }
+
+ return (0);
+}
Modified: trunk/sys/netinet6/icmp6.c
===================================================================
--- trunk/sys/netinet6/icmp6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/icmp6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,11 +62,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/icmp6.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/icmp6.c 303459 2016-07-28 20:11:34Z sbruno $");
#include "opt_inet.h"
#include "opt_inet6.h"
-#include "opt_ipsec.h"
#include <sys/param.h>
#include <sys/domain.h>
@@ -108,15 +107,15 @@
#include <netinet6/nd6.h>
#include <netinet6/send.h>
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
-
extern struct domain inet6domain;
-VNET_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_SYSINIT(icmp6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
+#endif /* VIMAGE */
+
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
VNET_DECLARE(struct inpcbhead, ripcb);
VNET_DECLARE(int, icmp6errppslim);
@@ -131,7 +130,7 @@
#define V_icmp6errppslim_last VNET(icmp6errppslim_last)
#define V_icmp6_nodeinfo VNET(icmp6_nodeinfo)
-static void icmp6_errcount(struct icmp6errstat *, int, int);
+static void icmp6_errcount(int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag(struct in6_addr *,
@@ -156,42 +155,42 @@
kmod_icmp6stat_inc(int statnum)
{
- (*((u_quad_t *)&V_icmp6stat + statnum))++;
+ counter_u64_add(VNET(icmp6stat)[statnum], 1);
}
static void
-icmp6_errcount(struct icmp6errstat *stat, int type, int code)
+icmp6_errcount(int type, int code)
{
switch (type) {
case ICMP6_DST_UNREACH:
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
- stat->icp6errs_dst_unreach_noroute++;
+ ICMP6STAT_INC(icp6s_odst_unreach_noroute);
return;
case ICMP6_DST_UNREACH_ADMIN:
- stat->icp6errs_dst_unreach_admin++;
+ ICMP6STAT_INC(icp6s_odst_unreach_admin);
return;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
- stat->icp6errs_dst_unreach_beyondscope++;
+ ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
return;
case ICMP6_DST_UNREACH_ADDR:
- stat->icp6errs_dst_unreach_addr++;
+ ICMP6STAT_INC(icp6s_odst_unreach_addr);
return;
case ICMP6_DST_UNREACH_NOPORT:
- stat->icp6errs_dst_unreach_noport++;
+ ICMP6STAT_INC(icp6s_odst_unreach_noport);
return;
}
break;
case ICMP6_PACKET_TOO_BIG:
- stat->icp6errs_packet_too_big++;
+ ICMP6STAT_INC(icp6s_opacket_too_big);
return;
case ICMP6_TIME_EXCEEDED:
switch (code) {
case ICMP6_TIME_EXCEED_TRANSIT:
- stat->icp6errs_time_exceed_transit++;
+ ICMP6STAT_INC(icp6s_otime_exceed_transit);
return;
case ICMP6_TIME_EXCEED_REASSEMBLY:
- stat->icp6errs_time_exceed_reassembly++;
+ ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
return;
}
break;
@@ -198,21 +197,21 @@
case ICMP6_PARAM_PROB:
switch (code) {
case ICMP6_PARAMPROB_HEADER:
- stat->icp6errs_paramprob_header++;
+ ICMP6STAT_INC(icp6s_oparamprob_header);
return;
case ICMP6_PARAMPROB_NEXTHEADER:
- stat->icp6errs_paramprob_nextheader++;
+ ICMP6STAT_INC(icp6s_oparamprob_nextheader);
return;
case ICMP6_PARAMPROB_OPTION:
- stat->icp6errs_paramprob_option++;
+ ICMP6STAT_INC(icp6s_oparamprob_option);
return;
}
break;
case ND_REDIRECT:
- stat->icp6errs_redirect++;
+ ICMP6STAT_INC(icp6s_oredirect);
return;
}
- stat->icp6errs_unknown++;
+ ICMP6STAT_INC(icp6s_ounknown);
}
/*
@@ -263,7 +262,7 @@
ICMP6STAT_INC(icp6s_error);
/* count per-type-code statistics */
- icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);
+ icmp6_errcount(type, code);
#ifdef M_DECRYPTED /*not openbsd*/
if (m->m_flags & M_DECRYPTED) {
@@ -361,9 +360,7 @@
m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
- M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. */
- if (m && m->m_len < preplen)
- m = m_pullup(m, preplen);
+ M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */
if (m == NULL) {
nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
return;
@@ -579,25 +576,18 @@
if ((n->m_flags & M_EXT) != 0
|| n->m_len < off + sizeof(struct icmp6_hdr)) {
struct mbuf *n0 = n;
- const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
int n0len;
- MGETHDR(n, M_DONTWAIT, n0->m_type);
- n0len = n0->m_pkthdr.len; /* save for use below */
- if (n)
- M_MOVE_PKTHDR(n, n0); /* FIB copied. */
- if (n && maxlen >= MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
+ n = m_gethdr(M_NOWAIT, n0->m_type);
if (n == NULL) {
/* Give up remote */
m_freem(n0);
break;
}
+
+ m_move_pkthdr(n, n0); /* FIB copied. */
+ n0len = n0->m_pkthdr.len; /* save for use below */
/*
* Copy IPv6 and ICMPv6 only.
*/
@@ -684,31 +674,27 @@
} else {
struct prison *pr;
u_char *p;
- int maxlen, maxhlen, hlen;
+ int maxhlen, hlen;
/*
* XXX: this combination of flags is pointless,
* but should we keep this for compatibility?
*/
- if ((V_icmp6_nodeinfo & 5) != 5)
+ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
+ ICMP6_NODEINFO_TMPADDROK)) !=
+ (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
break;
if (code != 0)
goto badcode;
- maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
- if (maxlen >= MCLBYTES) {
+
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
+ n = m_gethdr(M_NOWAIT, m->m_type);
+ if (n == NULL) {
/* Give up remote */
break;
}
- MGETHDR(n, M_DONTWAIT, m->m_type);
- if (n && maxlen > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
+ if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
/*
* Previous code did a blind M_COPY_PKTHDR
* and said "just for rcvif". If true, then
@@ -719,13 +705,8 @@
m_free(n);
n = NULL;
}
- if (n == NULL) {
- /* Give up remote */
- break;
- }
- n->m_pkthdr.rcvif = NULL;
- n->m_len = 0;
- maxhlen = M_TRAILINGSPACE(n) - maxlen;
+ maxhlen = M_TRAILINGSPACE(n) -
+ (sizeof(*nip6) + sizeof(*nicmp6) + 4);
pr = curthread->td_ucred->cr_prison;
mtx_lock(&pr->pr_mtx);
hlen = strlen(pr->pr_hostname);
@@ -768,7 +749,7 @@
goto badcode;
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* give up local */
/* Send incoming SeND packet to user space. */
@@ -806,7 +787,7 @@
goto badcode;
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -837,7 +818,7 @@
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -866,7 +847,7 @@
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -897,7 +878,7 @@
goto badcode;
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -1178,8 +1159,6 @@
ip6cp.ip6c_src = &icmp6src;
ip6cp.ip6c_nxt = nxt;
- m_addr_changed(m);
-
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
notifymtu = ntohl(icmp6->icmp6_mtu);
ip6cp.ip6c_cmdarg = (void *)&notifymtu;
@@ -1243,6 +1222,7 @@
mtu = IPV6_MMTU - 8;
bzero(&inc, sizeof(inc));
+ inc.inc_fibnum = M_GETFIB(m);
inc.inc_flags |= INC_ISIPV6;
inc.inc6_faddr = *dst;
if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
@@ -1497,26 +1477,23 @@
break;
}
- /* allocate an mbuf to reply. */
- MGETHDR(n, M_DONTWAIT, m->m_type);
+ /* Allocate an mbuf to reply. */
+ if (replylen > MCLBYTES) {
+ /*
+ * XXX: should we try to allocate more? But MCLBYTES
+ * is probably much larger than IPV6_MMTU...
+ */
+ goto bad;
+ }
+ if (replylen > MHLEN)
+ n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, m->m_type);
if (n == NULL) {
m_freem(m);
return (NULL);
}
- M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */
- if (replylen > MHLEN) {
- if (replylen > MCLBYTES) {
- /*
- * XXX: should we try to allocate more? But MCLBYTES
- * is probably much larger than IPV6_MMTU...
- */
- goto bad;
- }
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- goto bad;
- }
- }
+ m_move_pkthdr(n, m); /* just for recvif and FIB */
n->m_pkthdr.len = n->m_len = replylen;
/* copy mbuf header and IPv6 + Node Information base headers */
@@ -1611,16 +1588,13 @@
else
len = MCLBYTES;
- /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (m && len > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0)
- goto fail;
- }
- if (!m)
+ /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
+ if (len > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
goto fail;
- m->m_next = NULL;
if (old) {
m->m_len = len;
@@ -1953,8 +1927,8 @@
ltime = ND6_INFINITE_LIFETIME;
else {
if (ifa6->ia6_lifetime.ia6t_expire >
- time_second)
- ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
+ time_uptime)
+ ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
else
ltime = 0;
}
@@ -2066,7 +2040,7 @@
*/
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
m->m_len <= MHLEN) {
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data,
@@ -2116,7 +2090,7 @@
m->m_len <= MHLEN) {
struct mbuf *n;
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data, m->m_len);
@@ -2299,8 +2273,6 @@
m->m_flags &= ~(M_BCAST|M_MCAST);
- m_addr_changed(m);
-
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif)
icmp6_ifoutstat_inc(outif, type, code);
@@ -2528,9 +2500,6 @@
sdst.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
-#ifdef IPSEC
- key_sa_routechange((struct sockaddr *)&sdst);
-#endif /* IPSEC */
}
freeit:
@@ -2559,7 +2528,7 @@
struct ifnet *outif = NULL;
struct sockaddr_in6 src_sa;
- icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
+ icmp6_errcount(ND_REDIRECT, 0);
/* if we are not router, we don't send icmp6 redirect */
if (!V_ip6_forwarding)
@@ -2597,14 +2566,10 @@
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (m && IPV6_MMTU >= MHLEN)
- MCLGET(m, M_DONTWAIT);
- if (!m)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m == NULL)
goto fail;
M_SETFIB(m, rt->rt_fibnum);
- m->m_pkthdr.rcvif = NULL;
- m->m_len = 0;
maxlen = M_TRAILINGSPACE(m);
maxlen = min(IPV6_MMTU, maxlen);
/* just for safety */
Modified: trunk/sys/netinet6/icmp6.h
===================================================================
--- trunk/sys/netinet6/icmp6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/icmp6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet6/icmp6.h 62587 2000-07-04 16:35:15Z itojun $ */
+/* $FreeBSD: stable/10/sys/netinet6/icmp6.h 62587 2000-07-04 16:35:15Z itojun $ */
/* $KAME: icmp6.h,v 1.17 2000/06/11 17:23:40 jinmei Exp $ */
#error "netinet6/icmp6.h is obsolete. use netinet/icmp6.h"
Modified: trunk/sys/netinet6/in6.c
===================================================================
--- trunk/sys/netinet6/in6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6.c 303458 2016-07-28 20:08:01Z sbruno $");
#include "opt_compat.h"
#include "opt_inet.h"
@@ -96,6 +96,7 @@
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
+#include <netinet/ip_carp.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -106,6 +107,9 @@
#include <netinet6/scope6_var.h>
#include <netinet6/in6_pcb.h>
+VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
+#define V_icmp6_nodeinfo_oldmcprefix VNET(icmp6_nodeinfo_oldmcprefix)
+
/*
* Definitions of some costant IP6 addresses.
*/
@@ -152,27 +156,28 @@
ia = ifa2ia6(ifa);
ifp = ifa->ifa_ifp;
- IF_AFDATA_LOCK(ifp);
- ifa->ifa_rtrequest = nd6_rtrequest;
- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
- LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
- IF_AFDATA_UNLOCK(ifp);
- if (ln != NULL) {
- ln->la_expire = 0; /* for IPv6 this means permanent */
- ln->ln_state = ND6_LLINFO_REACHABLE;
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = 6;
- memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
- sizeof(ln->ll_addr));
- LLE_WUNLOCK(ln);
+ /*
+ * initialize for rtmsg generation
+ */
+ bzero(&gateway, sizeof(gateway));
+ gateway.sdl_len = sizeof(gateway);
+ gateway.sdl_family = AF_LINK;
+ if (nd6_need_cache(ifp) != 0) {
+ IF_AFDATA_LOCK(ifp);
+ ifa->ifa_rtrequest = nd6_rtrequest;
+ ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
+ LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
+ IF_AFDATA_UNLOCK(ifp);
+ if (ln != NULL) {
+ ln->la_expire = 0; /* for IPv6 this means permanent */
+ ln->ln_state = ND6_LLINFO_REACHABLE;
+
+ gateway.sdl_alen = 6;
+ memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
+ sizeof(ln->ll_addr));
+ LLE_WUNLOCK(ln);
+ }
}
-
bzero(&rt, sizeof(rt));
rt.rt_gateway = (struct sockaddr *)&gateway;
memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
@@ -269,8 +274,16 @@
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+ int carp_attached = 0;
int error;
+ u_long ocmd = cmd;
+ /*
+ * Compat to make pre-10.x ifconfig(8) operable.
+ */
+ if (cmd == OSIOCAIFADDR_IN6)
+ cmd = SIOCAIFADDR_IN6;
+
switch (cmd) {
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
@@ -282,7 +295,7 @@
return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
}
- switch(cmd) {
+ switch (cmd) {
case SIOCAADDRCTL_POLICY:
case SIOCDADDRCTL_POLICY:
if (td != NULL) {
@@ -311,8 +324,6 @@
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
- case SIOCGDRLST_IN6:
- case SIOCGPRLST_IN6:
case SIOCGNBRINFO_IN6:
case SIOCGDEFIFACE_IN6:
return (nd6_ioctl(cmd, data, ifp));
@@ -354,14 +365,10 @@
if (error)
return (error);
}
- return (scope6_set(ifp,
- (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+ /* FALLTHROUGH */
case SIOCGSCOPE6:
- return (scope6_get(ifp,
- (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
case SIOCGSCOPE6DEF:
- return (scope6_get_default((struct scope6_id *)
- ifr->ifr_ifru.ifru_scope_id));
+ return (scope6_ioctl(cmd, data, ifp));
}
switch (cmd) {
@@ -420,6 +427,18 @@
case SIOCGIFSTAT_ICMP6:
sa6 = &ifr->ifr_addr;
break;
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFNETMASK:
+ /*
+ * Although we should pass any non-INET6 ioctl requests
+ * down to driver, we filter some legacy INET requests.
+ * Drivers trust SIOCSIFADDR et al to come from an already
+ * privileged layer, and do not perform any credentials
+ * checks or input validation.
+ */
+ return (EINVAL);
default:
sa6 = NULL;
break;
@@ -480,6 +499,13 @@
if (error)
goto out;
}
+ /* FALLTHROUGH */
+ case SIOCGIFSTAT_IN6:
+ case SIOCGIFSTAT_ICMP6:
+ if (ifp->if_afdata[AF_INET6] == NULL) {
+ error = EPFNOSUPPORT;
+ goto out;
+ }
break;
case SIOCGIFADDR_IN6:
@@ -512,12 +538,12 @@
/* sanity for overflow - beware unsigned */
lt = &ifr->ifr_ifru.ifru_lifetime;
if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_vltime + time_second < time_second) {
+ lt->ia6t_vltime + time_uptime < time_uptime) {
error = EINVAL;
goto out;
}
if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_pltime + time_second < time_second) {
+ lt->ia6t_pltime + time_uptime < time_uptime) {
error = EINVAL;
goto out;
}
@@ -555,25 +581,17 @@
break;
case SIOCGIFSTAT_IN6:
- if (ifp == NULL) {
- error = EINVAL;
- goto out;
- }
- bzero(&ifr->ifr_ifru.ifru_stat,
- sizeof(ifr->ifr_ifru.ifru_stat));
- ifr->ifr_ifru.ifru_stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->in6_ifstat,
+ &ifr->ifr_ifru.ifru_stat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFSTAT_ICMP6:
- if (ifp == NULL) {
- error = EINVAL;
- goto out;
- }
- bzero(&ifr->ifr_ifru.ifru_icmp6stat,
- sizeof(ifr->ifr_ifru.ifru_icmp6stat));
- ifr->ifr_ifru.ifru_icmp6stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->icmp6_ifstat,
+ &ifr->ifr_ifru.ifru_icmp6stat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFALIFETIME_IN6:
@@ -621,12 +639,12 @@
/* for sanity */
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
+ time_uptime + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
+ time_uptime + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
break;
@@ -654,6 +672,18 @@
break;
}
+ if (cmd == ocmd && ifra->ifra_vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa,
+ ifra->ifra_vhid);
+ else
+ error = EPROTONOSUPPORT;
+ if (error)
+ goto out;
+ else
+ carp_attached = 1;
+ }
+
/*
* then, make the prefix on-link on the interface.
* XXX: we'd rather create the prefix before the address, but
@@ -670,7 +700,8 @@
pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
NULL);
if (pr0.ndpr_plen == 128) {
- break; /* we don't need to install a host route. */
+ /* we don't need to install a host route. */
+ goto aifaddr_out;
}
pr0.ndpr_prefix = ifra->ifra_addr;
/* apply the mask for safety. */
@@ -697,13 +728,10 @@
* nd6_prelist_add will install the corresponding
* interface route.
*/
- if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
+ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
+ if (carp_attached)
+ (*carp_detach_p)(&ia->ia_ifa);
goto out;
- if (pr == NULL) {
- log(LOG_ERR, "nd6_prelist_add succeeded but "
- "no prefix\n");
- error = EINVAL;
- goto out;
}
}
@@ -733,32 +761,29 @@
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
- if (error == 0 && ia) {
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
- /*
- * Try to clear the flag when a new
- * IPv6 address is added onto an
- * IFDISABLED interface and it
- * succeeds.
- */
- struct in6_ndireq nd;
+aifaddr_out:
+ if (error != 0 || ia == NULL)
+ break;
+ /*
+ * Try to clear the flag when a new IPv6 address is added
+ * onto an IFDISABLED interface and it succeeds.
+ */
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ struct in6_ndireq nd;
- memset(&nd, 0, sizeof(nd));
- nd.ndi.flags = ND_IFINFO(ifp)->flags;
- nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
- if (nd6_ioctl(SIOCSIFINFO_FLAGS,
- (caddr_t)&nd, ifp) < 0)
- log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
- "SIOCSIFINFO_FLAGS for -ifdisabled "
- "failed.");
- /*
- * Ignore failure of clearing the flag
- * intentionally. The failure means
- * address duplication was detected.
- */
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ memset(&nd, 0, sizeof(nd));
+ nd.ndi.flags = ND_IFINFO(ifp)->flags;
+ nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+ if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
+ log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+ "SIOCSIFINFO_FLAGS for -ifdisabled "
+ "failed.");
+ /*
+ * Ignore failure of clearing the flag intentionally.
+ * The failure means address duplication was detected.
+ */
}
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
break;
}
@@ -785,7 +810,7 @@
}
default:
- if (ifp == NULL || ifp->if_ioctl == 0) {
+ if (ifp->if_ioctl == NULL) {
error = EOPNOTSUPP;
goto out;
}
@@ -922,6 +947,17 @@
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
}
+ if (V_icmp6_nodeinfo_oldmcprefix &&
+ in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
+ imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ if (imm == NULL)
+ nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
+ &mltaddr.sin6_addr), if_name(ifp), error));
+ /* XXX not very fatal, go on... */
+ else
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+ }
/*
* Join interface-local all-nodes address.
@@ -968,7 +1004,6 @@
* Update parameters of an IPv6 interface address.
* If necessary, a new entry is created and linked into address chains.
* This function is separated from in6_control().
- * XXX: should this be performed under splnet()?
*/
int
in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
@@ -1102,7 +1137,7 @@
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
ia->ia_addr.sin6_family = AF_INET6;
ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
- ia->ia6_createtime = time_second;
+ ia->ia6_createtime = time_uptime;
if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
/*
* XXX: some functions expect that ifa_dstaddr is not
@@ -1123,11 +1158,13 @@
ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */
IN6_IFADDR_WLOCK();
TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_INSERT_HEAD(IN6ADDR_HASH(&ifra->ifra_addr.sin6_addr),
+ ia, ia6_hash);
IN6_IFADDR_WUNLOCK();
}
/* update timestamp */
- ia->ia6_updatetime = time_second;
+ ia->ia6_updatetime = time_uptime;
/* set prefix mask */
if (ifra->ifra_prefixmask.sin6_len) {
@@ -1146,6 +1183,7 @@
goto unlink;
}
ia->ia_prefixmask = ifra->ifra_prefixmask;
+ ia->ia_prefixmask.sin6_family = AF_INET6;
}
/*
@@ -1176,12 +1214,12 @@
ia->ia6_lifetime = ifra->ifra_lifetime;
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
+ time_uptime + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
+ time_uptime + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
@@ -1199,7 +1237,7 @@
*/
if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
ia->ia6_lifetime.ia6t_pltime = 0;
- ia->ia6_lifetime.ia6t_preferred = time_second;
+ ia->ia6_lifetime.ia6t_preferred = time_uptime;
}
/*
* Make the address tentative before joining multicast addresses,
@@ -1207,11 +1245,13 @@
* source address.
*/
ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */
- if (hostIsNew && in6if_do_dad(ifp))
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
- /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ /*
+ * DAD should be performed for an new address or addresses on
+ * an interface with ND6_IFF_IFDISABLED.
+ */
+ if (in6if_do_dad(ifp) &&
+ (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
ia->ia6_flags |= IN6_IFF_TENTATIVE;
/*
@@ -1233,13 +1273,8 @@
goto cleanup;
}
- /*
- * Perform DAD, if needed.
- * XXX It may be of use, if we can administratively disable DAD.
- */
- if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
- (ia->ia6_flags & IN6_IFF_TENTATIVE))
- {
+ /* Perform DAD, if the address is TENTATIVE. */
+ if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
int mindelay, maxdelay;
delay = 0;
@@ -1434,6 +1469,9 @@
int plen, error;
struct ifaddr *ifa0;
+ if (ifa->ifa_carp)
+ (*carp_detach_p)(ifa);
+
/*
* find another IPv6 address as the gateway for the
* link-local and node-local all-nodes multicast
@@ -1492,7 +1530,7 @@
static void
in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
- int s = splnet();
+ char ip6buf[INET6_ADDRSTRLEN];
IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
@@ -1506,6 +1544,7 @@
*/
IN6_IFADDR_WLOCK();
TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_REMOVE(ia, ia6_hash);
IN6_IFADDR_WUNLOCK();
/*
@@ -1515,7 +1554,7 @@
if (ia->ia6_ndpr == NULL) {
nd6log((LOG_NOTICE,
"in6_unlink_ifa: autoconf'ed address "
- "%p has no prefix\n", ia));
+ "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
} else {
ia->ia6_ndpr->ndpr_refcnt--;
ia->ia6_ndpr = NULL;
@@ -1530,7 +1569,6 @@
pfxlist_onlink_check();
}
ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */
- splx(s);
}
void
@@ -1569,6 +1607,10 @@
* in the future.
* RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
* address encoding scheme. (see figure on page 8)
+ * Notifies other subsystems about address change/arrival:
+ * 1) Notifies device handler on the first IPv6 address assignment
+ * 2) Handle routing table changes for P2P links and route
+ * 3) Handle routing table changes for address host route
*/
static int
in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
@@ -1826,7 +1868,6 @@
struct sockaddr_in6 *sin6, int newhost)
{
int error = 0, plen, ifacount = 0;
- int s = splimp();
struct ifaddr *ifa;
/*
@@ -1846,12 +1887,9 @@
if (ifacount <= 1 && ifp->if_ioctl) {
error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
- if (error) {
- splx(s);
+ if (error)
return (error);
- }
}
- splx(s);
ia->ia_ifa.ifa_metric = ifp->if_metric;
@@ -1948,6 +1986,32 @@
}
/*
+ * Find a link-local scoped address on ifp and return it if any.
+ */
+struct in6_ifaddr *
+in6ifa_llaonifp(struct ifnet *ifp)
+{
+ struct sockaddr_in6 *sin6;
+ struct ifaddr *ifa;
+
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ return (NULL);
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
+ break;
+ }
+ IF_ADDR_RUNLOCK(ifp);
+
+ return ((struct in6_ifaddr *)ifa);
+}
+
+/*
* Convert IP6 address to printable (loggable) representation. Caller
* has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
*/
@@ -2060,7 +2124,7 @@
struct in6_ifaddr *ia;
IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
IN6_IFADDR_RUNLOCK();
return (1);
@@ -2070,7 +2134,6 @@
return (0);
}
-
int
in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
@@ -2077,15 +2140,14 @@
struct in6_ifaddr *ia;
IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &sa6->sin6_addr) &&
- (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
- IN6_IFADDR_RUNLOCK();
- return (1); /* true */
+ LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
+ if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) {
+ if (ia->ia6_flags & IN6_IFF_DEPRECATED) {
+ IN6_IFADDR_RUNLOCK();
+ return (1); /* true */
+ }
+ break;
}
-
- /* XXX: do we still have to go thru the rest of the list? */
}
IN6_IFADDR_RUNLOCK();
@@ -2297,7 +2359,8 @@
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD))
return (0);
switch (ifp->if_type) {
@@ -2318,13 +2381,13 @@
* However, some interfaces can be up before the RUNNING
* status. Additionaly, users may try to assign addresses
* before the interface becomes up (or running).
- * We simply skip DAD in such a case as a work around.
- * XXX: we should rather mark "tentative" on such addresses,
- * and do DAD after the interface becomes ready.
+ * This function returns EAGAIN in that case.
+ * The caller should mark "tentative" on the address instead of
+ * performing DAD immediately.
*/
if (!((ifp->if_flags & IFF_UP) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)))
- return (0);
+ return (EAGAIN);
return (1);
}
@@ -2379,6 +2442,7 @@
#ifdef IFT_MIP
case IFT_MIP: /* ditto */
#endif
+ case IFT_BRIDGE: /* bridge(4) only does Ethernet-like links */
case IFT_INFINIBAND:
return (64);
case IFT_FDDI: /* RFC2467 */
@@ -2448,8 +2512,7 @@
lle->base.lle_refcnt = 1;
lle->base.lle_free = in6_lltable_free;
LLE_LOCK_INIT(&lle->base);
- callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
- CALLOUT_RETURNUNLOCKED);
+ callout_init(&lle->base.ln_timer_ch, 1);
return (&lle->base);
}
@@ -2552,6 +2615,7 @@
if (lle == NULL) {
if (!(flags & LLE_CREATE))
return (NULL);
+ IF_AFDATA_WLOCK_ASSERT(ifp);
/*
* A route that covers the given address must have
* been installed 1st because we are doing a resolution,
@@ -2580,6 +2644,7 @@
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
lle->la_flags |= LLE_DELETED;
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
@@ -2650,6 +2715,8 @@
ndpc.sin6.sin6_family = AF_INET6;
ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
+ if (V_deembed_scopeid)
+ sa6_recoverscope(&ndpc.sin6);
/* publish */
if (lle->la_flags & LLE_PUB)
@@ -2681,17 +2748,26 @@
{
struct in6_ifextra *ext;
+ /* There are not IPv6-capable interfaces. */
+ switch (ifp->if_type) {
+ case IFT_PFLOG:
+ case IFT_PFSYNC:
+ case IFT_USB:
+ return (NULL);
+ }
ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
bzero(ext, sizeof(*ext));
- ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
+ ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
- ext->icmp6_ifstat =
- (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
+ ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
+ M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
ext->nd_ifinfo = nd6_ifattach(ifp);
ext->scope6_id = scope6_ifattach(ifp);
@@ -2716,7 +2792,11 @@
scope6_ifdetach(ext->scope6_id);
nd6_ifdetach(ext->nd_ifinfo);
lltable_free(ext->lltable);
+ COUNTER_ARRAY_FREE(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
free(ext->in6_ifstat, M_IFADDR);
+ COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
free(ext->icmp6_ifstat, M_IFADDR);
free(ext, M_IFADDR);
}
Modified: trunk/sys/netinet6/in6.h
===================================================================
--- trunk/sys/netinet6/in6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
- * $FreeBSD: stable/9/sys/netinet6/in6.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6.h 272662 2014-10-06 17:08:19Z tuexen $
*/
#ifndef __KAME_NETINET_IN_H_INCLUDED_
@@ -362,11 +362,11 @@
#define IFA6_IS_DEPRECATED(a) \
((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_pltime)
#define IFA6_IS_INVALID(a) \
((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_vltime)
#endif /* _KERNEL */
@@ -623,13 +623,18 @@
#endif /* __BSD_VISIBLE */
/*
- * Redefinition of mbuf flags
+ * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/,
+ * the protocol specific mbuf flags are shared between them.
*/
-#define M_AUTHIPHDR M_PROTO2
-#define M_DECRYPTED M_PROTO3
-#define M_LOOP M_PROTO4
-#define M_AUTHIPDGM M_PROTO5
-#define M_RTALERT_MLD M_PROTO6
+#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
+#define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */
+#define M_AUTHIPHDR M_PROTO4
+#define M_DECRYPTED M_PROTO5
+#define M_LOOP M_PROTO6
+#define M_AUTHIPDGM M_PROTO7
+#define M_RTALERT_MLD M_PROTO8
#ifdef _KERNEL
struct cmsghdr;
@@ -637,6 +642,8 @@
int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
+int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t,
+ u_int32_t);
int in6_localaddr(struct in6_addr *);
int in6_localip(struct in6_addr *);
int in6_addrscope(struct in6_addr *);
Modified: trunk/sys/netinet6/in6_cksum.c
===================================================================
--- trunk/sys/netinet6/in6_cksum.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_cksum.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_cksum.c 238227 2012-07-08 10:29:01Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_cksum.c 272662 2014-10-06 17:08:19Z tuexen $");
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -146,9 +146,11 @@
* off is an offset where TCP/UDP/ICMP6 header starts.
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
+ * cov is the number of bytes to be taken into account for the checksum
*/
int
-in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+in6_cksum_partial(struct mbuf *m, u_int8_t nxt, u_int32_t off,
+ u_int32_t len, u_int32_t cov)
{
struct ip6_hdr *ip6;
u_int16_t *w, scope;
@@ -216,9 +218,9 @@
}
w = (u_int16_t *)(mtod(m, u_char *) + off);
mlen = m->m_len - off;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -274,7 +276,7 @@
* Lastly calculate a summary of the rest of mbufs.
*/
- for (;m && len; m = m->m_next) {
+ for (;m && cov; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_int16_t *);
@@ -291,12 +293,12 @@
sum += s_util.s;
w = (u_int16_t *)((char *)w + 1);
mlen = m->m_len - 1;
- len--;
+ cov--;
} else
mlen = m->m_len;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -344,7 +346,7 @@
} else if (mlen == -1)
s_util.c[0] = *(char *)w;
}
- if (len)
+ if (cov)
panic("in6_cksum: out of data");
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
@@ -356,3 +358,9 @@
REDUCE;
return (~sum & 0xffff);
}
+
+int
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+{
+ return (in6_cksum_partial(m, nxt, off, len, len));
+}
Modified: trunk/sys/netinet6/in6_gif.c
===================================================================
--- trunk/sys/netinet6/in6_gif.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_gif.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,12 +31,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_gif.c 232292 2012-02-29 09:47:26Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_gif.c 284072 2015-06-06 13:26:13Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -50,7 +52,9 @@
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -61,7 +65,6 @@
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/in6_var.h>
#endif
#include <netinet6/ip6protosw.h>
@@ -72,7 +75,8 @@
#include <net/if_gif.h>
-VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+#define GIF_HLIM 30
+static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
#define V_ip6_gif_hlim VNET(ip6_gif_hlim)
SYSCTL_DECL(_net_inet6_ip6);
@@ -81,9 +85,10 @@
static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
struct ifnet *);
+static int in6_gif_input(struct mbuf **, int *, int);
extern struct domain inet6domain;
-struct ip6protosw in6_gif_protosw = {
+static struct ip6protosw in6_gif_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = 0, /* IPPROTO_IPV[46] */
@@ -95,112 +100,24 @@
};
int
-in6_gif_output(struct ifnet *ifp,
- int family, /* family of the packet to be encapsulate */
- struct mbuf *m)
+in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
- struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
- struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc;
- struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst;
struct ip6_hdr *ip6;
- struct etherip_header eiphdr;
- int error, len, proto;
- u_int8_t itos, otos;
+ int len;
- GIF_LOCK_ASSERT(sc);
-
- if (sin6_src == NULL || sin6_dst == NULL ||
- sin6_src->sin6_family != AF_INET6 ||
- sin6_dst->sin6_family != AF_INET6) {
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- {
- struct ip *ip;
-
- proto = IPPROTO_IPV4;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return ENOBUFS;
- }
- ip = mtod(m, struct ip *);
- itos = ip->ip_tos;
- break;
- }
-#endif
-#ifdef INET6
- case AF_INET6:
- {
- struct ip6_hdr *ip6;
- proto = IPPROTO_IPV6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return ENOBUFS;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- break;
- }
-#endif
- case AF_LINK:
- proto = IPPROTO_ETHERIP;
-
- /*
- * GIF_SEND_REVETHIP (disabled by default) intentionally
- * sends an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- eiphdr.eip_ver = 0;
- eiphdr.eip_resvl = ETHERIP_VERSION;
- eiphdr.eip_resvh = 0;
- } else {
- eiphdr.eip_ver = ETHERIP_VERSION;
- eiphdr.eip_resvl = 0;
- eiphdr.eip_resvh = 0;
- }
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
- sizeof(struct etherip_header));
- break;
-
- default:
-#ifdef DEBUG
- printf("in6_gif_output: warning: unknown family %d passed\n",
- family);
-#endif
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
/* prepend new IP header */
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK)
+ if (proto == IPPROTO_ETHERIP)
len += ETHERIP_ALIGN;
#endif
- M_PREPEND(m, len, M_DONTWAIT);
- if (m != NULL && m->m_len < len)
- m = m_pullup(m, len);
- if (m == NULL) {
- printf("ENOBUFS in in6_gif_output %d\n", __LINE__);
- return ENOBUFS;
- }
+ M_PREPEND(m, len, M_NOWAIT);
+ if (m == NULL)
+ return (ENOBUFS);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK) {
+ if (proto == IPPROTO_ETHERIP) {
len = mtod(m, vm_offset_t) & 3;
KASSERT(len == 0 || len == ETHERIP_ALIGN,
("in6_gif_output: unexpected misalignment"));
@@ -210,167 +127,52 @@
#endif
ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = 0;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)m->m_pkthdr.len);
- ip6->ip6_nxt = proto;
- ip6->ip6_hlim = V_ip6_gif_hlim;
- ip6->ip6_src = sin6_src->sin6_addr;
- /* bidirectional configured tunnel mode */
- if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr))
- ip6->ip6_dst = sin6_dst->sin6_addr;
- else {
+ GIF_RLOCK(sc);
+ if (sc->gif_family != AF_INET6) {
m_freem(m);
- return ENETUNREACH;
+ GIF_RUNLOCK(sc);
+ return (ENETDOWN);
}
- ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
- &otos, &itos);
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+ bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
+ GIF_RUNLOCK(sc);
- M_SETFIB(m, sc->gif_fibnum);
-
- if (dst->sin6_family != sin6_dst->sin6_family ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) {
- /* cache route doesn't match */
- bzero(dst, sizeof(*dst));
- dst->sin6_family = sin6_dst->sin6_family;
- dst->sin6_len = sizeof(struct sockaddr_in6);
- dst->sin6_addr = sin6_dst->sin6_addr;
- if (sc->gif_ro6.ro_rt) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-#if 0
- GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
- }
-
- if (sc->gif_ro6.ro_rt == NULL) {
- in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum);
- if (sc->gif_ro6.ro_rt == NULL) {
- m_freem(m);
- return ENETUNREACH;
- }
-
- /* if it constitutes infinite encapsulation, punt. */
- if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
- m_freem(m);
- return ENETUNREACH; /*XXX*/
- }
-#if 0
- ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu
- - sizeof(struct ip6_hdr);
-#endif
- }
-
- m_addr_changed(m);
-
-#ifdef IPV6_MINMTU
+ ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
+ ip6->ip6_nxt = proto;
+ ip6->ip6_hlim = V_ip6_gif_hlim;
/*
* force fragmentation to minimum MTU, to avoid path MTU discovery.
* it is too painful to ask for resend of inner packet, to achieve
* path MTU discovery for encapsulated packets.
*/
- error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL);
-#else
- error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL);
-#endif
-
- if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
- sc->gif_ro6.ro_rt != NULL) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-
- return (error);
+ return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
}
-int
+static int
in6_gif_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
- struct ifnet *gifp = NULL;
+ struct ifnet *gifp;
struct gif_softc *sc;
struct ip6_hdr *ip6;
- int af = 0;
- u_int32_t otos;
+ uint8_t ecn;
- ip6 = mtod(m, struct ip6_hdr *);
-
- sc = (struct gif_softc *)encap_getarg(m);
+ sc = encap_getarg(m);
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
-
gifp = GIF2IFP(sc);
- if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+ if ((gifp->if_flags & IFF_UP) != 0) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ m_adj(m, *offp);
+ gif_input(m, gifp, proto, ecn);
+ } else {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
}
-
- otos = ip6->ip6_flow;
- m_adj(m, *offp);
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
- {
- struct ip *ip;
- u_int8_t otos8;
- af = AF_INET;
- otos8 = (ntohl(otos) >> 20) & 0xff;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return IPPROTO_DONE;
- }
- ip = mtod(m, struct ip *);
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos8, &ip->ip_tos) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif /* INET */
-#ifdef INET6
- case IPPROTO_IPV6:
- {
- struct ip6_hdr *ip6;
- af = AF_INET6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return IPPROTO_DONE;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &ip6->ip6_flow) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
-
- default:
- IP6STAT_INC(ip6s_nogif);
- m_freem(m);
- return IPPROTO_DONE;
- }
-
- gif_input(m, af, gifp);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
/*
@@ -380,19 +182,22 @@
gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
struct ifnet *ifp)
{
- struct sockaddr_in6 *src, *dst;
+ int ret;
- src = (struct sockaddr_in6 *)sc->gif_psrc;
- dst = (struct sockaddr_in6 *)sc->gif_pdst;
-
+ GIF_RLOCK_ASSERT(sc);
/*
* Check for address match. Note that the check is for an incoming
* packet. We should compare the *source* address in our configuration
* and the *destination* address of the packet, and vice versa.
*/
- if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
- return 0;
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+ return (0);
+ ret = 128;
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
+ if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+ return (0);
+ } else
+ ret += 128;
/* martian filters on outer source - done in ip6_input */
@@ -410,29 +215,22 @@
rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL,
sc->gif_fibnum);
if (!rt || rt->rt_ifp != ifp) {
-#if 0
- char ip6buf[INET6_ADDRSTRLEN];
- log(LOG_WARNING, "%s: packet from %s dropped "
- "due to ingress filter\n", if_name(GIF2IFP(sc)),
- ip6_sprintf(ip6buf, &sin6.sin6_addr));
-#endif
if (rt)
RTFREE_LOCKED(rt);
- return 0;
+ return (0);
}
RTFREE_LOCKED(rt);
}
- return 128 * 2;
+ return (ret);
}
/*
* we know that we are in IFF_UP, outer address available, and outer family
* matched the physical addr family. see gif_encapcheck().
- * sanity check for arg should have been done in the caller.
*/
int
-gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
+in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip6_hdr ip6;
struct gif_softc *sc;
@@ -440,31 +238,21 @@
/* sanity check done in caller */
sc = (struct gif_softc *)arg;
+ GIF_RLOCK_ASSERT(sc);
- /* LINTED const cast */
m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6);
ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
- return gif_validate6(&ip6, sc, ifp);
+ return (gif_validate6(&ip6, sc, ifp));
}
int
in6_gif_attach(struct gif_softc *sc)
{
- sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
+
+ KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+ sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
(void *)&in6_gif_protosw, sc);
- if (sc->encap_cookie6 == NULL)
- return EEXIST;
- return 0;
+ if (sc->gif_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
-
-int
-in6_gif_detach(struct gif_softc *sc)
-{
- int error;
-
- error = encap_detach(sc->encap_cookie6);
- if (error == 0)
- sc->encap_cookie6 = NULL;
- return error;
-}
Modified: trunk/sys/netinet6/in6_ifattach.c
===================================================================
--- trunk/sys/netinet6/in6_ifattach.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_ifattach.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_ifattach.c 233200 2012-03-19 20:49:16Z jhb $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_ifattach.c 287734 2015-09-13 02:09:06Z hrs $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -267,6 +267,7 @@
/* get EUI64 */
switch (ifp->if_type) {
+ case IFT_BRIDGE:
case IFT_ETHER:
case IFT_L2VLAN:
case IFT_FDDI:
@@ -595,12 +596,6 @@
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
- /* we don't need to perform DAD on loopback interfaces. */
- ifra.ifra_flags |= IN6_IFF_NODAD;
-
- /* skip registration to the prefix list. XXX should be temporary. */
- ifra.ifra_flags |= IN6_IFF_NOPFX;
-
/*
* We are sure that this is a newly assigned address, so we can set
* NULL to the 3rd arg.
@@ -617,13 +612,16 @@
/*
* compute NI group address, based on the current hostname setting.
- * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later).
+ * see RFC 4620.
*
* when ifp == NULL, the caller is responsible for filling scopeid.
+ *
+ * If oldmcprefix == 1, FF02:0:0:0:0:2::/96 is used for NI group address
+ * while it is FF02:0:0:0:0:2:FF00::/104 in RFC 4620.
*/
-int
-in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
- struct in6_addr *in6)
+static int
+in6_nigroup0(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6, int oldmcprefix)
{
struct prison *pr;
const char *p;
@@ -668,7 +666,7 @@
*q = *q - 'A' + 'a';
}
- /* generate 8 bytes of pseudo-random value. */
+ /* generate 16 bytes of pseudo-random value. */
bzero(&ctxt, sizeof(ctxt));
MD5Init(&ctxt);
MD5Update(&ctxt, &l, sizeof(l));
@@ -678,7 +676,14 @@
bzero(in6, sizeof(*in6));
in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
in6->s6_addr8[11] = 2;
- bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+ if (oldmcprefix == 0) {
+ in6->s6_addr8[12] = 0xff;
+ /* Copy the first 24 bits of 128-bit hash into the address. */
+ bcopy(digest, &in6->s6_addr8[13], 3);
+ } else {
+ /* Copy the first 32 bits of 128-bit hash into the address. */
+ bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+ }
if (in6_setscope(in6, ifp, NULL))
return (-1); /* XXX: should not fail */
@@ -685,6 +690,22 @@
return 0;
}
+int
+in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
+{
+
+ return (in6_nigroup0(ifp, name, namelen, in6, 0));
+}
+
+int
+in6_nigroup_oldmcprefix(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
+{
+
+ return (in6_nigroup0(ifp, name, namelen, in6, 1));
+}
+
/*
* XXX multiple loopback interface needs more care. for instance,
* nodelocal address needs to be configured onto only one of them.
@@ -698,19 +719,12 @@
struct in6_ifaddr *ia;
struct in6_addr in6;
- /* some of the interfaces are inherently not IPv6 capable */
- switch (ifp->if_type) {
- case IFT_PFLOG:
- case IFT_PFSYNC:
- case IFT_CARP:
+ if (ifp->if_afdata[AF_INET6] == NULL)
return;
- }
-
/*
* quirks based on interface type
*/
switch (ifp->if_type) {
-#ifdef IFT_STF
case IFT_STF:
/*
* 6to4 interface is a very special kind of beast.
@@ -718,8 +732,8 @@
* linklocals for 6to4 interface, but there's no use and
* it is rather harmful to have one.
*/
- goto statinit;
-#endif
+ ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
+ break;
default:
break;
}
@@ -753,8 +767,7 @@
/*
* assign a link-local address, if there's none.
*/
- if (ifp->if_type != IFT_BRIDGE &&
- !(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) {
int error;
@@ -771,10 +784,6 @@
ifa_free(&ia->ia_ifa);
}
-#ifdef IFT_STF /* XXX */
-statinit:
-#endif
-
/* update dynamically. */
if (V_in6_maxmtu < ifp->if_mtu)
V_in6_maxmtu = ifp->if_mtu;
@@ -795,6 +804,9 @@
struct sockaddr_in6 sin6;
struct in6_multi_mship *imm;
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
+
/* remove neighbor management table */
nd6_purge(ifp);
@@ -839,6 +851,7 @@
}
in6_pcbpurgeif0(&V_udbinfo, ifp);
+ in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
in6_pcbpurgeif0(&V_ripcbinfo, ifp);
/* leave from all multicast groups joined */
in6_purgemaddrs(ifp);
@@ -918,6 +931,8 @@
bzero(nullbuf, sizeof(nullbuf));
TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
ndi = ND_IFINFO(ifp);
if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
/*
Modified: trunk/sys/netinet6/in6_ifattach.h
===================================================================
--- trunk/sys/netinet6/in6_ifattach.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_ifattach.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: in6_ifattach.h,v 1.14 2001/02/08 12:48:39 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/in6_ifattach.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_ifattach.h 250251 2013-05-04 19:16:26Z hrs $
*/
#ifndef _NETINET6_IN6_IFATTACH_H_
@@ -41,6 +41,7 @@
void in6_tmpaddrtimer(void *);
int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
+int in6_nigroup_oldmcprefix(struct ifnet *, const char *, int, struct in6_addr *);
#endif /* _KERNEL */
#endif /* _NETINET6_IN6_IFATTACH_H_ */
Modified: trunk/sys/netinet6/in6_mcast.c
===================================================================
--- trunk/sys/netinet6/in6_mcast.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_mcast.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_mcast.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_mcast.c 321134 2017-07-18 16:58:52Z ngie $");
#include "opt_inet6.h"
@@ -132,7 +132,9 @@
static int in6m_get_source(struct in6_multi *inm,
const struct in6_addr *addr, const int noalloc,
struct ip6_msource **pims);
+#ifdef KTR
static int in6m_is_ifp_detached(const struct in6_multi *);
+#endif
static int in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
static void in6m_purge(struct in6_multi *);
static void in6m_reap(struct in6_multi *);
@@ -176,6 +178,7 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
"Per-interface stack-wide source filters");
+#ifdef KTR
/*
* Inline function which wraps assertions for a valid ifp.
* The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
@@ -198,6 +201,7 @@
return (ifp == NULL);
}
+#endif
/*
* Initialize an in6_mfilter structure to a known state at t0, t1
@@ -997,9 +1001,10 @@
/* Decrement ASM listener count on transition out of ASM mode. */
if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
- (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
+ (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__);
--inm->in6m_st[1].iss_asm;
+ }
}
/* Increment ASM listener count on transition to ASM mode. */
@@ -1072,6 +1077,8 @@
free(ims, M_IP6MSOURCE);
inm->in6m_nsrc--;
}
+ /* Free state-change requests that might be queued. */
+ _IF_DRAIN(&inm->in6m_scq);
}
/*
@@ -1446,16 +1453,15 @@
CTR1(KTR_MLD, "%s: merge inm state", __func__);
error = in6m_merge(inm, imf);
- if (error) {
+ if (error)
CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
- goto out_im6f_rollback;
+ else {
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
+ if (error)
+ CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
}
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error)
- CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
-
IN6_MULTI_UNLOCK();
out_im6f_rollback:
@@ -1626,6 +1632,8 @@
* has asked for, but we always tell userland how big the
* buffer really needs to be.
*/
+ if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
+ msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
tss = NULL;
if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
@@ -1775,8 +1783,6 @@
("%s: not INP_IPV6 inpcb", __func__));
KASSERT(gsin6->sin6_family == AF_INET6,
("%s: not AF_INET6 group", __func__));
- KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr),
- ("%s: not multicast", __func__));
ifp = NULL;
memset(&ro6, 0, sizeof(struct route_in6));
@@ -1845,8 +1851,7 @@
if (mreq.ipv6mr_interface == 0) {
ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
} else {
- if (mreq.ipv6mr_interface < 0 ||
- V_if_index < mreq.ipv6mr_interface)
+ if (V_if_index < mreq.ipv6mr_interface)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(mreq.ipv6mr_interface);
}
@@ -2041,29 +2046,27 @@
if (is_new) {
error = in6_mc_join_locked(ifp, &gsa->sin6.sin6_addr, imf,
&inm, 0);
- if (error)
+ if (error) {
+ IN6_MULTI_UNLOCK();
goto out_im6o_free;
+ }
imo->im6o_membership[idx] = inm;
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
error = in6m_merge(inm, imf);
- if (error) {
+ if (error)
CTR1(KTR_MLD, "%s: failed to merge inm state",
__func__);
- goto out_im6f_rollback;
+ else {
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
+ if (error)
+ CTR1(KTR_MLD, "%s: failed mld downcall",
+ __func__);
}
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error) {
- CTR1(KTR_MLD, "%s: failed mld downcall",
- __func__);
- goto out_im6f_rollback;
- }
}
IN6_MULTI_UNLOCK();
-
-out_im6f_rollback:
INP_WLOCK_ASSERT(inp);
if (error) {
im6f_rollback(imf);
@@ -2192,7 +2195,7 @@
* XXX SCOPE6 lock potentially taken here.
*/
if (ifindex != 0) {
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(ifindex);
if (ifp == NULL)
@@ -2290,23 +2293,20 @@
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
error = in6m_merge(inm, imf);
- if (error) {
+ if (error)
CTR1(KTR_MLD, "%s: failed to merge inm state",
__func__);
- goto out_im6f_rollback;
+ else {
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
+ if (error)
+ CTR1(KTR_MLD, "%s: failed mld downcall",
+ __func__);
}
-
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error) {
- CTR1(KTR_MLD, "%s: failed mld downcall",
- __func__);
- }
}
IN6_MULTI_UNLOCK();
-out_im6f_rollback:
if (error)
im6f_rollback(imf);
else
@@ -2350,13 +2350,17 @@
error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
if (error)
return (error);
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EINVAL);
-
- ifp = ifnet_byindex(ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
- return (EADDRNOTAVAIL);
-
+ if (ifindex == 0)
+ ifp = NULL;
+ else {
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+ }
imo = in6p_findmoptions(inp);
imo->im6o_multicast_ifp = ifp;
INP_WUNLOCK(inp);
@@ -2515,16 +2519,15 @@
*/
CTR1(KTR_MLD, "%s: merge inm state", __func__);
error = in6m_merge(inm, imf);
- if (error) {
+ if (error)
CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
- goto out_im6f_rollback;
+ else {
+ CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
+ error = mld_change_state(inm, 0);
+ if (error)
+ CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
}
- CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
- if (error)
- CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
-
IN6_MULTI_UNLOCK();
out_im6f_rollback:
Modified: trunk/sys/netinet6/in6_pcb.c
===================================================================
--- trunk/sys/netinet6/in6_pcb.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcb.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_pcb.c 234279 2012-04-14 10:36:43Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_pcb.c 309108 2016-11-24 14:48:46Z jch $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -107,8 +107,6 @@
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
-struct in6_addr zeroin6_addr;
-
int
in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred)
@@ -159,7 +157,7 @@
* and a multicast address is bound on both
* new and duplicated sockets.
*/
- if (so->so_options & SO_REUSEADDR)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct ifaddr *ifa;
@@ -246,8 +244,7 @@
if (tw == NULL ||
(reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else if (t && (reuseport == 0 ||
- (t->inp_flags2 & INP_REUSEPORT) == 0)) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0) {
return (EADDRINUSE);
}
#ifdef INET
@@ -268,8 +265,8 @@
INP_IPV6PROTO) ==
(t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
- } else if (t && (reuseport == 0 ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ } else if (t &&
+ (reuseport & inp_so_options(t)) == 0 &&
(ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
(t->inp_vflag & INP_IPV6PROTO) != 0))
return (EADDRINUSE);
@@ -626,18 +623,12 @@
/*
* If the error designates a new path MTU for a destination
* and the application (associated with this socket) wanted to
- * know the value, notify. Note that we notify for all
- * disconnected sockets if the corresponding application
- * wanted. This is because some UDP applications keep sending
- * sockets disconnected.
+ * know the value, notify.
* XXX: should we avoid to notify the value to TCP sockets?
*/
- if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
- (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
- IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
+ if (cmd == PRC_MSGSIZE && cmdarg != NULL)
ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
- (u_int32_t *)cmdarg);
- }
+ *(u_int32_t *)cmdarg);
/*
* Detect if we should notify the error. If no source and
@@ -778,7 +769,7 @@
struct ip6_moptions *im6o;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
@@ -809,7 +800,7 @@
}
INP_WUNLOCK(in6p);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
Modified: trunk/sys/netinet6/in6_pcb.h
===================================================================
--- trunk/sys/netinet6/in6_pcb.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcb.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/in6_pcb.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_pcb.h 241916 2012-10-22 21:49:56Z delphij $
*/
#ifndef _NETINET6_IN6_PCB_H_
Modified: trunk/sys/netinet6/in6_pcbgroup.c
===================================================================
--- trunk/sys/netinet6/in6_pcbgroup.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_pcbgroup.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -30,7 +30,7 @@
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_pcbgroup.c 222748 2011-06-06 12:55:02Z rwatson $");
#include "opt_inet6.h"
Modified: trunk/sys/netinet6/in6_proto.c
===================================================================
--- trunk/sys/netinet6/in6_proto.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_proto.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_proto.c 239936 2012-08-31 06:38:43Z maxim $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_proto.c 284066 2015-06-06 12:44:42Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -127,10 +127,6 @@
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-#endif
-
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -212,7 +208,7 @@
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_input = sctp6_input,
- .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctlinput = sctp6_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp6_usrreqs
@@ -219,6 +215,19 @@
},
#endif /* SCTP */
{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_UDPLITE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp6_input,
+ .pr_ctlinput = udplite6_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+#ifndef INET /* Do not call initialization twice. */
+ .pr_init = udplite_init,
+#endif
+ .pr_usrreqs = &udp6_usrreqs,
+},
+{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_RAW,
@@ -323,6 +332,17 @@
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip6_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
@@ -432,16 +452,6 @@
VNET_DEFINE(int, pmtu_expire) = 60*10;
VNET_DEFINE(int, pmtu_probe) = 60*2;
-/* raw IP6 parameters */
-/*
- * Nominal space allocated to a raw ip socket.
- */
-#define RIPV6SNDQ 8192
-#define RIPV6RCVQ 8192
-
-VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ;
-VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ;
-
/* ICMPV6 parameters */
VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */
@@ -449,12 +459,8 @@
/* control how to respond to NI queries */
VNET_DEFINE(int, icmp6_nodeinfo) =
(ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
+VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
-/* UDP on IP6 parameters */
-VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */
-VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6));
- /* 40 1K datagrams */
-
/*
* sysctl related items.
*/
@@ -480,8 +486,6 @@
int error = 0;
int old;
- VNET_SYSCTL_ARG(req, arg1);
-
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);
@@ -501,8 +505,6 @@
int error = 0;
int old;
- VNET_SYSCTL_ARG(req, arg1);
-
error = SYSCTL_OUT(req, arg1, sizeof(int));
if (error || !req->newptr)
return (error);
@@ -521,8 +523,8 @@
&VNET_NAME(ip6_sendredirects), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW,
&VNET_NAME(ip6_defhlim), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(ip6stat), ip6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
+ ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
@@ -574,8 +576,9 @@
CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
"Default value of per-interface flag for automatically adding an IPv6"
" link-local address to interfaces when attached");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW,
- &VNET_NAME(rip6stat), rip6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
+ struct rip6stat, rip6stat,
+ "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
@@ -589,23 +592,14 @@
&VNET_NAME(ip6stealth), 0, "");
#endif
-#ifdef FLOWTABLE
-VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip6_ft);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-
-SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
- &VNET_NAME(ip6_output_flowtable_size), 2048,
- "number of entries in the per-cpu output flow caches");
-#endif
-
/* net.inet6.icmp6 */
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(icmp6stat), icmp6stat, "");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
+ struct icmp6stat, icmp6stat,
+ "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW,
&VNET_NAME(nd6_prune), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW,
@@ -618,6 +612,11 @@
CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW,
&VNET_NAME(icmp6_nodeinfo), 0, "");
+SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
+ nodeinfo_oldmcprefix, CTLFLAG_RW,
+ &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
+ "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
+ " for compatibility with KAME implememtation.");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
Modified: trunk/sys/netinet6/in6_rmx.c
===================================================================
--- trunk/sys/netinet6/in6_rmx.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_rmx.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -61,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_rmx.c 242646 2012-11-06 01:18:53Z melifaro $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_rmx.c 314667 2017-03-04 13:03:31Z avg $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -136,8 +136,8 @@
}
}
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
- rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
+ if (!rt->rt_mtu && rt->rt_ifp)
+ rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
ret = rn_addroute(v_arg, n_arg, head, treenodes);
if (ret == NULL) {
@@ -207,12 +207,11 @@
if (!rt)
panic("rt == NULL in in6_mtuexpire");
- if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
- if (rt->rt_rmx.rmx_expire <= time_uptime) {
+ if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
+ if (rt->rt_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
- ap->nextstop = lmin(ap->nextstop,
- rt->rt_rmx.rmx_expire);
+ ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
}
}
@@ -278,7 +277,7 @@
rnh->rnh_addaddr = in6_addroute;
if (V__in6_rt_was_here == 0) {
- callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
+ callout_init(&V_rtq_mtutimer, 1);
in6_mtutimo(curvnet); /* kick off timeout first time */
V__in6_rt_was_here = 1;
}
@@ -292,7 +291,7 @@
{
callout_drain(&V_rtq_mtutimer);
- return (1);
+ return (rn_detachhead(head));
}
#endif
Modified: trunk/sys/netinet6/in6_src.c
===================================================================
--- trunk/sys/netinet6/in6_src.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_src.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/in6_src.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/in6_src.c 297445 2016-03-31 09:55:21Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -141,7 +141,7 @@
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
- void *);
+ void *);
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
@@ -250,19 +250,27 @@
(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
return (error);
- ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
- (struct sockaddr *)&srcsock);
- if (ia6 == NULL ||
- (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
- if (ia6 != NULL)
- ifa_free(&ia6->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ /*
+ * If the IPV6_BINDANY socket option is set, allow non-local
+ * addresses to be specified as the source address in
+ * IPV6_PKTINFO ancillary data.
+ */
+ if ((inp->inp_flags & INP_BINDANY) == 0) {
+ ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
+ (struct sockaddr *)&srcsock);
+ if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY))) {
+ if (ia6 != NULL)
+ ifa_free(&ia6->ia_ifa);
+ return (EADDRNOTAVAIL);
+ }
+ bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
+ ifa_free(&ia6->ia_ifa);
+ } else
+ bcopy(&srcsock.sin6_addr, srcp, sizeof(*srcp));
pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
if (ifpp)
*ifpp = ifp;
- bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
- ifa_free(&ia6->ia_ifa);
return (0);
}
@@ -444,6 +452,24 @@
REPLACE(8);
/*
+ * Rule 9: prefer address with better virtual status.
+ */
+ if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
+ REPLACE(9);
+ if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
+ NEXT(9);
+
+ /*
+ * Rule 10: prefer address with `prefer_source' flag.
+ */
+ if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
+ (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0)
+ REPLACE(10);
+ if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 &&
+ (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0)
+ NEXT(10);
+
+ /*
* Rule 14: Use longest matching prefix.
* Note: in the address selection draft, this rule is
* documented as "Rule 8". However, since it is also
@@ -608,6 +634,7 @@
if (ron->ro_rt == NULL) {
in6_rtalloc(ron, fibnum); /* multi path case? */
if (ron->ro_rt == NULL) {
+ /* XXX-BZ WT.? */
if (ron->ro_rt) {
RTFREE(ron->ro_rt);
ron->ro_rt = NULL;
@@ -1114,8 +1141,7 @@
}
static int
-walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
- void *w)
+walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
{
struct addrsel_policyent *pol;
int error = 0;
Modified: trunk/sys/netinet6/in6_var.h
===================================================================
--- trunk/sys/netinet6/in6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/in6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/in6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/in6_var.h 287734 2015-09-13 02:09:06Z hrs $
*/
#ifndef _NETINET6_IN6_VAR_H_
@@ -68,6 +68,7 @@
#include <sys/tree.h>
#ifdef _KERNEL
+#include <sys/fnv_hash.h>
#include <sys/libkern.h>
#endif
@@ -98,15 +99,29 @@
struct lltable;
struct mld_ifinfo;
+#ifdef _KERNEL
+#include <sys/counter.h>
+
struct in6_ifextra {
- struct in6_ifstat *in6_ifstat;
- struct icmp6_ifstat *icmp6_ifstat;
+ counter_u64_t *in6_ifstat;
+ counter_u64_t *icmp6_ifstat;
struct nd_ifinfo *nd_ifinfo;
struct scope6_id *scope6_id;
struct lltable *lltable;
struct mld_ifinfo *mld_ifinfo;
};
+#else
+struct in6_ifextra {
+ void *in6_ifstat;
+ void *icmp6_ifstat;
+ struct nd_ifinfo *nd_ifinfo;
+ struct scope6_id *scope6_id;
+ struct lltable *lltable;
+ struct mld_ifinfo *mld_ifinfo;
+};
+#endif /* !_KERNEL */
+
#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)
struct in6_ifaddr {
@@ -132,10 +147,13 @@
/* multicast addresses joined from the kernel */
LIST_HEAD(, in6_multi_mship) ia6_memberships;
+ /* entry in bucket of inet6 addresses */
+ LIST_ENTRY(in6_ifaddr) ia6_hash;
};
/* List of in6_ifaddr's. */
TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr);
+LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr);
/* control structure to manage address selection policy */
struct in6_addrpolicy {
@@ -150,37 +168,37 @@
* IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
*/
struct in6_ifstat {
- u_quad_t ifs6_in_receive; /* # of total input datagram */
- u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
- u_quad_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
- u_quad_t ifs6_in_noroute; /* # of datagrams with no route */
- u_quad_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
- u_quad_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
+ uint64_t ifs6_in_receive; /* # of total input datagram */
+ uint64_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
+ uint64_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
+ uint64_t ifs6_in_noroute; /* # of datagrams with no route */
+ uint64_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
+ uint64_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_truncated; /* # of truncated datagrams */
- u_quad_t ifs6_in_discard; /* # of discarded datagrams */
+ uint64_t ifs6_in_truncated; /* # of truncated datagrams */
+ uint64_t ifs6_in_discard; /* # of discarded datagrams */
/* NOTE: fragment timeout is not here */
- u_quad_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
+ uint64_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_out_forward; /* # of datagrams forwarded */
+ uint64_t ifs6_out_forward; /* # of datagrams forwarded */
/* NOTE: increment on outgoing if */
- u_quad_t ifs6_out_request; /* # of outgoing datagrams from ULP */
+ uint64_t ifs6_out_request; /* # of outgoing datagrams from ULP */
/* NOTE: does not include forwrads */
- u_quad_t ifs6_out_discard; /* # of discarded datagrams */
- u_quad_t ifs6_out_fragok; /* # of datagrams fragmented */
- u_quad_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
- u_quad_t ifs6_out_fragcreat; /* # of fragment datagrams */
+ uint64_t ifs6_out_discard; /* # of discarded datagrams */
+ uint64_t ifs6_out_fragok; /* # of datagrams fragmented */
+ uint64_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
+ uint64_t ifs6_out_fragcreat; /* # of fragment datagrams */
/* NOTE: this is # after fragment */
- u_quad_t ifs6_reass_reqd; /* # of incoming fragmented packets */
+ uint64_t ifs6_reass_reqd; /* # of incoming fragmented packets */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_ok; /* # of reassembled packets */
+ uint64_t ifs6_reass_ok; /* # of reassembled packets */
/* NOTE: this is # after reass */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_fail; /* # of reass failures */
+ uint64_t ifs6_reass_fail; /* # of reass failures */
/* NOTE: may not be packet count */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_mcast; /* # of inbound multicast datagrams */
- u_quad_t ifs6_out_mcast; /* # of outbound multicast datagrams */
+ uint64_t ifs6_in_mcast; /* # of inbound multicast datagrams */
+ uint64_t ifs6_out_mcast; /* # of outbound multicast datagrams */
};
/*
@@ -192,77 +210,77 @@
* Input statistics
*/
/* ipv6IfIcmpInMsgs, total # of input messages */
- u_quad_t ifs6_in_msg;
+ uint64_t ifs6_in_msg;
/* ipv6IfIcmpInErrors, # of input error messages */
- u_quad_t ifs6_in_error;
+ uint64_t ifs6_in_error;
/* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
- u_quad_t ifs6_in_dstunreach;
+ uint64_t ifs6_in_dstunreach;
/* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
- u_quad_t ifs6_in_adminprohib;
+ uint64_t ifs6_in_adminprohib;
/* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
- u_quad_t ifs6_in_timeexceed;
+ uint64_t ifs6_in_timeexceed;
/* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
- u_quad_t ifs6_in_paramprob;
+ uint64_t ifs6_in_paramprob;
/* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
- u_quad_t ifs6_in_pkttoobig;
+ uint64_t ifs6_in_pkttoobig;
/* ipv6IfIcmpInEchos, # of input echo requests */
- u_quad_t ifs6_in_echo;
+ uint64_t ifs6_in_echo;
/* ipv6IfIcmpInEchoReplies, # of input echo replies */
- u_quad_t ifs6_in_echoreply;
+ uint64_t ifs6_in_echoreply;
/* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
- u_quad_t ifs6_in_routersolicit;
+ uint64_t ifs6_in_routersolicit;
/* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
- u_quad_t ifs6_in_routeradvert;
+ uint64_t ifs6_in_routeradvert;
/* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
- u_quad_t ifs6_in_neighborsolicit;
+ uint64_t ifs6_in_neighborsolicit;
/* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
- u_quad_t ifs6_in_neighboradvert;
+ uint64_t ifs6_in_neighboradvert;
/* ipv6IfIcmpInRedirects, # of input redirects */
- u_quad_t ifs6_in_redirect;
+ uint64_t ifs6_in_redirect;
/* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
- u_quad_t ifs6_in_mldquery;
+ uint64_t ifs6_in_mldquery;
/* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
- u_quad_t ifs6_in_mldreport;
+ uint64_t ifs6_in_mldreport;
/* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
- u_quad_t ifs6_in_mlddone;
+ uint64_t ifs6_in_mlddone;
/*
* Output statistics. We should solve unresolved routing problem...
*/
/* ipv6IfIcmpOutMsgs, total # of output messages */
- u_quad_t ifs6_out_msg;
+ uint64_t ifs6_out_msg;
/* ipv6IfIcmpOutErrors, # of output error messages */
- u_quad_t ifs6_out_error;
+ uint64_t ifs6_out_error;
/* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
- u_quad_t ifs6_out_dstunreach;
+ uint64_t ifs6_out_dstunreach;
/* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
- u_quad_t ifs6_out_adminprohib;
+ uint64_t ifs6_out_adminprohib;
/* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
- u_quad_t ifs6_out_timeexceed;
+ uint64_t ifs6_out_timeexceed;
/* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
- u_quad_t ifs6_out_paramprob;
+ uint64_t ifs6_out_paramprob;
/* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
- u_quad_t ifs6_out_pkttoobig;
+ uint64_t ifs6_out_pkttoobig;
/* ipv6IfIcmpOutEchos, # of output echo requests */
- u_quad_t ifs6_out_echo;
+ uint64_t ifs6_out_echo;
/* ipv6IfIcmpOutEchoReplies, # of output echo replies */
- u_quad_t ifs6_out_echoreply;
+ uint64_t ifs6_out_echoreply;
/* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
- u_quad_t ifs6_out_routersolicit;
+ uint64_t ifs6_out_routersolicit;
/* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
- u_quad_t ifs6_out_routeradvert;
+ uint64_t ifs6_out_routeradvert;
/* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
- u_quad_t ifs6_out_neighborsolicit;
+ uint64_t ifs6_out_neighborsolicit;
/* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
- u_quad_t ifs6_out_neighboradvert;
+ uint64_t ifs6_out_neighboradvert;
/* ipv6IfIcmpOutRedirects, # of output redirects */
- u_quad_t ifs6_out_redirect;
+ uint64_t ifs6_out_redirect;
/* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
- u_quad_t ifs6_out_mldquery;
+ uint64_t ifs6_out_mldquery;
/* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
- u_quad_t ifs6_out_mldreport;
+ uint64_t ifs6_out_mldreport;
/* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
- u_quad_t ifs6_out_mlddone;
+ uint64_t ifs6_out_mlddone;
};
struct in6_ifreq {
@@ -288,8 +306,19 @@
struct sockaddr_in6 ifra_prefixmask;
int ifra_flags;
struct in6_addrlifetime ifra_lifetime;
+ int ifra_vhid;
};
+/* pre-10.x compat: in6_aliasreq without ifra_vhid; see OSIOCAIFADDR_IN6 */
+struct oin6_aliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr_in6 ifra_addr;
+ struct sockaddr_in6 ifra_dstaddr;
+ struct sockaddr_in6 ifra_prefixmask;
+ int ifra_flags;
+ struct in6_addrlifetime ifra_lifetime;
+};
+
/* prefix type macro */
#define IN6_PREFIX_ND 1
#define IN6_PREFIX_RR 2
@@ -410,7 +439,8 @@
#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq)
#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq)
-#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq)
+#define OSIOCAIFADDR_IN6 _IOW('i', 26, struct oin6_aliasreq)
+#define SIOCAIFADDR_IN6 _IOW('i', 27, struct in6_aliasreq)
#define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq)
#define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq)
@@ -418,12 +448,7 @@
#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq)
-#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist)
#ifdef _KERNEL
-/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */
-#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist)
-#endif
-#ifdef _KERNEL
#define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq)
#endif
#define SIOCGIFINFO_IN6 _IOWR('i', 108, struct in6_ndireq)
@@ -470,13 +495,11 @@
#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */
#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */
#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address
- * (used only at first SIOC* call)
+ * (obsolete)
*/
#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */
#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */
-#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management.
- * XXX: this should be temporary.
- */
+#define IN6_IFF_PREFER_SOURCE 0x0100 /* preferred address for SAS */
/* do not input/output */
#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)
@@ -488,8 +511,28 @@
#ifdef _KERNEL
VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DECLARE(u_long, in6_ifaddrhmask);
#define V_in6_ifaddrhead VNET(in6_ifaddrhead)
+#define V_in6_ifaddrhashtbl VNET(in6_ifaddrhashtbl)
+#define V_in6_ifaddrhmask VNET(in6_ifaddrhmask)
+#define IN6ADDR_NHASH_LOG2 8
+#define IN6ADDR_NHASH (1 << IN6ADDR_NHASH_LOG2)
+#define IN6ADDR_HASHVAL(x) (in6_addrhash(x))
+#define IN6ADDR_HASH(x) \
+ (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask])
+
+static __inline uint32_t /* hash value used by IN6ADDR_HASHVAL() */
+in6_addrhash(struct in6_addr *in6)
+{
+ uint32_t x; /* XOR-fold of the four 32-bit address words */
+
+ x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^
+ in6->s6_addr32[3];
+ return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT)); /* 32-bit FNV of x */
+}
+
extern struct rwlock in6_ifaddr_lock;
#define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED)
#define IN6_IFADDR_RLOCK() rw_rlock(&in6_ifaddr_lock)
@@ -499,15 +542,14 @@
#define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_WLOCKED)
#define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock)
-VNET_DECLARE(struct icmp6stat, icmp6stat);
-#define V_icmp6stat VNET(icmp6stat)
#define in6_ifstat_inc(ifp, tag) \
do { \
if (ifp) \
- ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \
+ counter_u64_add(((struct in6_ifextra *) \
+ ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[ \
+ offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)
-extern struct in6_addr zeroin6_addr;
extern u_char inet6ctlerrmap[];
VNET_DECLARE(unsigned long, in6_maxmtu);
#define V_in6_maxmtu VNET(in6_maxmtu)
@@ -766,6 +808,7 @@
int in6_if2idlen(struct ifnet *);
struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *);
char *ip6_sprintf(char *, const struct in6_addr *);
int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *);
int in6_matchlen(struct in6_addr *, struct in6_addr *);
Modified: trunk/sys/netinet6/ip6.h
===================================================================
--- trunk/sys/netinet6/ip6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -1,5 +1,5 @@
/* $MidnightBSD$ */
-/* $FreeBSD: stable/9/sys/netinet6/ip6.h 62587 2000-07-04 16:35:15Z itojun $ */
+/* $FreeBSD: stable/10/sys/netinet6/ip6.h 62587 2000-07-04 16:35:15Z itojun $ */
/* $KAME: ip6.h,v 1.7 2000/03/25 07:23:36 sumikawa Exp $ */
#error "netinet6/ip6.h is obsolete. use netinet/ip6.h"
Modified: trunk/sys/netinet6/ip6_ecn.h
===================================================================
--- trunk/sys/netinet6/ip6_ecn.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ecn.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: ip_ecn.h,v 1.5 2000/03/27 04:58:38 sumikawa Exp $
- * $FreeBSD: stable/9/sys/netinet6/ip6_ecn.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_ecn.h 174510 2007-12-10 16:03:40Z obrien $
*/
/*
Modified: trunk/sys/netinet6/ip6_forward.c
===================================================================
--- trunk/sys/netinet6/ip6_forward.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_forward.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_forward.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_forward.c 284576 2015-06-18 20:57:21Z kp $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -121,7 +121,7 @@
* before forwarding packet actually.
*/
if (ipsec6_in_reject(m, NULL)) {
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
m_freem(m);
return;
}
@@ -138,8 +138,8 @@
IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -183,7 +183,7 @@
sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &error);
if (sp == NULL) {
- V_ipsec6stat.out_inval++;
+ IPSEC6STAT_INC(ips_out_inval);
IP6STAT_INC(ip6s_cantforward);
if (mcopy) {
#if 0
@@ -204,7 +204,7 @@
/*
* This packet is just discarded.
*/
- V_ipsec6stat.out_polvio++;
+ IPSEC6STAT_INC(ips_out_polvio);
IP6STAT_INC(ip6s_cantforward);
KEY_FREESP(&sp);
if (mcopy) {
@@ -252,7 +252,6 @@
{
struct ipsecrequest *isr = NULL;
- struct ipsec_output_state state;
/*
* when the kernel forwards a packet, it is not proper to apply
@@ -285,18 +284,27 @@
*
* IPv6 [ESP|AH] IPv6 [extension headers] payload
*/
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = NULL; /* update at ipsec6_output_tunnel() */
- state.dst = NULL; /* update at ipsec6_output_tunnel() */
- error = ipsec6_output_tunnel(&state, sp, 0);
+ /*
+ * If we need to encapsulate the packet, do it here;
+ * ipsec6_process_packet() will send the packet using ip6_output().
+ */
+ error = ipsec6_process_packet(m, sp->req);
- m = state.m;
KEY_FREESP(&sp);
+ if (error == EJUSTRETURN) {
+ /*
+ * We had a SP with a level of 'use' and no SA. We
+ * will just continue to process the packet without
+ * IPsec processing.
+ */
+ error = 0;
+ goto skip_ipsec;
+ }
+
if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
+ /* mbuf is already reclaimed in ipsec6_process_packet. */
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
@@ -319,7 +327,6 @@
m_freem(mcopy);
#endif
}
- m_freem(m);
return;
} else {
/*
@@ -331,25 +338,7 @@
m = NULL;
goto freecopy;
}
-
- if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
- /*
- * now tunnel mode headers are added. we are originating
- * packet instead of forwarding the packet.
- */
- ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL,
- NULL);
- goto freecopy;
- }
-
- /* adjust pointer */
- dst = (struct sockaddr_in6 *)state.dst;
- rt = state.ro ? state.ro->ro_rt : NULL;
- if (dst != NULL && rt != NULL)
- ipsecrt = 1;
}
- if (ipsecrt)
- goto skip_routing;
skip_ipsec:
#endif
again:
@@ -372,9 +361,6 @@
goto bad;
}
rt = rin6.ro_rt;
-#ifdef IPSEC
-skip_routing:
-#endif
/*
* Source scope check: if a packet can't be delivered to its
@@ -406,8 +392,8 @@
IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
@@ -438,46 +424,6 @@
goto bad;
}
- if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
- in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
- if (mcopy) {
- u_long mtu;
-#ifdef IPSEC
- struct secpolicy *sp;
- int ipsecerror;
- size_t ipsechdrsiz;
-#endif /* IPSEC */
-
- mtu = IN6_LINKMTU(rt->rt_ifp);
-#ifdef IPSEC
- /*
- * When we do IPsec tunnel ingress, we need to play
- * with the link value (decrement IPsec header size
- * from mtu value). The code is much simpler than v4
- * case, as we have the outgoing interface for
- * encapsulated packet as "rt->rt_ifp".
- */
- sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
- IP_FORWARDING, &ipsecerror);
- if (sp) {
- ipsechdrsiz = ipsec_hdrsiz(mcopy,
- IPSEC_DIR_OUTBOUND, NULL);
- if (ipsechdrsiz < mtu)
- mtu -= ipsechdrsiz;
- }
-
- /*
- * if mtu becomes less than minimum MTU,
- * tell minimum MTU (and I'll need to fragment it).
- */
- if (mtu < IPV6_MMTU)
- mtu = IPV6_MMTU;
-#endif /* IPSEC */
- icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
- }
- goto bad;
- }
-
if (rt->rt_flags & RTF_GATEWAY)
dst = (struct sockaddr_in6 *)rt->rt_gateway;
@@ -564,10 +510,8 @@
odst = ip6->ip6_dst;
/* Run through list of hooks for output packets. */
error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL);
- if (error != 0)
- goto senderr;
- if (m == NULL)
- goto freecopy;
+ if (error != 0 || m == NULL)
+ goto freecopy; /* consumed by filter */
ip6 = mtod(m, struct ip6_hdr *);
/* See if destination IP address was changed by packet filter. */
@@ -574,22 +518,9 @@
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip6_input(). */
- if (in6_localip(&ip6->ip6_dst)) {
+ if (in6_localip(&ip6->ip6_dst))
m->m_flags |= M_FASTFWD_OURS;
- if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
- error = netisr_queue(NETISR_IPV6, m);
- goto out;
- } else
+ else
goto again; /* Redo the routing table lookup. */
}
@@ -621,6 +552,47 @@
}
pass:
+ /* See if the size was changed by the packet filter. */
+ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
+ in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
+ if (mcopy) {
+ u_long mtu;
+#ifdef IPSEC
+ struct secpolicy *sp;
+ int ipsecerror;
+ size_t ipsechdrsiz;
+#endif /* IPSEC */
+
+ mtu = IN6_LINKMTU(rt->rt_ifp);
+#ifdef IPSEC
+ /*
+ * When we do IPsec tunnel ingress, we need to play
+ * with the link value (decrement IPsec header size
+ * from mtu value). The code is much simpler than v4
+ * case, as we have the outgoing interface for
+ * encapsulated packet as "rt->rt_ifp".
+ */
+ sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
+ IP_FORWARDING, &ipsecerror);
+ if (sp) {
+ ipsechdrsiz = ipsec_hdrsiz(mcopy,
+ IPSEC_DIR_OUTBOUND, NULL);
+ if (ipsechdrsiz < mtu)
+ mtu -= ipsechdrsiz;
+ }
+
+ /*
+ * if mtu becomes less than minimum MTU,
+ * tell minimum MTU (and I'll need to fragment it).
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU;
+#endif /* IPSEC */
+ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
+ }
+ goto bad;
+ }
+
error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
if (error) {
in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
@@ -636,7 +608,6 @@
}
}
-senderr:
if (mcopy == NULL)
goto out;
switch (error) {
Added: trunk/sys/netinet6/ip6_gre.c
===================================================================
--- trunk/sys/netinet6/ip6_gre.c (rev 0)
+++ trunk/sys/netinet6/ip6_gre.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -0,0 +1,148 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 Andrey V. Elsukov <ae at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_gre.c 284072 2015-06-06 13:26:13Z ae $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/protosw.h>
+#include <sys/malloc.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#ifdef INET
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#endif
+#include <netinet/ip_encap.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6protosw.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <net/if_gre.h>
+
+extern struct domain inet6domain;
+struct ip6protosw in6_gre_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = gre_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_usrreqs = &rip6_usrreqs
+};
+
+VNET_DEFINE(int, ip6_gre_hlim) = IPV6_DEFHLIM;
+#define V_ip6_gre_hlim VNET(ip6_gre_hlim)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets");
+
+static int /* encap match callback passed to encap_attach_func() by in6_gre_attach() */
+in6_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+ GRE_RLOCK_TRACKER;
+ struct gre_softc *sc;
+ struct ip6_hdr *ip6;
+
+ sc = (struct gre_softc *)arg;
+ if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0)
+ return (0); /* interface is down: no match */
+
+ M_ASSERTPKTHDR(m);
+ /*
+ * The packet must hold struct greip6 plus at least a minimal
+ * inner header: struct ip with INET, struct ip6_hdr otherwise.
+ */
+ if (m->m_pkthdr.len < sizeof(struct greip6) +
+#ifdef INET
+ sizeof(struct ip))
+#else
+ sizeof(struct ip6_hdr))
+#endif
+ return (0);
+
+ GRE_RLOCK(sc);
+ if (sc->gre_family == 0)
+ goto bad; /* no address family set on this softc */
+
+ KASSERT(sc->gre_family == AF_INET6,
+ ("wrong gre_family: %d", sc->gre_family));
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, &ip6->ip6_dst) ||
+ !IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, &ip6->ip6_src))
+ goto bad; /* packet src/dst do not mirror gre_oip6 dst/src */
+
+ GRE_RUNLOCK(sc);
+ return (128 * 2); /* presumably the match priority: 2 x 128 address bits */
+bad:
+ GRE_RUNLOCK(sc);
+ return (0);
+}
+
+int /* set the hop limit in the leading struct greip6, then call ip6_output() */
+in6_gre_output(struct mbuf *m, int af, int hlen) /* af, hlen are unused */
+{
+ struct greip6 *gi6;
+
+ gi6 = mtod(m, struct greip6 *);
+ gi6->gi6_ip6.ip6_hlim = V_ip6_gre_hlim; /* per-vnet grehlim sysctl */
+ return (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, NULL));
+}
+
+int /* register in6_gre_encapcheck() for IPPROTO_GRE over AF_INET6 */
+in6_gre_attach(struct gre_softc *sc)
+{
+
+ KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL"));
+ sc->gre_ecookie = encap_attach_func(AF_INET6, IPPROTO_GRE,
+ in6_gre_encapcheck, (void *)&in6_gre_protosw, sc);
+ if (sc->gre_ecookie == NULL)
+ return (EEXIST); /* encap_attach_func() returned no cookie */
+ return (0);
+}
Property changes on: trunk/sys/netinet6/ip6_gre.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netinet6/ip6_id.c
===================================================================
--- trunk/sys/netinet6/ip6_id.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_id.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -64,11 +64,11 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $
+ * $OpenBSD: ip6_id.c,v 1.2 2003/12/10 07:21:01 itojun Exp $
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_id.c 174510 2007-12-10 16:03:40Z obrien $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_id.c 327550 2018-01-04 15:57:49Z pfg $");
/*
* seed = random (bits - 1) bit
@@ -222,7 +222,7 @@
p->ru_g = pmod(p->ru_gen, j, p->ru_n);
p->ru_counter = 0;
- p->ru_reseed = time_second + p->ru_out;
+ p->ru_reseed = time_uptime + p->ru_out;
p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1));
}
@@ -230,15 +230,12 @@
randomid(struct randomtab *p)
{
int i, n;
- u_int32_t tmp;
- if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed)
+ if (p->ru_counter >= p->ru_max || time_uptime > p->ru_reseed)
initid(p);
- tmp = arc4random();
-
/* Skip a random number of ids */
- n = tmp & 0x3; tmp = tmp >> 2;
+ n = arc4random() & 0x3;
if (p->ru_counter + n >= p->ru_max)
initid(p);
@@ -249,7 +246,7 @@
p->ru_counter += i;
- return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) |
+ return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 + p->ru_x, p->ru_n)) |
p->ru_msb;
}
Modified: trunk/sys/netinet6/ip6_input.c
===================================================================
--- trunk/sys/netinet6/ip6_input.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_input.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,12 +62,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_input.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_input.c 329158 2018-02-12 13:52:58Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include "opt_route.h"
#include <sys/param.h>
@@ -77,6 +78,7 @@
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
@@ -93,6 +95,7 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/ip_var.h>
#include <netinet/in_systm.h>
#include <net/if_llatbl.h>
@@ -117,16 +120,12 @@
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-VNET_DECLARE(int, ip6_output_flowtable_size);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-#endif
-
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DEFINE(u_long, in6_ifaddrhmask);
static struct netisr_handler ip6_nh = {
.nh_name = "ip6",
@@ -140,7 +139,11 @@
VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
-VNET_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_SYSINIT(ip6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ip6stat);
+#endif /* VIMAGE */
struct rwlock in6_ifaddr_lock;
RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
@@ -167,8 +170,12 @@
TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
&V_ip6_auto_linklocal);
+ TUNABLE_INT_FETCH("net.inet6.ip6.accept_rtadv", &V_ip6_accept_rtadv);
+ TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr);
TAILQ_INIT(&V_in6_ifaddrhead);
+ V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR,
+ &V_in6_ifaddrhmask);
/* Initialize packet filter hooks. */
V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
@@ -182,24 +189,6 @@
nd6_init();
frag6_init();
-#ifdef FLOWTABLE
- if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
- &V_ip6_output_flowtable_size)) {
- if (V_ip6_output_flowtable_size < 256)
- V_ip6_output_flowtable_size = 256;
- if (!powerof2(V_ip6_output_flowtable_size)) {
- printf("flowtable must be power of 2 size\n");
- V_ip6_output_flowtable_size = 2048;
- }
- } else {
- /*
- * round up to the next power of 2
- */
- V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
- }
- V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
-#endif
-
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
@@ -295,7 +284,12 @@
void
ip6_destroy()
{
+ int i;
+ if ((i = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil hook, "
+ "error %d\n", __func__, i);
+ hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
nd6_destroy();
callout_drain(&V_in6_tmpaddrtimer_ch);
}
@@ -490,21 +484,16 @@
if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
struct mbuf *n;
- MGETHDR(n, M_DONTWAIT, MT_HEADER);
- if (n)
- M_MOVE_PKTHDR(n, m);
- if (n && n->m_pkthdr.len > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
+ if (m->m_pkthdr.len > MHLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, MT_DATA);
if (n == NULL) {
m_freem(m);
return; /* ENOBUFS */
}
+ m_move_pkthdr(n, m);
m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
n->m_len = n->m_pkthdr.len;
m_freem(m);
@@ -533,6 +522,8 @@
IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]);
+ IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6);
+
/*
* Check against address spoofing/corruption.
*/
@@ -557,7 +548,18 @@
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
-
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
+ IPV6_ADDR_MC_SCOPE(&ip6->ip6_dst) == 0) {
+ /*
+ * RFC4291 2.7:
+ * Nodes must not originate a packet to a multicast address
+ * whose scop field contains the reserved value 0; if such
+ * a packet is received, it must be silently dropped.
+ */
+ IP6STAT_INC(ip6s_badscope);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
+ goto bad;
+ }
#ifdef ALTQ
if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
/* packet is dropped by traffic conditioner */
@@ -1073,7 +1075,6 @@
struct mbuf *m = *mp;
int off = *offp, hbhlen;
struct ip6_hbh *hbh;
- u_int8_t *opt;
/* validation of the length of the header */
#ifndef PULLDOWN_TEST
@@ -1100,8 +1101,6 @@
#endif
off += hbhlen;
hbhlen -= sizeof(struct ip6_hbh);
- opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
-
if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
hbhlen, rtalertp, plenp) < 0)
return (-1);
@@ -1594,24 +1593,28 @@
#undef IS2292
void
-ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
+ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu)
{
struct socket *so;
struct mbuf *m_mtu;
struct ip6_mtuinfo mtuctl;
- so = in6p->inp_socket;
-
- if (mtu == NULL)
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ /*
+ * Notify the error by sending IPV6_PATHMTU ancillary data if
+ * application wanted to know the MTU value.
+ * NOTE: we notify disconnected sockets, because some udp
+ * applications keep sending sockets disconnected.
+ * NOTE: our implementation doesn't notify connected sockets that has
+ * foreign address that is different than given destination addresses
+ * (this is permitted by RFC 3542).
+ */
+ if ((inp->inp_flags & IN6P_MTU) == 0 || (
+ !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &dst->sin6_addr)))
return;
-#ifdef DIAGNOSTIC
- if (so == NULL) /* I believe this is impossible */
- panic("ip6_notify_pmtu: socket is NULL");
-#endif
-
- bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */
- mtuctl.ip6m_mtu = *mtu;
+ mtuctl.ip6m_mtu = mtu;
mtuctl.ip6m_addr = *dst;
if (sa6_recoverscope(&mtuctl.ip6m_addr))
return;
@@ -1620,6 +1623,7 @@
IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
return;
+ so = inp->inp_socket;
if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu)
== 0) {
m_freem(m_mtu);
@@ -1626,8 +1630,6 @@
/* XXX: should count statistics */
} else
sorwakeup(so);
-
- return;
}
#ifdef PULLDOWN_TEST
@@ -1660,23 +1662,13 @@
else
elen = (ip6e.ip6e_len + 1) << 3;
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n && elen >= MLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (!n)
+ if (elen > MLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL)
return NULL;
- n->m_len = 0;
- if (elen >= M_TRAILINGSPACE(n)) {
- m_free(n);
- return NULL;
- }
-
m_copydata(m, off, elen, mtod(n, caddr_t));
n->m_len = elen;
return n;
@@ -1686,49 +1678,39 @@
/*
* Get pointer to the previous header followed by the header
* currently processed.
- * XXX: This function supposes that
- * M includes all headers,
- * the next header field and the header length field of each header
- * are valid, and
- * the sum of each header length equals to OFF.
- * Because of these assumptions, this function must be called very
- * carefully. Moreover, it will not be used in the near future when
- * we develop `neater' mechanism to process extension headers.
*/
-char *
+int
ip6_get_prevhdr(struct mbuf *m, int off)
{
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ip6_ext ip6e;
+ struct ip6_hdr *ip6;
+ int len, nlen, nxt;
if (off == sizeof(struct ip6_hdr))
- return (&ip6->ip6_nxt);
- else {
- int len, nxt;
- struct ip6_ext *ip6e = NULL;
+ return (offsetof(struct ip6_hdr, ip6_nxt));
+ if (off < sizeof(struct ip6_hdr))
+ panic("%s: off < sizeof(struct ip6_hdr)", __func__);
- nxt = ip6->ip6_nxt;
- len = sizeof(struct ip6_hdr);
- while (len < off) {
- ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
-
- switch (nxt) {
- case IPPROTO_FRAGMENT:
- len += sizeof(struct ip6_frag);
- break;
- case IPPROTO_AH:
- len += (ip6e->ip6e_len + 2) << 2;
- break;
- default:
- len += (ip6e->ip6e_len + 1) << 3;
- break;
- }
- nxt = ip6e->ip6e_nxt;
+ ip6 = mtod(m, struct ip6_hdr *);
+ nxt = ip6->ip6_nxt;
+ len = sizeof(struct ip6_hdr);
+ nlen = 0;
+ while (len < off) {
+ m_copydata(m, len, sizeof(ip6e), (caddr_t)&ip6e);
+ switch (nxt) {
+ case IPPROTO_FRAGMENT:
+ nlen = sizeof(struct ip6_frag);
+ break;
+ case IPPROTO_AH:
+ nlen = (ip6e.ip6e_len + 2) << 2;
+ break;
+ default:
+ nlen = (ip6e.ip6e_len + 1) << 3;
}
- if (ip6e)
- return (&ip6e->ip6e_nxt);
- else
- return NULL;
+ len += nlen;
+ nxt = ip6e.ip6e_nxt;
}
+ return (len - nlen);
}
/*
Modified: trunk/sys/netinet6/ip6_ipsec.c
===================================================================
--- trunk/sys/netinet6/ip6_ipsec.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ipsec.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_ipsec.c 239831 2012-08-29 13:14:39Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_ipsec.c 283901 2015-06-02 03:14:42Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -48,7 +48,6 @@
#include <sys/syslog.h>
#include <net/if.h>
-#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -104,7 +103,7 @@
int
ip6_ipsec_filtertunnel(struct mbuf *m)
{
-#if defined(IPSEC)
+#ifdef IPSEC
/*
* Bypass packet filtering for packets previously handled by IPsec.
@@ -129,9 +128,8 @@
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
- int s, error;
+ int error;
mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -140,7 +138,6 @@
IP_FORWARDING, &error);
}
if (sp == NULL) { /* NB: can happen if error */
- splx(s);
/*XXX error stat???*/
DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/
return 1;
@@ -151,7 +148,6 @@
*/
error = ipsec_in_reject(sp, m);
KEY_FREESP(&sp);
- splx(s);
if (error) {
IP6STAT_INC(ip6s_cantforward);
return 1;
@@ -174,7 +170,7 @@
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
- int s, error;
+ int error;
/*
* enforce IPsec policy checking if we are seeing last header.
* note that we do not visit this with protocols with pcb layer
@@ -190,7 +186,6 @@
* packet is returned to the ip input queue for delivery.
*/
mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
@@ -210,7 +205,6 @@
DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/
return 1;
}
- splx(s);
if (error)
return 1;
}
@@ -226,23 +220,22 @@
int
ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
- struct ifnet **ifp, struct secpolicy **sp)
+ struct ifnet **ifp)
{
#ifdef IPSEC
+ struct secpolicy *sp = NULL;
struct tdb_ident *tdbi;
struct m_tag *mtag;
/* XXX int s; */
- if (sp == NULL)
- return 1;
mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
if (mtag != NULL) {
tdbi = (struct tdb_ident *)(mtag + 1);
- *sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
- if (*sp == NULL)
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
+ if (sp == NULL)
*error = -EINVAL; /* force silent drop */
m_tag_delete(*m, mtag);
} else {
- *sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
+ sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
error, inp);
}
@@ -253,9 +246,9 @@
* sp == NULL, error == -EINVAL discard packet w/o error
* sp == NULL, error != 0 discard packet, report error
*/
- if (*sp != NULL) {
+ if (sp != NULL) {
/* Loop detection, check if ipsec processing already done */
- KASSERT((*sp)->req != NULL, ("ip_output: no ipsec request"));
+ KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
for (mtag = m_tag_first(*m); mtag != NULL;
mtag = m_tag_next(*m, mtag)) {
if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
@@ -269,22 +262,17 @@
* an SA; e.g. on first reference. If it occurs,
* then we let ipsec4_process_packet do its thing.
*/
- if ((*sp)->req->sav == NULL)
+ if (sp->req->sav == NULL)
break;
tdbi = (struct tdb_ident *)(mtag + 1);
- if (tdbi->spi == (*sp)->req->sav->spi &&
- tdbi->proto == (*sp)->req->sav->sah->saidx.proto &&
- bcmp(&tdbi->dst, &(*sp)->req->sav->sah->saidx.dst,
+ if (tdbi->spi == sp->req->sav->spi &&
+ tdbi->proto == sp->req->sav->sah->saidx.proto &&
+ bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
sizeof (union sockaddr_union)) == 0) {
/*
* No IPsec processing is needed, free
* reference to SP.
- *
- * NB: null pointer to avoid free at
- * done: below.
*/
- KEY_FREESP(sp), *sp = NULL;
- /* XXX splx(s); */
goto done;
}
}
@@ -292,17 +280,38 @@
/*
* Do delayed checksums now because we send before
* this is done in the normal processing path.
- * For IPv6 we do delayed checksums in ip6_output.c.
*/
#ifdef INET
if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- ipseclog((LOG_DEBUG,
- "%s: we do not support IPv4 over IPv6", __func__));
in_delayed_cksum(*m);
(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
#endif
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ in6_delayed_cksum(*m, (*m)->m_pkthdr.len - sizeof(struct ip6_hdr),
+ sizeof(struct ip6_hdr));
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#ifdef SCTP
+ if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
+ sctp_delayed_cksum(*m, sizeof(struct ip6_hdr));
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+ }
+#endif
+ /* NB: callee frees mbuf */
+ *error = ipsec6_process_packet(*m, sp->req);
+
+ if (*error == EJUSTRETURN) {
+ /*
+ * We had a SP with a level of 'use' and no SA. We
+ * will just continue to process the packet without
+ * IPsec processing.
+ */
+ *error = 0;
+ goto done;
+ }
+
/*
* Preserve KAME behaviour: ENOENT can be returned
* when an SA acquire is in progress. Don't propagate
@@ -312,7 +321,7 @@
*/
if (*error == ENOENT)
*error = 0;
- goto do_ipsec;
+ goto reinjected;
} else { /* sp == NULL */
if (*error != 0) {
/*
@@ -329,10 +338,16 @@
}
}
done:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
return 0;
-do_ipsec:
+reinjected:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
return -1;
bad:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
return 1;
#endif /* IPSEC */
return 0;
@@ -376,9 +391,7 @@
sp->req->sav->sah != NULL) {
ro = &sp->req->sav->sah->route_cache.sa_route;
if (ro->ro_rt && ro->ro_rt->rt_ifp) {
- mtu =
- ro->ro_rt->rt_rmx.rmx_mtu ?
- ro->ro_rt->rt_rmx.rmx_mtu :
+ mtu = ro->ro_rt->rt_mtu ? ro->ro_rt->rt_mtu :
ro->ro_rt->rt_ifp->if_mtu;
mtu -= ipsechdr;
}
Modified: trunk/sys/netinet6/ip6_ipsec.h
===================================================================
--- trunk/sys/netinet6/ip6_ipsec.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_ipsec.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet6/ip6_ipsec.h 177166 2008-03-14 11:44:30Z bz $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_ipsec.h 274132 2014-11-05 09:23:29Z ae $
*/
#ifndef _NETINET_IP6_IPSEC_H_
@@ -37,7 +37,7 @@
int ip6_ipsec_fwd(struct mbuf *);
int ip6_ipsec_input(struct mbuf *, int);
int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
- struct ifnet **, struct secpolicy **sp);
+ struct ifnet **);
#if 0
int ip6_ipsec_mtu(struct mbuf *);
#endif
Modified: trunk/sys/netinet6/ip6_mroute.c
===================================================================
--- trunk/sys/netinet6/ip6_mroute.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_mroute.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -80,9 +80,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_mroute.c 248085 2013-03-09 02:36:32Z marius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_mroute.c 328878 2018-02-05 09:25:53Z ae $");
#include "opt_inet6.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/callout.h>
@@ -94,6 +95,7 @@
#include <sys/module.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -115,6 +117,7 @@
#include <netinet/ip_encap.h>
#include <netinet/ip6.h>
+#include <netinet/in_kdtrace.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
@@ -163,6 +166,7 @@
&mrt6stat, mrt6stat,
"Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
+#define MRT6STAT_INC(name) mrt6stat.name += 1
#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
@@ -218,6 +222,14 @@
#define DEBUG_XMIT 0x10
#define DEBUG_REG 0x20
#define DEBUG_PIM 0x40
+#define DEBUG_ERR 0x80
+#define DEBUG_ANY 0x7f
+#define MRT6_DLOG(m, fmt, ...) \
+ if (V_mrt6debug & (m)) \
+ log(((m) & DEBUG_ERR) ? LOG_ERR: LOG_DEBUG, \
+ "%s: " fmt "\n", __func__, ##__VA_ARGS__)
+#else
+#define MRT6_DLOG(m, fmt, ...)
#endif
static void expire_upcalls(void *);
@@ -252,8 +264,9 @@
static struct pim6stat pim6stat;
SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
&pim6stat, pim6stat,
- "PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
+ "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
+#define PIM6STAT_INC(name) pim6stat.name += 1
static VNET_DEFINE(int, pim6);
#define V_pim6 VNET(pim6)
@@ -271,7 +284,6 @@
#define MF6CFIND(o, g, rt) do { \
struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
rt = NULL; \
- mrt6stat.mrt6s_mfc_lookups++; \
while (_rt) { \
if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
@@ -282,7 +294,7 @@
_rt = _rt->mf6c_next; \
} \
if (rt == NULL) { \
- mrt6stat.mrt6s_mfc_misses++; \
+ MRT6STAT_INC(mrt6s_mfc_misses); \
} \
} while (/*CONSTCOND*/ 0)
@@ -522,12 +534,8 @@
ip6_mrouter_init(struct socket *so, int v, int cmd)
{
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_DEBUG,
- "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n",
- so->so_type, so->so_proto->pr_protocol);
-#endif
+ MRT6_DLOG(DEBUG_ANY, "so_type = %d, pr_protocol = %d",
+ so->so_type, so->so_proto->pr_protocol);
if (so->so_type != SOCK_RAW ||
so->so_proto->pr_protocol != IPPROTO_ICMPV6)
@@ -556,12 +564,8 @@
expire_upcalls, NULL);
MROUTER6_UNLOCK();
+ MRT6_DLOG(DEBUG_ANY, "finished");
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_DEBUG, "ip6_mrouter_init\n");
-#endif
-
return (0);
}
@@ -572,7 +576,7 @@
X_ip6_mrouter_done(void)
{
mifi_t mifi;
- int i;
+ u_long i;
struct mf6c *rt;
struct rtdetq *rte;
@@ -612,7 +616,7 @@
for (rte = rt->mf6c_stall; rte != NULL; ) {
struct rtdetq *n = rte->next;
- m_free(rte->m);
+ m_freem(rte->m);
free(rte, M_MRTABLE6);
rte = n;
}
@@ -638,12 +642,8 @@
V_ip6_mrouter_ver = 0;
MROUTER6_UNLOCK();
+ MRT6_DLOG(DEBUG_ANY, "finished");
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_DEBUG, "ip6_mrouter_done\n");
-#endif
-
return (0);
}
@@ -723,15 +723,9 @@
nummifs = mifcp->mif6c_mifi + 1;
MIF6_UNLOCK();
+ MRT6_DLOG(DEBUG_ANY, "mif #%d, phyint %s", mifcp->mif6c_mifi,
+ if_name(ifp));
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_DEBUG,
- "add_mif #%d, phyint %s\n",
- mifcp->mif6c_mifi,
- ifp->if_xname);
-#endif
-
return (0);
}
@@ -773,12 +767,8 @@
if (mif6table[mifi - 1].m6_ifp)
break;
nummifs = mifi;
+ MRT6_DLOG(DEBUG_ANY, "mif %d, nummifs %d", *mifip, nummifs);
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs);
-#endif
-
return (0);
}
@@ -813,15 +803,10 @@
/* If an entry already exists, just update the fields */
if (rt) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_MFC) {
- log(LOG_DEBUG,
- "add_m6fc no upcall h %d o %s g %s p %x\n",
- ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
- }
-#endif
+ MRT6_DLOG(DEBUG_MFC, "no upcall o %s g %s p %x",
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
rt->mf6c_parent = mfccp->mf6cc_parent;
rt->mf6c_ifset = mfccp->mf6cc_ifset;
@@ -852,16 +837,12 @@
&mfccp->mf6cc_mcastgrp.sin6_addr),
mfccp->mf6cc_parent, rt->mf6c_stall);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_m6fc o %s g %s p %x dbg %x\n",
- ip6_sprintf(ip6bufo,
- &mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(ip6bufg,
- &mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent, rt->mf6c_stall);
-#endif
+ MRT6_DLOG(DEBUG_MFC, "o %s g %s p %x dbg %p",
+ ip6_sprintf(ip6bufo,
+ &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg,
+ &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent, rt->mf6c_stall);
rt->mf6c_origin = mfccp->mf6cc_origin;
rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
@@ -894,15 +875,10 @@
* It is possible that an entry is being inserted without an upcall
*/
if (nstl == 0) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_mfc no upcall h %d o %s g %s p %x\n",
- hash,
- ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
-#endif
+ MRT6_DLOG(DEBUG_MFC, "no upcall h %lu o %s g %s p %x", hash,
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
@@ -988,6 +964,9 @@
static int
del_m6fc(struct mf6cctl *mfccp)
{
+#ifdef MRT6DEBUG
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
+#endif
struct sockaddr_in6 origin;
struct sockaddr_in6 mcastgrp;
struct mf6c *rt;
@@ -998,14 +977,9 @@
mcastgrp = mfccp->mf6cc_mcastgrp;
hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_MFC) {
- char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
- log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
- ip6_sprintf(ip6bufo, &origin.sin6_addr),
- ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
- }
-#endif
+ MRT6_DLOG(DEBUG_MFC, "orig %s mcastgrp %s",
+ ip6_sprintf(ip6bufo, &origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
MFC6_LOCK();
@@ -1070,20 +1044,24 @@
int
X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
{
+ struct rtdetq *rte;
+ struct mbuf *mb0;
struct mf6c *rt;
struct mif6 *mifp;
struct mbuf *mm;
+ u_long hash;
mifi_t mifi;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#ifdef UPCALL_TIMING
+ struct timeval tp;
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_FORWARD)
- log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst),
- ifp->if_index);
-#endif
+ GET_TIME(tp);
+#endif /* UPCALL_TIMING */
+ MRT6_DLOG(DEBUG_FORWARD, "src %s, dst %s, ifindex %d",
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp->if_index);
+
/*
* Don't forward a packet with Hop limit of zero or one,
* or a packet destined to a local-only group.
@@ -1102,8 +1080,8 @@
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -1121,211 +1099,184 @@
* Determine forwarding mifs from the forwarding cache table
*/
MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);
+ MRT6STAT_INC(mrt6s_mfc_lookups);
/* Entry exists, so forward if necessary */
if (rt) {
MFC6_UNLOCK();
return (ip6_mdq(m, ifp, rt));
- } else {
- /*
- * If we don't have a route for packet's origin,
- * Make a copy of the packet &
- * send message to routing daemon
- */
+ }
- struct mbuf *mb0;
- struct rtdetq *rte;
- u_long hash;
-/* int i, npkts;*/
-#ifdef UPCALL_TIMING
- struct timeval tp;
+ /*
+ * If we don't have a route for packet's origin,
+ * Make a copy of the packet & send message to routing daemon.
+ */
+ MRT6STAT_INC(mrt6s_no_route);
+ MRT6_DLOG(DEBUG_FORWARD | DEBUG_MFC, "no rte s %s g %s",
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
- GET_TIME(tp);
-#endif /* UPCALL_TIMING */
+ /*
+ * Allocate mbufs early so that we don't do extra work if we
+ * are just going to fail anyway.
+ */
+ rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT);
+ if (rte == NULL) {
+ MFC6_UNLOCK();
+ return (ENOBUFS);
+ }
+ mb0 = m_copy(m, 0, M_COPYALL);
+ /*
+ * Pullup packet header if needed before storing it,
+ * as other references may modify it in the meantime.
+ */
+ if (mb0 && (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
+ mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
+ if (mb0 == NULL) {
+ free(rte, M_MRTABLE6);
+ MFC6_UNLOCK();
+ return (ENOBUFS);
+ }
- mrt6stat.mrt6s_no_route++;
-#ifdef MRT6DEBUG
- if (V_mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
- log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+ /* is there an upcall waiting for this packet? */
+ hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
+ for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
+ if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
+ &rt->mf6c_origin.sin6_addr) &&
+ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
+ &rt->mf6c_mcastgrp.sin6_addr) && (rt->mf6c_stall != NULL))
+ break;
+ }
+
+ if (rt == NULL) {
+ struct mrt6msg *im;
+#ifdef MRT6_OINIT
+ struct omrt6msg *oim;
#endif
-
- /*
- * Allocate mbufs early so that we don't do extra work if we
- * are just going to fail anyway.
- */
- rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6,
- M_NOWAIT);
- if (rte == NULL) {
+ /* no upcall, so make a new entry */
+ rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT);
+ if (rt == NULL) {
+ free(rte, M_MRTABLE6);
+ m_freem(mb0);
MFC6_UNLOCK();
return (ENOBUFS);
}
- mb0 = m_copy(m, 0, M_COPYALL);
/*
- * Pullup packet header if needed before storing it,
- * as other references may modify it in the meantime.
+ * Make a copy of the header to send to the user
+ * level process
*/
- if (mb0 &&
- (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
- mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
- if (mb0 == NULL) {
+ mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));
+ if (mm == NULL) {
free(rte, M_MRTABLE6);
+ m_freem(mb0);
+ free(rt, M_MRTABLE6);
MFC6_UNLOCK();
return (ENOBUFS);
}
- /* is there an upcall waiting for this packet? */
- hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
- for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
- if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
- &rt->mf6c_origin.sin6_addr) &&
- IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
- &rt->mf6c_mcastgrp.sin6_addr) &&
- (rt->mf6c_stall != NULL))
- break;
- }
-
- if (rt == NULL) {
- struct mrt6msg *im;
+ /*
+ * Send message to routing daemon
+ */
+ sin6.sin6_addr = ip6->ip6_src;
+ im = NULL;
#ifdef MRT6_OINIT
- struct omrt6msg *oim;
+ oim = NULL;
#endif
-
- /* no upcall, so make a new entry */
- rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
- M_NOWAIT);
- if (rt == NULL) {
- free(rte, M_MRTABLE6);
- m_freem(mb0);
- MFC6_UNLOCK();
- return (ENOBUFS);
- }
- /*
- * Make a copy of the header to send to the user
- * level process
- */
- mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));
-
- if (mm == NULL) {
- free(rte, M_MRTABLE6);
- m_freem(mb0);
- free(rt, M_MRTABLE6);
- MFC6_UNLOCK();
- return (ENOBUFS);
- }
-
- /*
- * Send message to routing daemon
- */
- sin6.sin6_addr = ip6->ip6_src;
-
- im = NULL;
+ switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
- oim = NULL;
+ case MRT6_OINIT:
+ oim = mtod(mm, struct omrt6msg *);
+ oim->im6_msgtype = MRT6MSG_NOCACHE;
+ oim->im6_mbz = 0;
+ break;
#endif
- switch (V_ip6_mrouter_ver) {
-#ifdef MRT6_OINIT
- case MRT6_OINIT:
- oim = mtod(mm, struct omrt6msg *);
- oim->im6_msgtype = MRT6MSG_NOCACHE;
- oim->im6_mbz = 0;
- break;
-#endif
- case MRT6_INIT:
- im = mtod(mm, struct mrt6msg *);
- im->im6_msgtype = MRT6MSG_NOCACHE;
- im->im6_mbz = 0;
- break;
- default:
- free(rte, M_MRTABLE6);
- m_freem(mb0);
- free(rt, M_MRTABLE6);
- MFC6_UNLOCK();
- return (EINVAL);
- }
+ case MRT6_INIT:
+ im = mtod(mm, struct mrt6msg *);
+ im->im6_msgtype = MRT6MSG_NOCACHE;
+ im->im6_mbz = 0;
+ break;
+ default:
+ free(rte, M_MRTABLE6);
+ m_freem(mb0);
+ free(rt, M_MRTABLE6);
+ MFC6_UNLOCK();
+ return (EINVAL);
+ }
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_FORWARD)
- log(LOG_DEBUG,
- "getting the iif info in the kernel\n");
-#endif
-
- for (mifp = mif6table, mifi = 0;
- mifi < nummifs && mifp->m6_ifp != ifp;
- mifp++, mifi++)
+ MRT6_DLOG(DEBUG_FORWARD, "getting the iif info in the kernel");
+ for (mifp = mif6table, mifi = 0;
+ mifi < nummifs && mifp->m6_ifp != ifp; mifp++, mifi++)
;
- switch (V_ip6_mrouter_ver) {
+ switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
- case MRT6_OINIT:
- oim->im6_mif = mifi;
- break;
+ case MRT6_OINIT:
+ oim->im6_mif = mifi;
+ break;
#endif
- case MRT6_INIT:
- im->im6_mif = mifi;
- break;
- }
+ case MRT6_INIT:
+ im->im6_mif = mifi;
+ break;
+ }
- if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
- log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
- "socket queue full\n");
- mrt6stat.mrt6s_upq_sockfull++;
+ if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
+ log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
+ "socket queue full\n");
+ MRT6STAT_INC(mrt6s_upq_sockfull);
+ free(rte, M_MRTABLE6);
+ m_freem(mb0);
+ free(rt, M_MRTABLE6);
+ MFC6_UNLOCK();
+ return (ENOBUFS);
+ }
+
+ MRT6STAT_INC(mrt6s_upcalls);
+
+ /* insert new entry at head of hash chain */
+ bzero(rt, sizeof(*rt));
+ rt->mf6c_origin.sin6_family = AF_INET6;
+ rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
+ rt->mf6c_origin.sin6_addr = ip6->ip6_src;
+ rt->mf6c_mcastgrp.sin6_family = AF_INET6;
+ rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
+ rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
+ rt->mf6c_expire = UPCALL_EXPIRE;
+ n6expire[hash]++;
+ rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
+
+ /* link into table */
+ rt->mf6c_next = mf6ctable[hash];
+ mf6ctable[hash] = rt;
+ /* Add this entry to the end of the queue */
+ rt->mf6c_stall = rte;
+ } else {
+ /* determine if q has overflowed */
+ struct rtdetq **p;
+ int npkts = 0;
+
+ for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
+ if (++npkts > MAX_UPQ6) {
+ MRT6STAT_INC(mrt6s_upq_ovflw);
free(rte, M_MRTABLE6);
m_freem(mb0);
- free(rt, M_MRTABLE6);
MFC6_UNLOCK();
- return (ENOBUFS);
+ return (0);
}
- mrt6stat.mrt6s_upcalls++;
+ /* Add this entry to the end of the queue */
+ *p = rte;
+ }
- /* insert new entry at head of hash chain */
- bzero(rt, sizeof(*rt));
- rt->mf6c_origin.sin6_family = AF_INET6;
- rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
- rt->mf6c_origin.sin6_addr = ip6->ip6_src;
- rt->mf6c_mcastgrp.sin6_family = AF_INET6;
- rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
- rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
- rt->mf6c_expire = UPCALL_EXPIRE;
- n6expire[hash]++;
- rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;
-
- /* link into table */
- rt->mf6c_next = mf6ctable[hash];
- mf6ctable[hash] = rt;
- /* Add this entry to the end of the queue */
- rt->mf6c_stall = rte;
- } else {
- /* determine if q has overflowed */
- struct rtdetq **p;
- int npkts = 0;
-
- for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
- if (++npkts > MAX_UPQ6) {
- mrt6stat.mrt6s_upq_ovflw++;
- free(rte, M_MRTABLE6);
- m_freem(mb0);
- MFC6_UNLOCK();
- return (0);
- }
-
- /* Add this entry to the end of the queue */
- *p = rte;
- }
-
- rte->next = NULL;
- rte->m = mb0;
- rte->ifp = ifp;
+ rte->next = NULL;
+ rte->m = mb0;
+ rte->ifp = ifp;
#ifdef UPCALL_TIMING
- rte->t = tp;
+ rte->t = tp;
#endif /* UPCALL_TIMING */
- MFC6_UNLOCK();
+ MFC6_UNLOCK();
- return (0);
- }
+ return (0);
}
/*
@@ -1335,9 +1286,12 @@
static void
expire_upcalls(void *unused)
{
+#ifdef MRT6DEBUG
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
+#endif
struct rtdetq *rte;
struct mf6c *mfc, **nptr;
- int i;
+ u_long i;
MFC6_LOCK();
for (i = 0; i < MF6CTBLSIZ; i++) {
@@ -1354,15 +1308,9 @@
if (rte != NULL &&
mfc->mf6c_expire != 0 &&
--mfc->mf6c_expire == 0) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_EXPIRE) {
- char ip6bufo[INET6_ADDRSTRLEN];
- char ip6bufg[INET6_ADDRSTRLEN];
- log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
- ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
- ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
- }
-#endif
+ MRT6_DLOG(DEBUG_EXPIRE, "expiring (%s %s)",
+ ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
/*
* drop all the packets
* free the mbuf with the pkt, if, timing info
@@ -1373,7 +1321,7 @@
free(rte, M_MRTABLE6);
rte = n;
} while (rte != NULL);
- mrt6stat.mrt6s_cache_cleanups++;
+ MRT6STAT_INC(mrt6s_cache_cleanups);
n6expire[i]--;
*nptr = mfc->mf6c_next;
@@ -1422,14 +1370,10 @@
mifi = rt->mf6c_parent;
if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) {
/* came in the wrong interface */
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_FORWARD)
- log(LOG_DEBUG,
- "wrong if: ifid %d mifi %d mififid %x\n",
- ifp->if_index, mifi,
- mif6table[mifi].m6_ifp->if_index);
-#endif
- mrt6stat.mrt6s_wrong_if++;
+ MRT6_DLOG(DEBUG_FORWARD,
+ "wrong if: ifid %d mifi %d mififid %x", ifp->if_index,
+ mifi, mif6table[mifi].m6_ifp->if_index);
+ MRT6STAT_INC(mrt6s_wrong_if);
rt->mf6c_wrong_if++;
/*
* If we are doing PIM processing, and we are forwarding
@@ -1502,14 +1446,12 @@
break;
}
- mrt6stat.mrt6s_upcalls++;
+ MRT6STAT_INC(mrt6s_upcalls);
if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n");
-#endif
- ++mrt6stat.mrt6s_upq_sockfull;
+ MRT6_DLOG(DEBUG_ANY,
+ "ip6_mrouter socket queue full");
+ MRT6STAT_INC(mrt6s_upq_sockfull);
return (ENOBUFS);
} /* if socket Q full */
} /* if PIM */
@@ -1572,6 +1514,9 @@
static void
phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
{
+#ifdef MRT6DEBUG
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
int error = 0;
@@ -1609,11 +1554,8 @@
error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o,
NULL, NULL);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_XMIT)
- log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
- mifp - mif6table, error);
-#endif
+ MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
+ (uint16_t)(mifp - mif6table), error);
return;
}
@@ -1643,17 +1585,17 @@
dst6.sin6_len = sizeof(struct sockaddr_in6);
dst6.sin6_family = AF_INET6;
dst6.sin6_addr = ip6->ip6_dst;
+
+ IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
/*
* We just call if_output instead of nd6_output here, since
* we need no ND for a multicast forwarded packet...right?
*/
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
error = (*ifp->if_output)(ifp, mb_copy,
(struct sockaddr *)&dst6, NULL);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_XMIT)
- log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
- mifp - mif6table, error);
-#endif
+ MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
+ (uint16_t)(mifp - mif6table), error);
} else {
/*
* pMTU discovery is intentionally disabled by default, since
@@ -1663,19 +1605,11 @@
if (V_ip6_mcast_pmtu)
icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
else {
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_XMIT) {
- char ip6bufs[INET6_ADDRSTRLEN];
- char ip6bufd[INET6_ADDRSTRLEN];
- log(LOG_DEBUG,
- "phyint_send: packet too big on %s o %s "
- "g %s size %d(discarded)\n",
- if_name(ifp),
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst),
- mb_copy->m_pkthdr.len);
- }
-#endif /* MRT6DEBUG */
+ MRT6_DLOG(DEBUG_XMIT, " packet too big on %s o %s "
+ "g %s size %d (discarded)", if_name(ifp),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
+ mb_copy->m_pkthdr.len);
m_freem(mb_copy); /* simply discard the packet */
}
}
@@ -1684,26 +1618,23 @@
static int
register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
{
+#ifdef MRT6DEBUG
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
struct mbuf *mm;
int i, len = m->m_pkthdr.len;
static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
struct mrt6msg *im6;
-#ifdef MRT6DEBUG
- if (V_mrt6debug) {
- char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
- log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n",
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst));
- }
-#endif
- ++pim6stat.pim6s_snd_registers;
+ MRT6_DLOG(DEBUG_ANY, "src %s dst %s",
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+ PIM6STAT_INC(pim6s_snd_registers);
- /* Make a copy of the packet to send to the user level process */
- MGETHDR(mm, M_DONTWAIT, MT_HEADER);
+ /* Make a copy of the packet to send to the user level process. */
+ mm = m_gethdr(M_NOWAIT, MT_DATA);
if (mm == NULL)
return (ENOBUFS);
- mm->m_pkthdr.rcvif = NULL;
mm->m_data += max_linkhdr;
mm->m_len = sizeof(struct ip6_hdr);
@@ -1732,15 +1663,11 @@
im6->im6_mif = mif - mif6table;
/* iif info is not given for reg. encap.n */
- mrt6stat.mrt6s_upcalls++;
+ MRT6STAT_INC(mrt6s_upcalls);
if (socket_send(V_ip6_mrouter, mm, &sin6) < 0) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug)
- log(LOG_WARNING,
- "register_send: ip6_mrouter socket queue full\n");
-#endif
- ++mrt6stat.mrt6s_upq_sockfull;
+ MRT6_DLOG(DEBUG_ANY, "ip6_mrouter socket queue full");
+ MRT6STAT_INC(mrt6s_upq_sockfull);
return (ENOBUFS);
}
return (0);
@@ -1781,7 +1708,7 @@
int minlen;
int off = *offp;
- ++pim6stat.pim6s_rcv_total;
+ PIM6STAT_INC(pim6s_rcv_total);
ip6 = mtod(m, struct ip6_hdr *);
pimlen = m->m_pkthdr.len - *offp;
@@ -1790,11 +1717,8 @@
* Validate lengths
*/
if (pimlen < PIM_MINLEN) {
- ++pim6stat.pim6s_rcv_tooshort;
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM)
- log(LOG_DEBUG,"pim6_input: PIM packet too short\n");
-#endif
+ PIM6STAT_INC(pim6s_rcv_tooshort);
+ MRT6_DLOG(DEBUG_PIM, "PIM packet too short");
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1823,7 +1747,7 @@
#else
IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
if (pim == NULL) {
- pim6stat.pim6s_rcv_tooshort++;
+ PIM6STAT_INC(pim6s_rcv_tooshort);
return (IPPROTO_DONE);
}
#endif
@@ -1843,12 +1767,8 @@
cksumlen = pimlen;
if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
- ++pim6stat.pim6s_rcv_badsum;
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM)
- log(LOG_DEBUG,
- "pim6_input: invalid checksum\n");
-#endif
+ PIM6STAT_INC(pim6s_rcv_badsum);
+ MRT6_DLOG(DEBUG_PIM, "invalid checksum");
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1857,12 +1777,10 @@
/* PIM version check */
if (pim->pim_ver != PIM_VERSION) {
- ++pim6stat.pim6s_rcv_badversion;
-#ifdef MRT6DEBUG
- log(LOG_ERR,
- "pim6_input: incorrect version %d, expecting %d\n",
+ PIM6STAT_INC(pim6s_rcv_badversion);
+ MRT6_DLOG(DEBUG_ANY | DEBUG_ERR,
+ "incorrect version %d, expecting %d",
pim->pim_ver, PIM_VERSION);
-#endif
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1883,15 +1801,11 @@
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#endif
- ++pim6stat.pim6s_rcv_registers;
+ PIM6STAT_INC(pim6s_rcv_registers);
if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM)
- log(LOG_DEBUG,
- "pim6_input: register mif not set: %d\n",
- reg_mif_num);
-#endif
+ MRT6_DLOG(DEBUG_PIM, "register mif not set: %d",
+ reg_mif_num);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1905,51 +1819,37 @@
* Validate length
*/
if (pimlen < PIM6_REG_MINLEN) {
- ++pim6stat.pim6s_rcv_tooshort;
- ++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
- log(LOG_ERR,
- "pim6_input: register packet size too "
- "small %d from %s\n",
+ PIM6STAT_INC(pim6s_rcv_tooshort);
+ PIM6STAT_INC(pim6s_rcv_badregisters);
+ MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "register packet "
+ "size too small %d from %s",
pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
-#endif
m_freem(m);
return (IPPROTO_DONE);
}
eip6 = (struct ip6_hdr *) (reghdr + 1);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM)
- log(LOG_DEBUG,
- "pim6_input[register], eip6: %s -> %s, "
- "eip6 plen %d\n",
- ip6_sprintf(ip6bufs, &eip6->ip6_src),
- ip6_sprintf(ip6bufd, &eip6->ip6_dst),
- ntohs(eip6->ip6_plen));
-#endif
+ MRT6_DLOG(DEBUG_PIM, "eip6: %s -> %s, eip6 plen %d",
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst),
+ ntohs(eip6->ip6_plen));
/* verify the version number of the inner packet */
if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
- ++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
- log(LOG_DEBUG, "pim6_input: invalid IP version (%d) "
- "of the inner packet\n",
+ PIM6STAT_INC(pim6s_rcv_badregisters);
+ MRT6_DLOG(DEBUG_ANY, "invalid IP version (%d) "
+ "of the inner packet",
(eip6->ip6_vfc & IPV6_VERSION));
-#endif
m_freem(m);
- return (IPPROTO_NONE);
+ return (IPPROTO_DONE);
}
/* verify the inner packet is destined to a mcast group */
if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
- ++pim6stat.pim6s_rcv_badregisters;
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM)
- log(LOG_DEBUG,
- "pim6_input: inner packet of register "
- "is not multicast %s\n",
- ip6_sprintf(ip6bufd, &eip6->ip6_dst));
-#endif
+ PIM6STAT_INC(pim6s_rcv_badregisters);
+ MRT6_DLOG(DEBUG_PIM, "inner packet of register "
+ "is not multicast %s",
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst));
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1959,11 +1859,8 @@
*/
mcp = m_copy(m, 0, off + PIM6_REG_MINLEN);
if (mcp == NULL) {
-#ifdef MRT6DEBUG
- log(LOG_ERR,
- "pim6_input: pim register: "
- "could not copy register head\n");
-#endif
+ MRT6_DLOG(DEBUG_ANY | DEBUG_ERR, "pim register: "
+ "could not copy register head");
m_freem(m);
return (IPPROTO_DONE);
}
@@ -1972,16 +1869,10 @@
* forward the inner ip6 packet; point m_data at the inner ip6.
*/
m_adj(m, off + PIM_MINLEN);
-#ifdef MRT6DEBUG
- if (V_mrt6debug & DEBUG_PIM) {
- log(LOG_DEBUG,
- "pim6_input: forwarding decapsulated register: "
- "src %s, dst %s, mif %d\n",
- ip6_sprintf(ip6bufs, &eip6->ip6_src),
- ip6_sprintf(ip6bufd, &eip6->ip6_dst),
- reg_mif_num);
- }
-#endif
+ MRT6_DLOG(DEBUG_PIM, "forwarding decapsulated register: "
+ "src %s, dst %s, mif %d",
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst), reg_mif_num);
rc = if_simloop(mif6table[reg_mif_num].m6_ifp, m,
dst.sin6_family, 0);
Modified: trunk/sys/netinet6/ip6_mroute.h
===================================================================
--- trunk/sys/netinet6/ip6_mroute.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_mroute.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $
- * $FreeBSD: stable/9/sys/netinet6/ip6_mroute.h 240305 2012-09-10 11:38:02Z glebius $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_mroute.h 261218 2014-01-28 00:30:17Z ae $
*/
/* BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp */
@@ -122,19 +122,19 @@
* The kernel's multicast routing statistics.
*/
struct mrt6stat {
- u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
- u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
- u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */
- u_quad_t mrt6s_no_route; /* no route for packet's origin */
- u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */
- u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */
- u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */
- u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */
- u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
- u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
- u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
- u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
- u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
+ uint64_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
+ uint64_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
+ uint64_t mrt6s_upcalls; /* # calls to multicast routing daemon */
+ uint64_t mrt6s_no_route; /* no route for packet's origin */
+ uint64_t mrt6s_bad_tunnel; /* malformed tunnel options */
+ uint64_t mrt6s_cant_tunnel; /* no room for tunnel options */
+ uint64_t mrt6s_wrong_if; /* arrived on wrong interface */
+ uint64_t mrt6s_upq_ovflw; /* upcall Q overflow */
+ uint64_t mrt6s_cache_cleanups; /* # entries with no upcalls */
+ uint64_t mrt6s_drop_sel; /* pkts dropped selectively */
+ uint64_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
+ uint64_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
+ uint64_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef MRT6_OINIT
@@ -146,11 +146,6 @@
struct omrt6msg {
u_long unused1;
u_char im6_msgtype; /* what type of message */
-#if 0
-#define MRT6MSG_NOCACHE 1
-#define MRT6MSG_WRONGMIF 2
-#define MRT6MSG_WHOLEPKT 3 /* used for user level encap*/
-#endif
u_char im6_mbz; /* must be zero */
u_char im6_mif; /* mif rec'd on */
u_char unused2;
Modified: trunk/sys/netinet6/ip6_output.c
===================================================================
--- trunk/sys/netinet6/ip6_output.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_output.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/ip6_output.c 245067 2013-01-05 20:07:28Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/ip6_output.c 317335 2017-04-23 08:59:57Z kp $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -185,7 +185,7 @@
}\
} while (/*CONSTCOND*/ 0)
-static void
+void
in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
{
u_short csum;
@@ -197,8 +197,8 @@
if (offset + sizeof(u_short) > m->m_len) {
printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
- "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
- m->m_pkthdr.csum_flags);
+ "csum_flags=%b\n", __func__, m->m_len, plen, offset,
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
/*
* XXX this should not happen, but if it does, the correct
* behavior may be to insert the checksum in the appropriate
@@ -209,6 +209,66 @@
*(u_short *)(m->m_data + offset) = csum;
}
+int
+ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
+ int mtu, uint32_t id)
+{
+ struct mbuf *m, **mnext, *m_frgpart;
+ struct ip6_hdr *ip6, *mhip6;
+ struct ip6_frag *ip6f;
+ int off;
+ int error;
+ int tlen = m0->m_pkthdr.len;
+
+ KASSERT(( mtu % 8 == 0), ("Fragment length must be a multiple of 8"));
+
+ m = m0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ mnext = &m->m_nextpkt;
+
+ for (off = hlen; off < tlen; off += mtu) {
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (!m) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m->m_flags = m0->m_flags & M_COPYFLAGS;
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ m->m_data += max_linkhdr;
+ mhip6 = mtod(m, struct ip6_hdr *);
+ *mhip6 = *ip6;
+ m->m_len = sizeof(*mhip6);
+ error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+ if (error) {
+ IP6STAT_INC(ip6s_odropped);
+ return (error);
+ }
+ ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
+ if (off + mtu >= tlen)
+ mtu = tlen - off;
+ else
+ ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+ mhip6->ip6_plen = htons((u_short)(mtu + hlen +
+ sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+ if ((m_frgpart = m_copy(m0, off, mtu)) == 0) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m_cat(m, m_frgpart);
+ m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f);
+ m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
+ m->m_pkthdr.rcvif = NULL;
+ ip6f->ip6f_reserved = 0;
+ ip6f->ip6f_ident = id;
+ ip6f->ip6f_nxt = nextproto;
+ IP6STAT_INC(ip6s_ofragments);
+ in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+ }
+
+ return (0);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -219,9 +279,9 @@
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
*
- * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
+ * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
- * which is rt_rmx.rmx_mtu.
+ * which is rt_mtu.
*
* ifpp - XXX: just for statistics
*/
@@ -230,11 +290,11 @@
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct inpcb *inp)
{
- struct ip6_hdr *ip6, *mhip6;
+ struct ip6_hdr *ip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
struct mbuf *mprev = NULL;
- int hlen, tlen, len, off;
+ int hlen, tlen, len;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -249,16 +309,9 @@
u_int32_t zone;
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
- int needipsec = 0;
int sw_csum, tso;
-#ifdef IPSEC
- struct ipsec_output_state state;
- struct ip6_rthdr *rh = NULL;
- int needipsectun = 0;
- int segleft_org = 0;
- struct secpolicy *sp = NULL;
-#endif /* IPSEC */
struct m_tag *fwd_tag = NULL;
+ uint32_t id;
ip6 = mtod(m, struct ip6_hdr *);
if (ip6 == NULL) {
@@ -299,26 +352,12 @@
* IPSec checking which handles several cases.
* FAST IPSEC: We re-injected the packet.
*/
- switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+ switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp))
{
case 1: /* Bad packet */
goto freehdrs;
- case -1: /* Do IPSec */
- needipsec = 1;
- /*
- * Do delayed checksums now, as we may send before returning.
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- plen = m->m_pkthdr.len - sizeof(*ip6);
- in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
- sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
- }
-#endif
+ case -1: /* IPSec done */
+ goto done;
case 0: /* No IPSec */
default:
break;
@@ -338,15 +377,15 @@
optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
- /* NOTE: we don't add AH/ESP length here. do that later. */
+ /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */
if (exthdrs.ip6e_dest2)
optlen += exthdrs.ip6e_dest2->m_len;
/*
- * If we need IPsec, or there is at least one extension header,
+ * If there is at least one extension header,
* separate IP6 header from the payload.
*/
- if ((needipsec || optlen) && !hdrsplit) {
+ if (optlen && !hdrsplit) {
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
m = NULL;
goto freehdrs;
@@ -421,73 +460,7 @@
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
IPPROTO_ROUTING);
-#ifdef IPSEC
- if (!needipsec)
- goto skip_ipsec2;
-
/*
- * pointers after IPsec headers are not valid any more.
- * other pointers need a great care too.
- * (IPsec routines should not mangle mbufs prior to AH/ESP)
- */
- exthdrs.ip6e_dest2 = NULL;
-
- if (exthdrs.ip6e_rthdr) {
- rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
- segleft_org = rh->ip6r_segleft;
- rh->ip6r_segleft = 0;
- }
-
- bzero(&state, sizeof(state));
- state.m = m;
- error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
- &needipsectun);
- m = state.m;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_trans. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else if (!needipsectun) {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
- if (exthdrs.ip6e_rthdr) {
- /* ah6_output doesn't modify mbuf chain */
- rh->ip6r_segleft = segleft_org;
- }
-skip_ipsec2:;
-#endif /* IPSEC */
-
- /*
* If there is a routing header, discard the packet.
*/
if (exthdrs.ip6e_rthdr) {
@@ -522,19 +495,8 @@
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE
- if (ro->ro_rt == NULL) {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
- if (fle != NULL)
- flow_to_route_in6(fle, ro);
- }
+ if (ro->ro_rt == NULL)
+ (void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
#endif
again:
/*
@@ -563,77 +525,6 @@
ip6->ip6_hlim = V_ip6_defmcasthlim;
}
-#ifdef IPSEC
- /*
- * We may re-inject packets into the stack here.
- */
- if (needipsec && needipsectun) {
- struct ipsec_output_state state;
-
- /*
- * All the extension headers will become inaccessible
- * (since they can be encrypted).
- * Don't panic, we need no more updates to extension headers
- * on inner IPv6 packet (since they are now encapsulated).
- *
- * IPv6 [ESP|AH] IPv6 [extension headers] payload
- */
- bzero(&exthdrs, sizeof(exthdrs));
- exthdrs.ip6e_ip6 = m;
-
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = (struct route *)ro;
- state.dst = (struct sockaddr *)dst;
-
- error = ipsec6_output_tunnel(&state, sp, flags);
-
- m = state.m;
- ro = (struct route_in6 *)state.ro;
- dst = (struct sockaddr_in6 *)state.dst;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
- m0 = m = NULL;
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
-
- exthdrs.ip6e_ip6 = m;
- }
-#endif /* IPSEC */
-
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
@@ -672,7 +563,7 @@
}
if (rt != NULL) {
ia = (struct in6_ifaddr *)(rt->rt_ifa);
- rt->rt_use++;
+ counter_u64_add(rt->rt_pksent, 1);
}
@@ -775,9 +666,7 @@
/*
* XXX: ip6_mforward expects that rcvif is NULL
* when it is called from the originating path.
- * However, it is not always the case, since
- * some versions of MGETHDR() does not
- * initialize the field.
+ * However, it may not always be the case.
*/
m->m_pkthdr.rcvif = NULL;
if (ip6_mforward(ip6, ifp, m) != 0) {
@@ -997,19 +886,12 @@
* Even if the DONTFRAG option is specified, we cannot send the
* packet when the data length is larger than the MTU of the
* outgoing interface.
- * Notify the error by sending IPV6_PATHMTU ancillary data as
- * well as returning an error code (the latter is not described
- * in the API spec.)
+ * Notify the error by sending IPV6_PATHMTU ancillary data if
+ * application wanted to know the MTU value. Also return an
+ * error code (this is not described in the API spec).
*/
- u_int32_t mtu32;
- struct ip6ctlparam ip6cp;
-
- mtu32 = (u_int32_t)mtu;
- bzero(&ip6cp, sizeof(ip6cp));
- ip6cp.ip6c_cmdarg = (void *)&mtu32;
- pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
- (void *)&ip6cp);
-
+ if (inp != NULL)
+ ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu);
error = EMSGSIZE;
goto bad;
}
@@ -1046,9 +928,6 @@
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else {
- struct mbuf **mnext, *m_frgpart;
- struct ip6_frag *ip6f;
- u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
@@ -1096,8 +975,6 @@
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
}
#endif
- mnext = &m->m_nextpkt;
-
/*
* Change the next header field of the last header in the
* unfragmentable part.
@@ -1122,47 +999,9 @@
* chain.
*/
m0 = m;
- for (off = hlen; off < tlen; off += len) {
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (!m) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m->m_pkthdr.rcvif = NULL;
- m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */
- *mnext = m;
- mnext = &m->m_nextpkt;
- m->m_data += max_linkhdr;
- mhip6 = mtod(m, struct ip6_hdr *);
- *mhip6 = *ip6;
- m->m_len = sizeof(*mhip6);
- error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
- if (error) {
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
- if (off + len >= tlen)
- len = tlen - off;
- else
- ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
- mhip6->ip6_plen = htons((u_short)(len + hlen +
- sizeof(*ip6f) - sizeof(struct ip6_hdr)));
- if ((m_frgpart = m_copy(m0, off, len)) == 0) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m_cat(m, m_frgpart);
- m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
- m->m_pkthdr.rcvif = NULL;
- ip6f->ip6f_reserved = 0;
- ip6f->ip6f_ident = id;
- ip6f->ip6f_nxt = nextproto;
- IP6STAT_INC(ip6s_ofragments);
- in6_ifstat_inc(ifp, ifs6_out_fragcreat);
- }
+ id = htonl(ip6_randomid());
+ if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id)))
+ goto sendorfree;
in6_ifstat_inc(ifp, ifs6_out_fragok);
}
@@ -1196,11 +1035,6 @@
RO_RTFREE(ro);
if (ro_pmtu == &ip6route)
RO_RTFREE(ro_pmtu);
-#ifdef IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif
-
return (error);
freehdrs:
@@ -1223,17 +1057,12 @@
if (hlen > MCLBYTES)
return (ENOBUFS); /* XXX */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (!m)
+ if (hlen > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
return (ENOBUFS);
-
- if (hlen > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- return (ENOBUFS);
- }
- }
m->m_len = hlen;
if (hdr)
bcopy(hdr, mtod(m, caddr_t), hlen);
@@ -1261,8 +1090,8 @@
* Otherwise, use it to store the options.
*/
if (exthdrs->ip6e_hbh == 0) {
- MGET(mopt, M_DONTWAIT, MT_DATA);
- if (mopt == 0)
+ mopt = m_get(M_NOWAIT, MT_DATA);
+ if (mopt == NULL)
return (ENOBUFS);
mopt->m_len = JUMBOOPTLEN;
optbuf = mtod(mopt, u_char *);
@@ -1293,15 +1122,8 @@
* As a consequence, we must always prepare a cluster
* at this point.
*/
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
- if (!n)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ if (n == NULL)
return (ENOBUFS);
n->m_len = oldoptlen + JUMBOOPTLEN;
bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
@@ -1348,7 +1170,7 @@
if (hlen > sizeof(struct ip6_hdr)) {
n = m_copym(m0, sizeof(struct ip6_hdr),
- hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
+ hlen - sizeof(struct ip6_hdr), M_NOWAIT);
if (n == 0)
return (ENOBUFS);
m->m_next = n;
@@ -1370,8 +1192,8 @@
/* allocate a new mbuf for the fragment header */
struct mbuf *mfrg;
- MGET(mfrg, M_DONTWAIT, MT_DATA);
- if (mfrg == 0)
+ mfrg = m_get(M_NOWAIT, MT_DATA);
+ if (mfrg == NULL)
return (ENOBUFS);
mfrg->m_len = sizeof(struct ip6_frag);
*frghdrp = mtod(mfrg, struct ip6_frag *);
@@ -1422,9 +1244,9 @@
ifmtu = IN6_LINKMTU(ifp);
mtu = tcp_hc_getmtu(&inc);
if (mtu)
- mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
+ mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu);
else
- mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+ mtu = ro_pmtu->ro_rt->rt_mtu;
if (mtu == 0)
mtu = ifmtu;
else if (mtu < IPV6_MMTU) {
@@ -1448,7 +1270,7 @@
* field isn't locked).
*/
mtu = ifmtu;
- ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
+ ro_pmtu->ro_rt->rt_mtu = mtu;
}
} else if (ifp) {
mtu = IN6_LINKMTU(ifp);
@@ -1492,13 +1314,10 @@
switch (sopt->sopt_name) {
case SO_REUSEADDR:
INP_WLOCK(in6p);
- if (IN_MULTICAST(ntohl(in6p->inp_laddr.s_addr))) {
- if ((so->so_options &
- (SO_REUSEADDR | SO_REUSEPORT)) != 0)
- in6p->inp_flags2 |= INP_REUSEPORT;
- else
- in6p->inp_flags2 &= ~INP_REUSEPORT;
- }
+ if ((so->so_options & SO_REUSEADDR) != 0)
+ in6p->inp_flags2 |= INP_REUSEADDR;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEADDR;
INP_WUNLOCK(in6p);
error = 0;
break;
@@ -3016,14 +2835,6 @@
if (copym == NULL)
return;
}
-
-#ifdef DIAGNOSTIC
- if (copym->m_len < sizeof(*ip6)) {
- m_freem(copym);
- return;
- }
-#endif
-
ip6 = mtod(copym, struct ip6_hdr *);
/*
* clear embedded scope identifiers if necessary.
@@ -3031,7 +2842,11 @@
*/
in6_clearscope(&ip6->ip6_src);
in6_clearscope(&ip6->ip6_dst);
-
+ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
+ CSUM_PSEUDO_HDR;
+ copym->m_pkthdr.csum_data = 0xffff;
+ }
(void)if_simloop(ifp, copym, dst->sin6_family, 0);
}
@@ -3046,12 +2861,12 @@
ip6 = mtod(m, struct ip6_hdr *);
if (m->m_len > sizeof(*ip6)) {
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
- if (mh == 0) {
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
+ if (mh == NULL) {
m_freem(m);
return ENOBUFS;
}
- M_MOVE_PKTHDR(mh, m);
+ m_move_pkthdr(mh, m);
MH_ALIGN(mh, sizeof(*ip6));
m->m_len -= sizeof(*ip6);
m->m_data += sizeof(*ip6);
Modified: trunk/sys/netinet6/ip6_var.h
===================================================================
--- trunk/sys/netinet6/ip6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -59,7 +59,7 @@
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/ip6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/ip6_var.h 329158 2018-02-12 13:52:58Z ae $
*/
#ifndef _NETINET6_IP6_VAR_H_
@@ -182,39 +182,39 @@
*/
struct ip6stat {
- u_quad_t ip6s_total; /* total packets received */
- u_quad_t ip6s_tooshort; /* packet too short */
- u_quad_t ip6s_toosmall; /* not enough data */
- u_quad_t ip6s_fragments; /* fragments received */
- u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
- u_quad_t ip6s_fragtimeout; /* fragments timed out */
- u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */
- u_quad_t ip6s_forward; /* packets forwarded */
- u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */
- u_quad_t ip6s_redirectsent; /* packets forwarded on same net */
- u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/
- u_quad_t ip6s_localout; /* total ip packets generated here */
- u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */
- u_quad_t ip6s_reassembled; /* total packets reassembled ok */
- u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */
- u_quad_t ip6s_ofragments; /* output fragments created */
- u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
- u_quad_t ip6s_badoptions; /* error in option processing */
- u_quad_t ip6s_noroute; /* packets discarded due to no route */
- u_quad_t ip6s_badvers; /* ip6 version != 6 */
- u_quad_t ip6s_rawout; /* total raw ip packets generated */
- u_quad_t ip6s_badscope; /* scope error */
- u_quad_t ip6s_notmember; /* don't join this multicast group */
+ uint64_t ip6s_total; /* total packets received */
+ uint64_t ip6s_tooshort; /* packet too short */
+ uint64_t ip6s_toosmall; /* not enough data */
+ uint64_t ip6s_fragments; /* fragments received */
+ uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
+ uint64_t ip6s_fragtimeout; /* fragments timed out */
+ uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */
+ uint64_t ip6s_forward; /* packets forwarded */
+ uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */
+ uint64_t ip6s_redirectsent; /* packets forwarded on same net */
+ uint64_t ip6s_delivered; /* datagrams delivered to upper level*/
+ uint64_t ip6s_localout; /* total ip packets generated here */
+ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */
+ uint64_t ip6s_reassembled; /* total packets reassembled ok */
+ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */
+ uint64_t ip6s_ofragments; /* output fragments created */
+ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
+ uint64_t ip6s_badoptions; /* error in option processing */
+ uint64_t ip6s_noroute; /* packets discarded due to no route */
+ uint64_t ip6s_badvers; /* ip6 version != 6 */
+ uint64_t ip6s_rawout; /* total raw ip packets generated */
+ uint64_t ip6s_badscope; /* scope error */
+ uint64_t ip6s_notmember; /* don't join this multicast group */
#define IP6S_HDRCNT 256 /* headers count */
- u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
- u_quad_t ip6s_m1; /* one mbuf */
+ uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
+ uint64_t ip6s_m1; /* one mbuf */
#define IP6S_M2MMAX 32
- u_quad_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
- u_quad_t ip6s_mext1; /* one ext mbuf */
- u_quad_t ip6s_mext2m; /* two or more ext mbuf */
- u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
- u_quad_t ip6s_nogif; /* no match gif found */
- u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */
+ uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
+ uint64_t ip6s_mext1; /* one ext mbuf */
+ uint64_t ip6s_mext2m; /* two or more ext mbuf */
+ uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
+ uint64_t ip6s_nogif; /* no match gif found */
+ uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */
/*
* statistics for improvement of the source address selection
@@ -224,31 +224,35 @@
#define IP6S_RULESMAX 16
#define IP6S_SCOPECNT 16
/* number of times that address selection fails */
- u_quad_t ip6s_sources_none;
+ uint64_t ip6s_sources_none;
/* number of times that an address on the outgoing I/F is chosen */
- u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_sameif[IP6S_SCOPECNT];
/* number of times that an address on a non-outgoing I/F is chosen */
- u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherif[IP6S_SCOPECNT];
/*
* number of times that an address that has the same scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_samescope[IP6S_SCOPECNT];
/*
* number of times that an address that has a different scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT];
/* number of times that a deprecated address is chosen */
- u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT];
/* number of times that each rule of source selection is applied. */
- u_quad_t ip6s_sources_rule[IP6S_RULESMAX];
+ uint64_t ip6s_sources_rule[IP6S_RULESMAX];
};
#ifdef _KERNEL
-#define IP6STAT_ADD(name, val) V_ip6stat.name += (val)
-#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat);
+#define IP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val))
+#define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val))
#define IP6STAT_INC(name) IP6STAT_ADD(name, 1)
#define IP6STAT_DEC(name) IP6STAT_SUB(name, 1)
#endif
@@ -290,8 +294,6 @@
#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */
-#define M_IP6_NEXTHOP M_PROTO7 /* explicit ip nexthop */
-
#ifdef __NO_STRICT_ALIGNMENT
#define IP6_HDR_ALIGNED_P(ip) 1
#else
@@ -298,7 +300,6 @@
#define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0)
#endif
-VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */
VNET_DECLARE(int, ip6_defhlim); /* default hop limit */
VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */
VNET_DECLARE(int, ip6_forwarding); /* act as router? */
@@ -307,7 +308,6 @@
* walk list every 5 sec. */
VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */
VNET_DECLARE(int, ip6_v6only);
-#define V_ip6stat VNET(ip6stat)
#define V_ip6_defhlim VNET(ip6_defhlim)
#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim)
#define V_ip6_forwarding VNET(ip6_forwarding)
@@ -391,7 +391,7 @@
void ip6_freepcbopts(struct ip6_pktopts *);
int ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
-char * ip6_get_prevhdr(struct mbuf *, int);
+int ip6_get_prevhdr(struct mbuf *, int);
int ip6_nexthdr(struct mbuf *, int, int, int *);
int ip6_lasthdr(struct mbuf *, int, int, int *);
@@ -407,8 +407,7 @@
struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
struct mbuf **, int *);
void ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
-void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *,
- u_int32_t *);
+void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t);
int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip6_forward(struct mbuf *, int);
@@ -427,6 +426,9 @@
void ip6_clearpktopts(struct ip6_pktopts *, int);
struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
int ip6_optlen(struct inpcb *);
+int ip6_deletefraghdr(struct mbuf *, int, int);
+int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
+ uint32_t);
int route6_input(struct mbuf **, int *, int);
@@ -457,6 +459,7 @@
struct rtentry **, u_int);
u_int32_t ip6_randomid(void);
u_int32_t ip6_randomflowlabel(void);
+void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_VAR_H_ */
Modified: trunk/sys/netinet6/ip6protosw.h
===================================================================
--- trunk/sys/netinet6/ip6protosw.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/ip6protosw.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -60,7 +60,7 @@
*
* @(#)protosw.h 8.1 (Berkeley) 6/2/93
* BSDI protosw.h,v 2.3 1996/10/11 16:02:40 pjd Exp
- * $FreeBSD: stable/9/sys/netinet6/ip6protosw.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/ip6protosw.h 241916 2012-10-22 21:49:56Z delphij $
*/
#ifndef _NETINET6_IP6PROTOSW_H_
Modified: trunk/sys/netinet6/mld6.c
===================================================================
--- trunk/sys/netinet6/mld6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -65,7 +65,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/mld6.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/mld6.c 291987 2015-12-08 07:31:26Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -276,7 +276,7 @@
{
#ifdef VIMAGE
- m->m_pkthdr.header = ifp->if_vnet;
+ m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
m->m_pkthdr.flowid = ifp->if_index;
}
@@ -285,7 +285,7 @@
mld_scrub_context(struct mbuf *m)
{
- m->m_pkthdr.header = NULL;
+ m->m_pkthdr.PH_loc.ptr = NULL;
m->m_pkthdr.flowid = 0;
}
@@ -301,7 +301,7 @@
{
#if defined(VIMAGE) && defined(INVARIANTS)
- KASSERT(curvnet == m->m_pkthdr.header,
+ KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
("%s: called when curvnet was not restored", __func__));
#endif
return (m->m_pkthdr.flowid);
@@ -1800,13 +1800,13 @@
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
/* ia may be NULL if link-local address is tentative. */
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (ENOMEM);
}
- MGET(md, M_DONTWAIT, MT_DATA);
+ md = m_get(M_NOWAIT, MT_DATA);
if (md == NULL) {
m_free(mh);
if (ia != NULL)
@@ -2448,9 +2448,9 @@
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
if (!is_state_change && !is_group_query)
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
@@ -2573,9 +2573,9 @@
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
mld_save_context(m, ifp);
@@ -2727,9 +2727,9 @@
CTR1(KTR_MLD,
"%s: use previous packet", __func__);
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
CTR1(KTR_MLD,
"%s: m_get*() failed", __func__);
@@ -2990,6 +2990,15 @@
KASSERT(mli->mli_version == MLD_VERSION_2,
("%s: called when version %d", __func__, mli->mli_version));
+ /*
+ * Check that there are some packets queued. If so, send them first.
+ * For large number of groups the reply to general query can take
+ * many packets, we should finish sending them before starting of
+ * queuing the new reply.
+ */
+ if (mli->mli_gq.ifq_head != NULL)
+ goto send;
+
ifp = mli->mli_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3025,6 +3034,7 @@
}
IF_ADDR_RUNLOCK(ifp);
+send:
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
/*
@@ -3099,7 +3109,7 @@
}
mld_scrub_context(m0);
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m0->m_pkthdr.rcvif = V_loif;
ip6 = mtod(m0, struct ip6_hdr *);
@@ -3174,7 +3184,7 @@
if (ia == NULL)
CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
Modified: trunk/sys/netinet6/mld6.h
===================================================================
--- trunk/sys/netinet6/mld6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet6/mld6.h 191666 2009-04-29 11:31:23Z bms $
+ * $FreeBSD: stable/10/sys/netinet6/mld6.h 191666 2009-04-29 11:31:23Z bms $
*/
#ifndef _NETINET6_MLD6_H_
Modified: trunk/sys/netinet6/mld6_var.h
===================================================================
--- trunk/sys/netinet6/mld6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/mld6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet6/mld6_var.h 200871 2009-12-22 20:40:22Z bms $
+ * $FreeBSD: stable/10/sys/netinet6/mld6_var.h 200871 2009-12-22 20:40:22Z bms $
*/
#ifndef _NETINET6_MLD6_VAR_H_
#define _NETINET6_MLD6_VAR_H_
Modified: trunk/sys/netinet6/nd6.c
===================================================================
--- trunk/sys/netinet6/nd6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,10 +31,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6.c 248852 2013-03-28 20:48:40Z emaste $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6.c 303458 2016-07-28 20:08:01Z sbruno $");
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -51,6 +52,7 @@
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
+#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <net/if.h>
@@ -63,6 +65,7 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <net/if_llatbl.h>
#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet/if_ether.h>
@@ -82,7 +85,7 @@
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
-#define SIN6(s) ((struct sockaddr_in6 *)s)
+#define SIN6(s) ((const struct sockaddr_in6 *)(s))
/* timer values */
VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */
@@ -110,19 +113,13 @@
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-/* for debugging? */
-#if 0
-static int nd6_inuse, nd6_allocated;
-#endif
-
VNET_DEFINE(struct nd_drhead, nd_defrouter);
VNET_DEFINE(struct nd_prhead, nd_prefix);
+VNET_DEFINE(struct rwlock, nd6_lock);
VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
-static struct sockaddr_in6 all1_sa;
-
int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
@@ -130,9 +127,13 @@
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
-static struct llentry *nd6_free(struct llentry *, int);
+static struct llentry *nd6_free(struct llentry **, int);
static void nd6_llinfo_timer(void *);
static void clear_llinfo_pqueue(struct llentry *);
+static int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *);
+static int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *);
static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
@@ -142,15 +143,11 @@
void
nd6_init(void)
{
- int i;
+ rw_init(&V_nd6_lock, "nd6");
+
LIST_INIT(&V_nd_prefix);
- all1_sa.sin6_family = AF_INET6;
- all1_sa.sin6_len = sizeof(struct sockaddr_in6);
- for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
- all1_sa.sin6_addr.s6_addr[i] = 0xff;
-
/* initialization of the default router list */
TAILQ_INIT(&V_nd_defrouter);
@@ -158,6 +155,8 @@
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
+
+ nd6_dad_init();
}
#ifdef VIMAGE
@@ -167,6 +166,7 @@
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
+ rw_destroy(&V_nd6_lock);
}
#endif
@@ -175,7 +175,7 @@
{
struct nd_ifinfo *nd;
- nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
+ nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
nd->initialized = 1;
nd->chlim = IPV6_DEFHLIM;
@@ -185,13 +185,25 @@
nd->flags = ND6_IFF_PERFORMNUD;
- /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. */
- if (V_ip6_auto_linklocal || (ifp->if_flags & IFF_LOOPBACK))
+ /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
+ * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
+ * default regardless of the V_ip6_auto_linklocal configuration to
+ * give a reasonable default behavior.
+ */
+ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
+ (ifp->if_flags & IFF_LOOPBACK))
nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
-
- /* A loopback interface does not need to accept RTADV. */
- if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK))
- nd->flags |= ND6_IFF_ACCEPT_RTADV;
+ /*
+ * A loopback interface does not need to accept RTADV.
+ * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
+ * default regardless of the V_ip6_accept_rtadv configuration to
+ * prevent the interface from accepting RA messages arrived
+ * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
+ */
+ if (V_ip6_accept_rtadv &&
+ !(ifp->if_flags & IFF_LOOPBACK) &&
+ (ifp->if_type != IFT_BRIDGE))
+ nd->flags |= ND6_IFF_ACCEPT_RTADV;
if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
nd->flags |= ND6_IFF_NO_RADR;
@@ -215,6 +227,8 @@
void
nd6_setmtu(struct ifnet *ifp)
{
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
nd6_setmtu0(ifp, ND_IFINFO(ifp));
}
@@ -359,6 +373,7 @@
case ND_OPT_TARGET_LINKADDR:
case ND_OPT_MTU:
case ND_OPT_REDIRECTED_HEADER:
+ case ND_OPT_NONCE:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
nd6log((LOG_INFO,
"duplicated ND6 option found (type=%d)\n",
@@ -425,7 +440,7 @@
ln->ln_ntick = 0;
canceled = callout_stop(&ln->ln_timer_ch);
} else {
- ln->la_expire = time_second + tick / hz;
+ ln->la_expire = time_uptime + tick / hz;
LLE_ADDREF(ln);
if (tick > INT_MAX) {
ln->ln_ntick = tick - INT_MAX;
@@ -456,15 +471,38 @@
struct llentry *ln;
struct in6_addr *dst;
struct ifnet *ifp;
- struct nd_ifinfo *ndi = NULL;
+ struct nd_ifinfo *ndi;
KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg;
- LLE_WLOCK_ASSERT(ln);
ifp = ln->lle_tbl->llt_ifp;
-
CURVNET_SET(ifp->if_vnet);
+ ND6_RLOCK();
+ LLE_WLOCK(ln);
+ if (callout_pending(&ln->la_timer)) {
+ /*
+ * The treatment of active/pending here is a bit unusual.
+ * If the pending bit is set, the callout was rescheduled
+ * before this handler ran. The active bit is ignored:
+ * if the callout was stopped in ll_tablefree() while it
+ * was already running, the stop would have returned 0
+ * and the code would not have deleted it, since the
+ * callout could not be stopped; so we want to go
+ * through with the delete here now.
+ *
+ * If the callout was restarted, the pending bit will be
+ * back on and we just want to bail, since callout_reset()
+ * would return 1 and our reference would have been
+ * removed by nd6_llinfo_settimer_locked() above, since
+ * canceled would have been 1.
+ */
+ LLE_WUNLOCK(ln);
+ ND6_RUNLOCK();
+ CURVNET_RESTORE();
+ return;
+ }
+
if (ln->ln_ntick > 0) {
if (ln->ln_ntick > INT_MAX) {
ln->ln_ntick -= INT_MAX;
@@ -483,8 +521,7 @@
}
if (ln->la_flags & LLE_DELETED) {
- (void)nd6_free(ln, 0);
- ln = NULL;
+ (void)nd6_free(&ln, 0);
goto done;
}
@@ -494,7 +531,7 @@
ln->la_asked++;
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, dst, ln, 0);
+ nd6_ns_output(ifp, NULL, dst, ln, NULL);
LLE_WLOCK(ln);
} else {
struct mbuf *m = ln->la_hold;
@@ -510,8 +547,7 @@
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
- (void)nd6_free(ln, 0);
- ln = NULL;
+ (void)nd6_free(&ln, 0);
if (m != NULL)
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
@@ -527,7 +563,8 @@
case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */
if (!ND6_LLINFO_PERMANENT(ln)) {
- (void)nd6_free(ln, 1);
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
+ (void)nd6_free(&ln, 1);
ln = NULL;
}
break;
@@ -539,7 +576,7 @@
ln->ln_state = ND6_LLINFO_PROBE;
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
+ nd6_ns_output(ifp, dst, dst, ln, NULL);
LLE_WLOCK(ln);
} else {
ln->ln_state = ND6_LLINFO_STALE; /* XXX */
@@ -551,11 +588,10 @@
ln->la_asked++;
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
+ nd6_ns_output(ifp, dst, dst, ln, NULL);
LLE_WLOCK(ln);
} else {
- (void)nd6_free(ln, 0);
- ln = NULL;
+ (void)nd6_free(&ln, 0);
}
break;
default:
@@ -563,8 +599,10 @@
__func__, ln->ln_state);
}
done:
- if (ln != NULL)
+ if (ln != NULL) {
+ ND6_RUNLOCK();
LLE_FREE_LOCKED(ln);
+ }
CURVNET_RESTORE();
}
@@ -576,7 +614,7 @@
nd6_timer(void *arg)
{
CURVNET_SET((struct vnet *) arg);
- int s;
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
struct in6_ifaddr *ia6, *nia6;
@@ -584,11 +622,18 @@
callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
nd6_timer, curvnet);
+ TAILQ_INIT(&drq);
+
/* expire default router list */
- s = splnet();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
- if (dr->expire && dr->expire < time_second)
- defrtrlist_del(dr);
+ ND6_WLOCK();
+ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
+ if (dr->expire && dr->expire < time_uptime)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
/*
@@ -654,8 +699,31 @@
goto addrloop;
}
}
+ } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
+ /*
+ * Schedule DAD for a tentative address. This happens
+ * if the interface was down or not running
+ * when the address was configured.
+ */
+ int delay;
+
+ delay = arc4random() %
+ (MAX_RTR_SOLICITATION_DELAY * hz);
+ nd6_dad_start((struct ifaddr *)ia6, delay);
} else {
/*
+ * Check status of the interface. If it is down,
+ * mark the address as tentative for future DAD.
+ */
+ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
+ (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ == 0 ||
+ (ND_IFINFO(ia6->ia_ifp)->flags &
+ ND6_IFF_IFDISABLED) != 0) {
+ ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
+ ia6->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ /*
* A new RA might have made a deprecated address
* preferred.
*/
@@ -671,7 +739,7 @@
* prefix is not necessary.
*/
if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
- time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+ time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
/*
* address expiration and prefix expiration are
@@ -680,7 +748,6 @@
prelist_remove(pr);
}
}
- splx(s);
CURVNET_RESTORE();
}
@@ -732,11 +799,10 @@
* address with the prefix.
*/
if (!IFA6_IS_DEPRECATED(it6))
- public_ifa6 = it6;
-
- if (public_ifa6 != NULL)
- ifa_ref(&public_ifa6->ia_ifa);
+ public_ifa6 = it6;
}
+ if (public_ifa6 != NULL)
+ ifa_ref(&public_ifa6->ia_ifa);
IF_ADDR_RUNLOCK(ifp);
if (public_ifa6 != NULL) {
@@ -762,9 +828,12 @@
void
nd6_purge(struct ifnet *ifp)
{
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
+ TAILQ_INIT(&drq);
+
/*
* Nuke default router list entries toward ifp.
* We defer removal of default router list entries that is installed
@@ -771,22 +840,27 @@
* in the routing table, in order to keep additional side effects as
* small as possible.
*/
+ ND6_WLOCK();
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
}
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (!dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
}
+ ND6_WUNLOCK();
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
+ }
+
/* Nuke prefix list entries toward ifp */
LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
if (pr->ndpr_ifp == ifp) {
@@ -995,14 +1069,28 @@
* that the change is safe.
*/
static struct llentry *
-nd6_free(struct llentry *ln, int gc)
+nd6_free(struct llentry **lnp, int gc)
{
- struct llentry *next;
+ struct ifnet *ifp;
+ struct llentry *ln, *next;
struct nd_defrouter *dr;
- struct ifnet *ifp;
+ ln = *lnp;
+ *lnp = NULL;
+
LLE_WLOCK_ASSERT(ln);
+ ND6_RLOCK_ASSERT();
+ ifp = ln->lle_tbl->llt_ifp;
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
+ dr = defrouter_lookup_locked(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
+ else
+ dr = NULL;
+ ND6_RUNLOCK();
+
+ if ((ln->la_flags & LLE_DELETED) == 0)
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
+
/*
* we used to have pfctlinput(PRC_HOSTDEAD) here.
* even though it is not harmful, it was not really necessary.
@@ -1011,11 +1099,7 @@
/* cancel timer */
nd6_llinfo_settimer_locked(ln, -1);
- ifp = ln->lle_tbl->llt_ifp;
-
if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
- dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
-
if (dr != NULL && dr->expire &&
ln->ln_state == ND6_LLINFO_STALE && gc) {
/*
@@ -1030,9 +1114,9 @@
* XXX: the check for ln_state would be redundant,
* but we intentionally keep it just in case.
*/
- if (dr->expire > time_second)
+ if (dr->expire > time_uptime)
nd6_llinfo_settimer_locked(ln,
- (dr->expire - time_second) * hz);
+ (dr->expire - time_uptime) * hz);
else
nd6_llinfo_settimer_locked(ln,
(long)V_nd6_gctimer * hz);
@@ -1040,6 +1124,7 @@
next = LIST_NEXT(ln, lle_next);
LLE_REMREF(ln);
LLE_WUNLOCK(ln);
+ defrouter_rele(dr);
return (next);
}
@@ -1122,6 +1207,8 @@
IF_AFDATA_UNLOCK(ifp);
+ if (dr != NULL)
+ defrouter_rele(dr);
return (next);
}
@@ -1140,9 +1227,9 @@
return;
ifp = rt->rt_ifp;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
- IF_AFDATA_UNLOCK(ifp);
+ IF_AFDATA_RUNLOCK(ifp);
if (ln == NULL)
return;
@@ -1200,12 +1287,13 @@
/*
* check for default route
*/
- if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
- &SIN6(rt_key(rt))->sin6_addr)) {
-
+ if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
+ &SIN6(rt_key(rt))->sin6_addr)) {
dr = defrouter_lookup(&gateway->sin6_addr, ifp);
- if (dr != NULL)
+ if (dr != NULL) {
dr->installed = 0;
+ defrouter_rele(dr);
+ }
}
break;
}
@@ -1215,100 +1303,14 @@
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
- struct in6_drlist *drl = (struct in6_drlist *)data;
- struct in6_oprlist *oprl = (struct in6_oprlist *)data;
struct in6_ndireq *ndi = (struct in6_ndireq *)data;
struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
- struct nd_defrouter *dr;
- struct nd_prefix *pr;
- int i = 0, error = 0;
- int s;
+ int error = 0;
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return (EPFNOSUPPORT);
switch (cmd) {
- case SIOCGDRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- */
- bzero(drl, sizeof(*drl));
- s = splnet();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (i >= DRLSTSIZ)
- break;
- drl->defrouter[i].rtaddr = dr->rtaddr;
- in6_clearscope(&drl->defrouter[i].rtaddr);
-
- drl->defrouter[i].flags = dr->flags;
- drl->defrouter[i].rtlifetime = dr->rtlifetime;
- drl->defrouter[i].expire = dr->expire;
- drl->defrouter[i].if_index = dr->ifp->if_index;
- i++;
- }
- splx(s);
- break;
- case SIOCGPRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- *
- * XXX the structure in6_prlist was changed in backward-
- * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6,
- * in6_prlist is used for nd6_sysctl() - fill_prlist().
- */
- /*
- * XXX meaning of fields, especialy "raflags", is very
- * differnet between RA prefix list and RR/static prefix list.
- * how about separating ioctls into two?
- */
- bzero(oprl, sizeof(*oprl));
- s = splnet();
- LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
- struct nd_pfxrouter *pfr;
- int j;
-
- if (i >= PRLSTSIZ)
- break;
- oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
- oprl->prefix[i].raflags = pr->ndpr_raf;
- oprl->prefix[i].prefixlen = pr->ndpr_plen;
- oprl->prefix[i].vltime = pr->ndpr_vltime;
- oprl->prefix[i].pltime = pr->ndpr_pltime;
- oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
- if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
- oprl->prefix[i].expire = 0;
- else {
- time_t maxexpire;
-
- /* XXX: we assume time_t is signed. */
- maxexpire = (-1) &
- ~((time_t)1 <<
- ((sizeof(maxexpire) * 8) - 1));
- if (pr->ndpr_vltime <
- maxexpire - pr->ndpr_lastupdate) {
- oprl->prefix[i].expire =
- pr->ndpr_lastupdate +
- pr->ndpr_vltime;
- } else
- oprl->prefix[i].expire = maxexpire;
- }
-
- j = 0;
- LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
- if (j < DRLSTSIZ) {
-#define RTRADDR oprl->prefix[i].advrtr[j]
- RTRADDR = pfr->router->rtaddr;
- in6_clearscope(&RTRADDR);
-#undef RTRADDR
- }
- j++;
- }
- oprl->prefix[i].advrtrs = j;
- oprl->prefix[i].origin = PR_ORIG_RA;
-
- i++;
- }
- splx(s);
-
- break;
case OSIOCGIFINFO_IN6:
#define ND ndi->ndi
/* XXX: old ndp(8) assumes a positive value for linkmtu. */
@@ -1358,17 +1360,7 @@
struct ifaddr *ifa;
struct in6_ifaddr *ia;
- /*
- * Try to clear ifdisabled flag when enabling
- * accept_rtadv or auto_linklocal.
- */
if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
- !(ND.flags & ND6_IFF_IFDISABLED) &&
- (ND.flags & (ND6_IFF_ACCEPT_RTADV |
- ND6_IFF_AUTO_LINKLOCAL)))
- ND.flags &= ~ND6_IFF_IFDISABLED;
-
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
!(ND.flags & ND6_IFF_IFDISABLED)) {
/* ifdisabled 1->0 transision */
@@ -1378,8 +1370,6 @@
* do not clear ND6_IFF_IFDISABLED.
* See RFC 4862, Section 5.4.5.
*/
- int duplicated_linklocal = 0;
-
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -1386,14 +1376,13 @@
continue;
ia = (struct in6_ifaddr *)ifa;
if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
- IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- duplicated_linklocal = 1;
+ IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (duplicated_linklocal) {
+ if (ifa != NULL) {
+ /* LLA is duplicated. */
ND.flags |= ND6_IFF_IFDISABLED;
log(LOG_ERR, "Cannot enable an interface"
" with a link-local address marked"
@@ -1409,14 +1398,19 @@
/* Mark all IPv6 address as tentative. */
ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- ia = (struct in6_ifaddr *)ifa;
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ if (V_ip6_dad_count > 0 &&
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ IF_ADDR_RUNLOCK(ifp);
}
- IF_ADDR_RUNLOCK(ifp);
}
if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
@@ -1434,20 +1428,19 @@
* address is assigned, and IFF_UP, try to
* assign one.
*/
- int haslinklocal = 0;
-
IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
continue;
ia = (struct in6_ifaddr *)ifa;
- if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- haslinklocal = 1;
+ if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (!haslinklocal)
+ if (ifa == NULL)
+ /* Scan ended without a match: no LLA is configured. */
in6_ifattach(ifp, NULL);
}
}
@@ -1465,7 +1458,6 @@
/* flush all the prefix advertised by routers */
struct nd_prefix *pr, *next;
- s = splnet();
LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
struct in6_ifaddr *ia, *ia_next;
@@ -1484,21 +1476,28 @@
}
prelist_remove(pr);
}
- splx(s);
break;
}
case SIOCSRTRFLUSH_IN6:
{
/* flush all the default routers */
- struct nd_defrouter *dr, *next;
+ struct nd_drhead drq;
+ struct nd_defrouter *dr;
- s = splnet();
+ TAILQ_INIT(&drq);
+
defrouter_reset();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
- defrtrlist_del(dr);
+
+ ND6_WLOCK();
+ while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
+
defrouter_select();
- splx(s);
break;
}
case SIOCGNBRINFO_IN6:
@@ -1520,7 +1519,11 @@
nbi->state = ln->ln_state;
nbi->asked = ln->la_asked;
nbi->isrouter = ln->ln_router;
- nbi->expire = ln->la_expire;
+ if (ln->la_expire == 0)
+ nbi->expire = 0;
+ else
+ nbi->expire = ln->la_expire +
+ (time_second - time_uptime);
LLE_RUNLOCK(ln);
break;
}
@@ -1579,16 +1582,16 @@
* description on it in NS section (RFC 2461 7.2.3).
*/
flags = lladdr ? ND6_EXCLUSIVE : 0;
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(from, flags, ifp);
-
+ IF_AFDATA_RUNLOCK(ifp);
if (ln == NULL) {
flags |= ND6_EXCLUSIVE;
+ IF_AFDATA_LOCK(ifp);
ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
IF_AFDATA_UNLOCK(ifp);
is_newentry = 1;
} else {
- IF_AFDATA_UNLOCK(ifp);
/* do nothing if static ndp is set */
if (ln->la_flags & LLE_STATIC) {
static_route = 1;
@@ -1624,6 +1627,7 @@
*/
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
}
if (!is_newentry) {
@@ -1648,42 +1652,8 @@
ln->ln_state = newstate;
if (ln->ln_state == ND6_LLINFO_STALE) {
- /*
- * XXX: since nd6_output() below will cause
- * state tansition to DELAY and reset the timer,
- * we must set the timer now, although it is actually
- * meaningless.
- */
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
-
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
-
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
-
- /*
- * we assume ifp is not a p2p here, so
- * just set the 2nd argument as the
- * 1st one.
- */
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- /*
- * If we have mbufs in the chain we need to do
- * deferred transmit. Copy the address from the
- * llentry before dropping the lock down below.
- */
- if (chain != NULL)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
/* probe right away */
nd6_llinfo_settimer_locked((void *)ln, 0);
@@ -1766,8 +1736,8 @@
if (static_route)
ln = NULL;
}
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
/*
* When the link-layer address of a router changes, select the
@@ -1816,6 +1786,8 @@
nd6_slowtimo, curvnet);
IFNET_RLOCK_NOSLEEP();
TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
nd6if = ND_IFINFO(ifp);
if (nd6if->basereachable && /* already initialized */
(nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
@@ -1833,46 +1805,154 @@
CURVNET_RESTORE();
}
+void
+nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
+ struct sockaddr_in6 *sin6)
+{
+
+ LLE_WLOCK_ASSERT(ln);
+
+ *chain = ln->la_hold;
+ ln->la_hold = NULL;
+ memcpy(sin6, L3_ADDR_SIN6(ln), sizeof(*sin6));
+
+ if (ln->ln_state == ND6_LLINFO_STALE) {
+
+ /*
+ * The first time we send a packet to a
+ * neighbor whose entry is STALE, we have
+ * to change the state to DELAY and set
+ * a timer to expire in DELAY_FIRST_PROBE_TIME
+ * seconds to ensure that neighbor unreachability
+ * detection is performed on expiration.
+ * (RFC 2461 7.3.3)
+ */
+ ln->la_asked = 0;
+ ln->ln_state = ND6_LLINFO_DELAY;
+ nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
+ }
+}
+
+static int
+nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+ struct sockaddr_in6 *dst)
+{
+ int error;
+ int ip6len;
+ struct ip6_hdr *ip6;
+ struct m_tag *mtag;
+
+#ifdef MAC
+ mac_netinet6_nd6_send(ifp, m);
+#endif
+
+ /*
+ * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
+ * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
+ * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
+ * to be diverted to user space. When re-injected into the kernel,
+ * send_output() will directly dispatch them to the outgoing interface.
+ */
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
+ if (mtag != NULL) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ /* Use the SEND socket */
+ error = send_sendso_input_hook(m, ifp, SND_OUT,
+ ip6len);
+ /* -1 == no app on SEND socket */
+ if (error == 0 || error != -1)
+ return (error);
+ }
+ }
+
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
+ mtod(m, struct ip6_hdr *));
+
+ if ((ifp->if_flags & IFF_LOOPBACK) == 0)
+ origifp = ifp;
+
+ error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL);
+ return (error);
+}
+
+/*
+ * IPv6 packet output - light version.
+ * Checks if destination LLE exists and is in proper state
+ * (e.g no modification required). If not true, fall back to
+ * "heavy" version.
+ */
int
-nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
+nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
struct sockaddr_in6 *dst, struct rtentry *rt0)
{
+ struct llentry *ln = NULL;
- return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
+ /* discard the packet if IPv6 operation is disabled on the interface */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+ m_freem(m);
+ return (ENETDOWN); /* better error? */
+ }
+
+ if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
+ goto sendpkt;
+
+ if (nd6_need_cache(ifp) == 0)
+ goto sendpkt;
+
+ IF_AFDATA_RLOCK(ifp);
+ ln = nd6_lookup(&dst->sin6_addr, 0, ifp);
+ IF_AFDATA_RUNLOCK(ifp);
+
+ /*
+ * Perform fast path for the following cases:
+ * 1) lle state is REACHABLE
+ * 2) lle state is DELAY (NS message sent)
+ *
+ * Every other case involves lle modification, so we handle
+ * them separately.
+ */
+ if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE &&
+ ln->ln_state != ND6_LLINFO_DELAY)) {
+ /* Fall back to slow processing path */
+ if (ln != NULL)
+ LLE_RUNLOCK(ln);
+ return (nd6_output_lle(ifp, origifp, m, dst));
+ }
+
+sendpkt:
+ if (ln != NULL)
+ LLE_RUNLOCK(ln);
+
+ return (nd6_output_ifp(ifp, origifp, m, dst));
}
/*
- * Note that I'm not enforcing any global serialization
- * lle state or asked changes here as the logic is too
- * complicated to avoid having to always acquire an exclusive
- * lock
- * KMM
+ * Output IPv6 packet - heavy version.
+ * The function assumes that either
+ * 1) destination LLE does not exist, is invalid or stale, so
+ * ND6_EXCLUSIVE lock needs to be acquired
+ * 2) destination lle is provided (with ND6_EXCLUSIVE lock),
+ * in that case packets are queued in &chain.
*
*/
-#define senderr(e) { error = (e); goto bad;}
-
-int
-nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
- struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
- struct mbuf **chain)
+static int
+nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+ struct sockaddr_in6 *dst)
{
- struct mbuf *m = m0;
- struct m_tag *mtag;
- struct llentry *ln = lle;
- struct ip6_hdr *ip6;
- int error = 0;
+ struct llentry *lle = NULL;
int flags = 0;
- int ip6len;
-#ifdef INVARIANTS
- if (lle != NULL) {
-
- LLE_WLOCK_ASSERT(lle);
+ KASSERT(m != NULL, ("NULL mbuf, nothing to send"));
+ /* discard the packet if IPv6 operation is disabled on the interface */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+ m_freem(m);
+ return (ENETDOWN); /* better error? */
+ }
- KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
- }
-#endif
if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
goto sendpkt;
@@ -1880,35 +1960,28 @@
goto sendpkt;
/*
- * next hop determination. This routine is derived from ether_output.
- */
-
- /*
* Address resolution or Neighbor Unreachability Detection
* for the next hop.
* At this point, the destination of the packet must be a unicast
* or an anycast address(i.e. not a multicast).
*/
-
- flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0;
- if (ln == NULL) {
- retry:
- IF_AFDATA_LOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
- IF_AFDATA_UNLOCK(ifp);
- if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
+ if (lle == NULL) {
+ IF_AFDATA_RLOCK(ifp);
+ lle = nd6_lookup(&dst->sin6_addr, ND6_EXCLUSIVE, ifp);
+ IF_AFDATA_RUNLOCK(ifp);
+ if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
/*
* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
* the condition below is not very efficient. But we believe
* it is tolerable, because this should be a rare case.
*/
- flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
+ flags = ND6_CREATE | ND6_EXCLUSIVE;
IF_AFDATA_LOCK(ifp);
- ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
+ lle = nd6_lookup(&dst->sin6_addr, flags, ifp);
IF_AFDATA_UNLOCK(ifp);
}
}
- if (ln == NULL) {
+ if (lle == NULL) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
char ip6buf[INET6_ADDRSTRLEN];
@@ -1915,21 +1988,20 @@
log(LOG_DEBUG,
"nd6_output: can't allocate llinfo for %s "
"(ln=%p)\n",
- ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
- senderr(EIO); /* XXX: good error? */
+ ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
+ m_freem(m);
+ return (ENOBUFS);
}
goto sendpkt; /* send anyway */
}
+ LLE_WLOCK_ASSERT(lle);
+
/* We don't have to do link-layer address resolution on a p2p link. */
if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
- ln->ln_state < ND6_LLINFO_REACHABLE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- goto retry;
- }
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ lle->ln_state < ND6_LLINFO_REACHABLE) {
+ lle->ln_state = ND6_LLINFO_STALE;
+ nd6_llinfo_settimer_locked(lle, (long)V_nd6_gctimer * hz);
}
/*
@@ -1939,15 +2011,10 @@
* neighbor unreachability detection on expiration.
* (RFC 2461 7.3.3)
*/
- if (ln->ln_state == ND6_LLINFO_STALE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- ln->la_asked = 0;
- ln->ln_state = ND6_LLINFO_DELAY;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
+ if (lle->ln_state == ND6_LLINFO_STALE) {
+ lle->la_asked = 0;
+ lle->ln_state = ND6_LLINFO_DELAY;
+ nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz);
}
/*
@@ -1955,7 +2022,7 @@
* (i.e. its link-layer address is already resolved), just
* send the packet.
*/
- if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
+ if (lle->ln_state > ND6_LLINFO_INCOMPLETE)
goto sendpkt;
/*
@@ -1965,23 +2032,15 @@
* does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen,
* the oldest packet in the queue will be removed.
*/
- if (ln->ln_state == ND6_LLINFO_NOSTATE)
- ln->ln_state = ND6_LLINFO_INCOMPLETE;
+ if (lle->ln_state == ND6_LLINFO_NOSTATE)
+ lle->ln_state = ND6_LLINFO_INCOMPLETE;
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
-
- LLE_WLOCK_ASSERT(ln);
-
- if (ln->la_hold) {
+ if (lle->la_hold != NULL) {
struct mbuf *m_hold;
int i;
i = 0;
- for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
+ for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
i++;
if (m_hold->m_nextpkt == NULL) {
m_hold->m_nextpkt = m;
@@ -1989,13 +2048,13 @@
}
}
while (i >= V_nd6_maxqueuelen) {
- m_hold = ln->la_hold;
- ln->la_hold = ln->la_hold->m_nextpkt;
+ m_hold = lle->la_hold;
+ lle->la_hold = lle->la_hold->m_nextpkt;
m_freem(m_hold);
i--;
}
} else {
- ln->la_hold = m;
+ lle->la_hold = m;
}
/*
@@ -2002,121 +2061,31 @@
* If there has been no NS for the neighbor after entering the
* INCOMPLETE state, send the first solicitation.
*/
- if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
- ln->la_asked++;
+ if (!ND6_LLINFO_PERMANENT(lle) && lle->la_asked == 0) {
+ lle->la_asked++;
- nd6_llinfo_settimer_locked(ln,
+ nd6_llinfo_settimer_locked(lle,
(long)ND_IFINFO(ifp)->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
- if (lle != NULL && ln == lle)
- LLE_WLOCK(lle);
-
- } else if (lle == NULL || ln != lle) {
- /*
- * We did the lookup (no lle arg) so we
- * need to do the unlock here.
- */
- LLE_WUNLOCK(ln);
+ LLE_WUNLOCK(lle);
+ nd6_ns_output(ifp, NULL, &dst->sin6_addr, lle, NULL);
+ } else {
+ /* We did the lookup so we need to do the unlock here. */
+ LLE_WUNLOCK(lle);
}
return (0);
sendpkt:
- /* discard the packet if IPv6 operation is disabled on the interface */
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
- error = ENETDOWN; /* better error? */
- goto bad;
- }
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
+ if (lle != NULL)
+ LLE_WUNLOCK(lle);
-#ifdef MAC
- mac_netinet6_nd6_send(ifp, m);
-#endif
-
- /*
- * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
- * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
- * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
- * to be diverted to user space. When re-injected into the kernel,
- * send_output() will directly dispatch them to the outgoing interface.
- */
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
- if (mtag != NULL) {
- ip6 = mtod(m, struct ip6_hdr *);
- ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
- /* Use the SEND socket */
- error = send_sendso_input_hook(m, ifp, SND_OUT,
- ip6len);
- /* -1 == no app on SEND socket */
- if (error == 0 || error != -1)
- return (error);
- }
- }
-
- /*
- * We were passed in a pointer to an lle with the lock held
- * this means that we can't call if_output as we will
- * recurse on the lle lock - so what we do is we create
- * a list of mbufs to send and transmit them in the caller
- * after the lock is dropped
- */
- if (lle != NULL) {
- if (*chain == NULL)
- *chain = m;
- else {
- struct mbuf *mb;
-
- /*
- * append mbuf to end of deferred chain
- */
- mb = *chain;
- while (mb->m_nextpkt != NULL)
- mb = mb->m_nextpkt;
- mb->m_nextpkt = m;
- }
- return (error);
- }
- /* Reset layer specific mbuf flags to avoid confusing lower layers. */
- m->m_flags &= ~(M_PROTOFLAGS);
- if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
- return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
- NULL));
- }
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
- return (error);
-
- bad:
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
- if (m)
- m_freem(m);
- return (error);
+ return (nd6_output_ifp(ifp, origifp, m, dst));
}
-#undef senderr
int
-nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
- struct sockaddr_in6 *dst, struct route *ro)
+nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
+ struct sockaddr_in6 *dst)
{
struct mbuf *m, *m_head;
struct ifnet *outifp;
@@ -2131,7 +2100,7 @@
while (m_head) {
m = m_head;
m_head = m_head->m_nextpkt;
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);
+ error = nd6_output_ifp(ifp, origifp, m, dst);
}
/*
@@ -2165,13 +2134,7 @@
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
- case IFT_GIF: /* XXX need more cases? */
- case IFT_PPP:
- case IFT_TUNNEL:
case IFT_BRIDGE:
case IFT_PROPVIRTUAL:
return (1);
@@ -2186,13 +2149,13 @@
*/
int
nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, u_char *desten, struct llentry **lle)
+ const struct sockaddr *dst, u_char *desten, struct llentry **lle)
{
struct llentry *ln;
*lle = NULL;
IF_AFDATA_UNLOCK_ASSERT(ifp);
- if (m->m_flags & M_MCAST) {
+ if (m != NULL && m->m_flags & M_MCAST) {
int i;
switch (ifp->if_type) {
@@ -2261,7 +2224,6 @@
}
ln->la_hold = NULL;
- return;
}
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
@@ -2275,6 +2237,8 @@
CTLFLAG_RD, nd6_sysctl_prlist, "");
SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
+SYSCTL_VNET_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
+ CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
static int
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
@@ -2283,30 +2247,33 @@
struct nd_defrouter *dr;
int error;
- if (req->newptr)
+ if (req->newptr != NULL)
return (EPERM);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
bzero(&d, sizeof(d));
d.rtaddr.sin6_family = AF_INET6;
d.rtaddr.sin6_len = sizeof(d.rtaddr);
- /*
- * XXX locking
- */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
d.rtaddr.sin6_addr = dr->rtaddr;
error = sa6_recoverscope(&d.rtaddr);
if (error != 0)
- return (error);
- d.flags = dr->flags;
+ break;
+ d.flags = dr->raflags;
d.rtlifetime = dr->rtlifetime;
- d.expire = dr->expire;
+ d.expire = dr->expire + (time_second - time_uptime);
d.if_index = dr->ifp->if_index;
error = SYSCTL_OUT(req, &d, sizeof(d));
if (error != 0)
- return (error);
+ break;
}
- return (0);
+ ND6_RUNLOCK();
+ return (error);
}
static int
@@ -2352,7 +2319,8 @@
~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
p.expire = pr->ndpr_lastupdate +
- pr->ndpr_vltime;
+ pr->ndpr_vltime +
+ (time_second - time_uptime);
else
p.expire = maxexpire;
}
Modified: trunk/sys/netinet6/nd6.h
===================================================================
--- trunk/sys/netinet6/nd6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $
- * $FreeBSD: stable/9/sys/netinet6/nd6.h 245555 2013-01-17 16:39:21Z ume $
+ * $FreeBSD: stable/10/sys/netinet6/nd6.h 303458 2016-07-28 20:08:01Z sbruno $
*/
#ifndef _NETINET6_ND6_H_
@@ -80,7 +80,7 @@
#define ND6_IFF_PERFORMNUD 0x1
#define ND6_IFF_ACCEPT_RTADV 0x2
-#define ND6_IFF_PREFER_SOURCE 0x4 /* XXX: not related to ND. */
+#define ND6_IFF_PREFER_SOURCE 0x4 /* Not used in FreeBSD. */
#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to
* DAD failure. (XXX: not ND-specific)
*/
@@ -88,6 +88,7 @@
#define ND6_IFF_AUTO_LINKLOCAL 0x20
#define ND6_IFF_NO_RADR 0x40
#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
+#define ND6_IFF_NO_DAD 0x100
#define ND6_CREATE LLE_CREATE
#define ND6_EXCLUSIVE LLE_EXCLUSIVE
@@ -235,14 +236,15 @@
((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
TAILQ_HEAD(nd_drhead, nd_defrouter);
-struct nd_defrouter {
+struct nd_defrouter {
TAILQ_ENTRY(nd_defrouter) dr_entry;
- struct in6_addr rtaddr;
- u_char flags; /* flags on RA message */
+ struct in6_addr rtaddr;
+ u_char raflags; /* flags on RA message */
u_short rtlifetime;
u_long expire;
- struct ifnet *ifp;
+ struct ifnet *ifp;
int installed; /* is installed into kernel routing table */
+ u_int refcnt;
};
struct nd_prefixctl {
@@ -342,6 +344,19 @@
#define V_nd6_debug VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
+/* Lock for the prefix and default router lists. */
+VNET_DECLARE(struct rwlock, nd6_lock);
+#define V_nd6_lock VNET(nd6_lock)
+
+#define ND6_RLOCK() rw_rlock(&V_nd6_lock)
+#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock)
+#define ND6_WLOCK() rw_wlock(&V_nd6_lock)
+#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock)
+#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED)
+#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED)
+#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED)
+#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED)
+
#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
VNET_DECLARE(struct callout, nd6_timer_ch);
@@ -360,7 +375,7 @@
#define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance)
union nd_opts {
- struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */
+ struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */
struct {
struct nd_opt_hdr *zero;
struct nd_opt_hdr *src_lladdr;
@@ -368,6 +383,16 @@
struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
struct nd_opt_rd_hdr *rh;
struct nd_opt_mtu *mtu;
+ struct nd_opt_hdr *__res6;
+ struct nd_opt_hdr *__res7;
+ struct nd_opt_hdr *__res8;
+ struct nd_opt_hdr *__res9;
+ struct nd_opt_hdr *__res10;
+ struct nd_opt_hdr *__res11;
+ struct nd_opt_hdr *__res12;
+ struct nd_opt_hdr *__res13;
+ struct nd_opt_nonce *nonce;
+ struct nd_opt_hdr *__res15;
struct nd_opt_hdr *search; /* multiple opts */
struct nd_opt_hdr *last; /* multiple opts */
int done;
@@ -380,6 +405,7 @@
#define nd_opts_pi_end nd_opt_each.pi_end
#define nd_opts_rh nd_opt_each.rh
#define nd_opts_mtu nd_opt_each.mtu
+#define nd_opts_nonce nd_opt_each.nonce
#define nd_opts_search nd_opt_each.search
#define nd_opts_last nd_opt_each.last
#define nd_opts_done nd_opt_each.done
@@ -411,14 +437,13 @@
char *, int, int, int);
int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *, struct rtentry *);
-int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *, struct llentry *,
- struct mbuf **);
-int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct route *);
+void nd6_grab_holdchain(struct llentry *, struct mbuf **,
+ struct sockaddr_in6 *);
+int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *);
int nd6_need_cache(struct ifnet *);
int nd6_storelladdr(struct ifnet *, struct mbuf *,
- struct sockaddr *, u_char *, struct llentry **);
+ const struct sockaddr *, u_char *, struct llentry **);
/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);
@@ -426,24 +451,28 @@
const struct in6_addr *, u_long, int, struct sockaddr *);
void nd6_ns_input(struct mbuf *, int, int);
void nd6_ns_output(struct ifnet *, const struct in6_addr *,
- const struct in6_addr *, struct llentry *, int);
+ const struct in6_addr *, struct llentry *, uint8_t *);
caddr_t nd6_ifptomac(struct ifnet *);
+void nd6_dad_init(void);
void nd6_dad_start(struct ifaddr *, int);
void nd6_dad_stop(struct ifaddr *);
-void nd6_dad_duplicated(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
-void prelist_del(struct nd_prefix *);
void defrouter_reset(void);
void defrouter_select(void);
-void defrtrlist_del(struct nd_defrouter *);
+void defrouter_ref(struct nd_defrouter *);
+void defrouter_rele(struct nd_defrouter *);
+bool defrouter_remove(struct in6_addr *, struct ifnet *);
+void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
+void defrouter_del(struct nd_defrouter *);
void prelist_remove(struct nd_prefix *);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **);
void pfxlist_onlink_check(void);
struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
Modified: trunk/sys/netinet6/nd6_nbr.c
===================================================================
--- trunk/sys/netinet6/nd6_nbr.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6_nbr.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6_nbr.c 240305 2012-09-10 11:38:02Z glebius $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6_nbr.c 303458 2016-07-28 20:08:01Z sbruno $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
+#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/mbuf.h>
@@ -49,9 +50,11 @@
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/errno.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/callout.h>
+#include <sys/refcount.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -61,6 +64,7 @@
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@@ -79,19 +83,32 @@
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
-static struct dadq *nd6_dad_find(struct ifaddr *);
-static void nd6_dad_starttimer(struct dadq *, int);
+static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
+static void nd6_dad_add(struct dadq *dp);
+static void nd6_dad_del(struct dadq *dp);
+static void nd6_dad_rele(struct dadq *);
+static void nd6_dad_starttimer(struct dadq *, int, int);
static void nd6_dad_stoptimer(struct dadq *);
static void nd6_dad_timer(struct dadq *);
-static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
-static void nd6_dad_ns_input(struct ifaddr *);
+static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
+static void nd6_dad_ns_output(struct dadq *);
+static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
static void nd6_dad_na_input(struct ifaddr *);
static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
+static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
+ const struct in6_addr *, struct llentry *, uint8_t *, u_int);
-VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/
-VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */
-#define V_dad_ignore_ns VNET(dad_ignore_ns)
+static VNET_DEFINE(int, dad_enhanced) = 1;
+#define V_dad_enhanced VNET(dad_enhanced)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(dad_enhanced), 0,
+ "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
+
+static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to
+ transmit DAD packet */
#define V_dad_maxtry VNET(dad_maxtry)
/*
@@ -228,46 +245,33 @@
/* (1) and (3) check. */
if (ifp->if_carp)
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
- if (ifa == NULL)
+ else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
if (ifa == NULL) {
- struct rtentry *rt;
- struct sockaddr_in6 tsin6;
+ struct route_in6 ro;
int need_proxy;
-#ifdef RADIX_MPATH
- struct route_in6 ro;
-#endif
- bzero(&tsin6, sizeof tsin6);
- tsin6.sin6_len = sizeof(struct sockaddr_in6);
- tsin6.sin6_family = AF_INET6;
- tsin6.sin6_addr = taddr6;
+ bzero(&ro, sizeof(ro));
+ ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+ ro.ro_dst.sin6_family = AF_INET6;
+ ro.ro_dst.sin6_addr = taddr6;
/* Always use the default FIB. */
#ifdef RADIX_MPATH
- bzero(&ro, sizeof(ro));
- ro.ro_dst = tsin6;
rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE,
RT_DEFAULT_FIB);
- rt = ro.ro_rt;
#else
- rt = in6_rtalloc1((struct sockaddr *)&tsin6, 0, 0,
- RT_DEFAULT_FIB);
+ in6_rtalloc(&ro, RT_DEFAULT_FIB);
#endif
- need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 &&
- rt->rt_gateway->sa_family == AF_LINK);
- if (rt != NULL) {
- /*
- * Make a copy while we can be sure that rt_gateway
- * is still stable before unlocking to avoid lock
- * order problems. proxydl will only be used if
- * proxy will be set in the next block.
- */
+ need_proxy = (ro.ro_rt &&
+ (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 &&
+ ro.ro_rt->rt_gateway->sa_family == AF_LINK);
+ if (ro.ro_rt != NULL) {
if (need_proxy)
- proxydl = *SDL(rt->rt_gateway);
- RTFREE_LOCKED(rt);
+ proxydl = *SDL(ro.ro_rt->rt_gateway);
+ RTFREE(ro.ro_rt);
}
if (need_proxy) {
/*
@@ -328,7 +332,7 @@
* silently ignore it.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
- nd6_dad_ns_input(ifa);
+ nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
goto freeit;
}
@@ -389,12 +393,14 @@
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
*
- * ln - for source address determination
- * dad - duplicate address detection
+ * ln - for source address determination
+ * nonce - If non-NULL, NS is used for duplicate address detection and
+ * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
*/
-void
-nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
- const struct in6_addr *taddr6, struct llentry *ln, int dad)
+static void
+nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6,
+ const struct in6_addr *taddr6, struct llentry *ln, uint8_t *nonce,
+ u_int fibnum)
{
struct mbuf *m;
struct m_tag *mtag;
@@ -412,25 +418,18 @@
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_ns);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
+ M_SETFIB(m, fibnum);
bzero(&ro, sizeof(ro));
@@ -465,7 +464,7 @@
if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
goto bad;
}
- if (!dad) {
+ if (nonce == NULL) {
struct ifaddr *ifa;
/*
@@ -524,9 +523,8 @@
NULL, &ro, NULL, &oifp, &src_in);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_DEBUG,
- "nd6_ns_output: source can't be "
- "determined: dst=%s, error=%d\n",
+ nd6log((LOG_DEBUG, "%s: source can't be "
+ "determined: dst=%s, error=%d\n", __func__,
ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
error));
goto bad;
@@ -562,7 +560,7 @@
* Multicast NS MUST add one add the option
* Unicast NS SHOULD add one add the option
*/
- if (!dad && (mac = nd6_ifptomac(ifp))) {
+ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
/* 8 byte alignments... */
@@ -576,7 +574,26 @@
nd_opt->nd_opt_len = optlen >> 3;
bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
}
+ /*
+ * Add a Nonce option (RFC 3971) to detect looped back NS messages.
+ * This behavior is documented as Enhanced Duplicate Address
+ * Detection in RFC 7527.
+ * net.inet6.ip6.dad_enhanced=0 disables this.
+ */
+ if (V_dad_enhanced != 0 && nonce != NULL) {
+ int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+ /* 8-byte alignment is required. */
+ optlen = (optlen + 7) & ~7;
+ m->m_pkthdr.len += optlen;
+ m->m_len += optlen;
+ icmp6len += optlen;
+ bzero((caddr_t)nd_opt, optlen);
+ nd_opt->nd_opt_type = ND_OPT_NONCE;
+ nd_opt->nd_opt_len = optlen >> 3;
+ bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
+ }
ip6->ip6_plen = htons((u_short)icmp6len);
nd_ns->nd_ns_cksum = 0;
nd_ns->nd_ns_cksum =
@@ -591,7 +608,8 @@
m_tag_prepend(m, mtag);
}
- ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
+ ip6_output(m, NULL, &ro, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
+ &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
@@ -606,9 +624,17 @@
RTFREE(ro.ro_rt);
}
m_freem(m);
- return;
}
+#ifndef BURN_BRIDGES
+void
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
+ const struct in6_addr *taddr6, struct llentry *ln, uint8_t *nonce)
+{
+
+ nd6_ns_output_fib(ifp, daddr6, taddr6, ln, nonce, RT_DEFAULT_FIB);
+}
+#endif
/*
* Neighbor advertisement input handling.
*
@@ -638,7 +664,6 @@
struct llentry *ln = NULL;
union nd_opts ndopts;
struct mbuf *chain = NULL;
- struct m_tag *mtag;
struct sockaddr_in6 sin6;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
@@ -665,6 +690,7 @@
is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
+ memset(&sin6, 0, sizeof(sin6));
taddr6 = nd_na->nd_na_target;
if (in6_setscope(&taddr6, ifp, NULL))
@@ -697,7 +723,14 @@
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
- ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+ /*
+ * This effectively disables the DAD check on a non-master CARP
+ * address.
+ */
+ if (ifp->if_carp)
+ ifa = (*carp_iamatch6_p)(ifp, &taddr6);
+ else
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/*
* Target address matches one of my interface address.
@@ -710,8 +743,8 @@
*/
if (ifa
&& (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
+ nd6_dad_na_input(ifa);
ifa_free(ifa);
- nd6_dad_na_input(ifa);
goto freeit;
}
@@ -735,9 +768,9 @@
* If no neighbor cache entry is found, NA SHOULD silently be
* discarded.
*/
- IF_AFDATA_LOCK(ifp);
+ IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
- IF_AFDATA_UNLOCK(ifp);
+ IF_AFDATA_RUNLOCK(ifp);
if (ln == NULL) {
goto freeit;
}
@@ -756,6 +789,7 @@
*/
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
if (is_solicited) {
ln->ln_state = ND6_LLINFO_REACHABLE;
ln->ln_byhint = 0;
@@ -831,6 +865,8 @@
if (lladdr != NULL) {
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
+ EVENTHANDLER_INVOKE(lle_event, ln,
+ LLENTRY_RESOLVED);
}
/*
@@ -860,31 +896,17 @@
* Remove the sender from the Default Router List and
* update the Destination Cache entries.
*/
- struct nd_defrouter *dr;
- struct in6_addr *in6;
-
- in6 = &L3_ADDR_SIN6(ln)->sin6_addr;
-
- /*
- * Lock to protect the default router list.
- * XXX: this might be unnecessary, since this function
- * is only called under the network software interrupt
- * context. However, we keep it just for safety.
- */
- dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
- if (dr)
- defrtrlist_del(dr);
- else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
- ND6_IFF_ACCEPT_RTADV) {
+ if (!defrouter_remove(&L3_ADDR_SIN6(ln)->sin6_addr,
+ ln->lle_tbl->llt_ifp) &&
+ (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
+ ND6_IFF_ACCEPT_RTADV) != 0)
/*
* Even if the neighbor is not in the default
- * router list, the neighbor may be used
- * as a next hop for some destinations
- * (e.g. redirect case). So we must
- * call rt6_flush explicitly.
+ * router list, the neighbor may be used as a
+ * next hop for some destinations (e.g. redirect
+ * case). So we must call rt6_flush explicitly.
*/
rt6_flush(&ip6->ip6_src, ifp);
- }
}
ln->ln_router = is_router;
}
@@ -893,43 +915,15 @@
* rt->rt_flags &= ~RTF_REJECT;
*/
ln->la_asked = 0;
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
-
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
- /*
- * we assume ifp is not a loopback here, so just set
- * the 2nd argument as the 1st one.
- */
-
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
- sizeof(unsigned short), M_NOWAIT);
- if (mtag == NULL)
- goto bad;
- m_tag_prepend(m, mtag);
- }
-
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
freeit:
- if (ln != NULL) {
- if (chain)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
+ if (ln != NULL)
LLE_WUNLOCK(ln);
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
- }
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
+
if (checklink)
pfxlist_onlink_check();
@@ -980,25 +974,16 @@
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_na);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
M_SETFIB(m, fibnum);
if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
@@ -1128,7 +1113,6 @@
RTFREE(ro.ro_rt);
}
m_freem(m);
- return;
}
#ifndef BURN_BRIDGES
@@ -1157,9 +1141,6 @@
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
case IFT_BRIDGE:
case IFT_ISO88025:
@@ -1177,31 +1158,80 @@
int dad_ns_ocount; /* NS sent so far */
int dad_ns_icount;
int dad_na_icount;
+ int dad_ns_lcount; /* looped back NS */
+ int dad_loopbackprobe; /* probing state for loopback detection */
struct callout dad_timer_ch;
struct vnet *dad_vnet;
+ u_int dad_refcnt;
+#define ND_OPT_NONCE_LEN32 \
+ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
+ uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
};
static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-VNET_DEFINE(int, dad_init) = 0;
-#define V_dadq VNET(dadq)
-#define V_dad_init VNET(dad_init)
+static VNET_DEFINE(struct rwlock, dad_rwlock);
+#define V_dadq VNET(dadq)
+#define V_dad_rwlock VNET(dad_rwlock)
+#define DADQ_RLOCK() rw_rlock(&V_dad_rwlock)
+#define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock)
+#define DADQ_WLOCK() rw_wlock(&V_dad_rwlock)
+#define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock)
+
+static void
+nd6_dad_add(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+}
+
+static void
+nd6_dad_del(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_REMOVE(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+ nd6_dad_rele(dp);
+}
+
static struct dadq *
-nd6_dad_find(struct ifaddr *ifa)
+nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
{
struct dadq *dp;
- TAILQ_FOREACH(dp, &V_dadq, dad_list)
- if (dp->dad_ifa == ifa)
- return (dp);
+ DADQ_RLOCK();
+ TAILQ_FOREACH(dp, &V_dadq, dad_list) {
+ if (dp->dad_ifa != ifa)
+ continue;
+ /*
+ * Skip if the nonce matches the received one.
+ * +2 in the length is required because the type and
+ * length fields are included in the header.
+ */
+ if (n != NULL &&
+ n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
+ memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
+ ND_OPT_NONCE_LEN) == 0) {
+ dp->dad_ns_lcount++;
+ continue;
+ }
+ refcount_acquire(&dp->dad_refcnt);
+ break;
+ }
+ DADQ_RUNLOCK();
- return (NULL);
+ return (dp);
}
static void
-nd6_dad_starttimer(struct dadq *dp, int ticks)
+nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
{
+ if (send_ns != 0)
+ nd6_dad_ns_output(dp);
callout_reset(&dp->dad_timer_ch, ticks,
(void (*)(void *))nd6_dad_timer, (void *)dp);
}
@@ -1210,9 +1240,27 @@
nd6_dad_stoptimer(struct dadq *dp)
{
- callout_stop(&dp->dad_timer_ch);
+ callout_drain(&dp->dad_timer_ch);
}
+static void
+nd6_dad_rele(struct dadq *dp)
+{
+
+ if (refcount_release(&dp->dad_refcnt)) {
+ ifa_free(dp->dad_ifa);
+ free(dp, M_IP6NDP);
+ }
+}
+
+void
+nd6_dad_init(void)
+{
+
+ rw_init(&V_dad_rwlock, "nd6 DAD queue");
+ TAILQ_INIT(&V_dadq);
+}
+
/*
* Start Duplicate Address Detection (DAD) for specified interface address.
*/
@@ -1222,12 +1270,8 @@
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
+ int send_ns;
- if (!V_dad_init) {
- TAILQ_INIT(&V_dadq);
- V_dad_init++;
- }
-
/*
* If we don't need DAD, don't do it.
* There are several cases:
@@ -1252,17 +1296,26 @@
}
if (ifa->ifa_ifp == NULL)
panic("nd6_dad_start: ifa->ifa_ifp == NULL");
- if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
+ if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
return;
}
- if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
+ if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
+ !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+ (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
return;
- if (nd6_dad_find(ifa) != NULL) {
- /* DAD already in progress */
+ }
+ if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
+ /*
+ * DAD is already in progress. Let the existing entry
+ * finish it.
+ */
+ nd6_dad_rele(dp);
return;
}
- dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT);
+ dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
"%s(%s)\n",
@@ -1270,13 +1323,10 @@
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
- bzero(dp, sizeof(*dp));
callout_init(&dp->dad_timer_ch, 0);
#ifdef VIMAGE
dp->dad_vnet = curvnet;
#endif
- TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);
-
nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
@@ -1287,17 +1337,19 @@
* (re)initialization.
*/
dp->dad_ifa = ifa;
- ifa_ref(ifa); /* just for safety */
+ ifa_ref(dp->dad_ifa);
dp->dad_count = V_ip6_dad_count;
dp->dad_ns_icount = dp->dad_na_icount = 0;
dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
+ dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
+ refcount_init(&dp->dad_refcnt, 1);
+ nd6_dad_add(dp);
+ send_ns = 0;
if (delay == 0) {
- nd6_dad_ns_output(dp, ifa);
- nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
- } else {
- nd6_dad_starttimer(dp, delay);
+ send_ns = 1;
+ delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000;
}
+ nd6_dad_starttimer(dp, delay, send_ns);
}
/*
@@ -1308,9 +1360,7 @@
{
struct dadq *dp;
- if (!V_dad_init)
- return;
- dp = nd6_dad_find(ifa);
+ dp = nd6_dad_find(ifa, NULL);
if (!dp) {
/* DAD wasn't started yet */
return;
@@ -1318,10 +1368,16 @@
nd6_dad_stoptimer(dp);
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ /*
+ * The DAD queue entry may have been removed by nd6_dad_timer() while
+ * we were waiting for it to stop, so re-do the lookup.
+ */
+ nd6_dad_rele(dp);
+ if (nd6_dad_find(ifa, NULL) == NULL)
+ return;
+
+ nd6_dad_del(dp);
+ nd6_dad_rele(dp);
}
static void
@@ -1328,24 +1384,28 @@
nd6_dad_timer(struct dadq *dp)
{
CURVNET_SET(dp->dad_vnet);
- int s;
struct ifaddr *ifa = dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
char ip6buf[INET6_ADDRSTRLEN];
- s = splnet(); /* XXX */
-
/* Sanity check */
if (ia == NULL) {
log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
- goto done;
+ goto err;
}
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ /* Do not need DAD for ifdisabled interface. */
+ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
+ "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
+ goto err;
+ }
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
@@ -1352,19 +1412,17 @@
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
- /* timeouted with IFF_{RUNNING,UP} check */
- if (dp->dad_ns_tcount > V_dad_maxtry) {
- nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
+ /* Stop DAD if the interface is down even after dad_maxtry attempts. */
+ if ((dp->dad_ns_tcount > V_dad_maxtry) &&
+ (((ifp->if_flags & IFF_UP) == 0) ||
+ ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
+ nd6log((LOG_INFO, "%s: could not run DAD "
+ "because the interface was down or not running.\n",
if_name(ifa->ifa_ifp)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
- goto done;
+ goto err;
}
/* Need more checks? */
@@ -1372,84 +1430,85 @@
/*
* We have more NS to go. Send NS packet for DAD.
*/
- nd6_dad_ns_output(dp, ifa);
nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
+ goto done;
} else {
/*
* We have transmitted sufficient number of DAD packets.
* See what we've got.
*/
- int duplicate;
-
- duplicate = 0;
-
- if (dp->dad_na_icount) {
+ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+ /* We've seen NS or NA, means DAD has failed. */
+ nd6_dad_duplicated(ifa, dp);
+ else if (V_dad_enhanced != 0 &&
+ dp->dad_ns_lcount > 0 &&
+ dp->dad_ns_lcount > dp->dad_loopbackprobe) {
/*
- * the check is in nd6_dad_na_input(),
- * but just in case
+ * Sec. 4.1 in RFC 7527 requires transmission of
+ * additional probes until the loopback condition
+ * becomes clear when a looped back probe is detected.
*/
- duplicate++;
- }
-
- if (dp->dad_ns_icount) {
- /* We've seen NS, means DAD has failed. */
- duplicate++;
- }
-
- if (duplicate) {
- /* (*dp) will be freed in nd6_dad_duplicated() */
- dp = NULL;
- nd6_dad_duplicated(ifa);
+ log(LOG_ERR, "%s: a looped back NS message is "
+ "detected during DAD for %s. "
+ "Another DAD probes are being sent.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
+ dp->dad_loopbackprobe = dp->dad_ns_lcount;
+ /*
+ * Send an NS immediately and increase dad_count by
+ * V_nd6_mmaxtries - 1.
+ */
+ dp->dad_count =
+ dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
+ nd6_dad_starttimer(dp,
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
+ 1);
+ goto done;
} else {
/*
* We are done with DAD. No NA came, no NS came.
- * No duplicate address found.
+ * No duplicate address found. Check IFDISABLED flag
+ * again in case that it is changed between the
+ * beginning of this function and here.
*/
- ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ if (dp->dad_ns_lcount > 0)
+ log(LOG_ERR, "%s: DAD completed while "
+ "a looped back NS message is detected "
+ "during DAD for %s.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
}
}
-
+err:
+ nd6_dad_del(dp);
done:
- splx(s);
CURVNET_RESTORE();
}
-void
-nd6_dad_duplicated(struct ifaddr *ifa)
+static void
+nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
- struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
- dp = nd6_dad_find(ifa);
- if (dp == NULL) {
- log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
- return;
- }
-
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
- "NS in/out=%d/%d, NA in=%d\n",
+ "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
- dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
+ dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
+ dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
ia->ia6_flags |= IN6_IFF_DUPLICATED;
- /* We are done with DAD, with duplicate address found. (failure) */
- nd6_dad_stoptimer(dp);
-
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
@@ -1490,18 +1549,14 @@
break;
}
}
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
}
static void
-nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
+nd6_dad_ns_output(struct dadq *dp)
{
- struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
- struct ifnet *ifp = ifa->ifa_ifp;
+ struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
+ int i;
dp->dad_ns_tcount++;
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -1512,17 +1567,29 @@
}
dp->dad_ns_ocount++;
- nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1);
+ if (V_dad_enhanced != 0) {
+ for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
+ dp->dad_nonce[i] = arc4random();
+ /*
+ * XXXHRS: Note that in the case that
+ * DupAddrDetectTransmits > 1, multiple NS messages with
+ * different nonces can be looped back in an unexpected
+ * order. The current implementation recognizes only
+ * the latest nonce on the sender side. Practically it
+ * should work well in almost all cases.
+ */
+ }
+ nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL,
+ (uint8_t *)&dp->dad_nonce[0]);
}
static void
-nd6_dad_ns_input(struct ifaddr *ifa)
+nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
{
struct in6_ifaddr *ia;
struct ifnet *ifp;
const struct in6_addr *taddr6;
struct dadq *dp;
- int duplicate;
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_ns_input");
@@ -1530,39 +1597,15 @@
ia = (struct in6_ifaddr *)ifa;
ifp = ifa->ifa_ifp;
taddr6 = &ia->ia_addr.sin6_addr;
- duplicate = 0;
- dp = nd6_dad_find(ifa);
-
- /* Quickhack - completely ignore DAD NS packets */
- if (V_dad_ignore_ns) {
- char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_INFO,
- "nd6_dad_ns_input: ignoring DAD NS packet for "
- "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
- if_name(ifa->ifa_ifp)));
+ /* Ignore Nonce option when Enhanced DAD is disabled. */
+ if (V_dad_enhanced == 0)
+ ndopt_nonce = NULL;
+ dp = nd6_dad_find(ifa, ndopt_nonce);
+ if (dp == NULL)
return;
- }
- /*
- * if I'm yet to start DAD, someone else started using this address
- * first. I have a duplicate and you win.
- */
- if (dp == NULL || dp->dad_ns_ocount == 0)
- duplicate++;
-
- /* XXX more checks for loopback situation - see nd6_dad_timer too */
-
- if (duplicate) {
- dp = NULL; /* will be freed in nd6_dad_duplicated() */
- nd6_dad_duplicated(ifa);
- } else {
- /*
- * not sure if I got a duplicate.
- * increment ns count and see what happens.
- */
- if (dp)
- dp->dad_ns_icount++;
- }
+ dp->dad_ns_icount++;
+ nd6_dad_rele(dp);
}
static void
@@ -1573,10 +1616,9 @@
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_na_input");
- dp = nd6_dad_find(ifa);
- if (dp)
+ dp = nd6_dad_find(ifa, NULL);
+ if (dp != NULL) {
dp->dad_na_icount++;
-
- /* remove the address. */
- nd6_dad_duplicated(ifa);
+ nd6_dad_rele(dp);
+ }
}
Modified: trunk/sys/netinet6/nd6_rtr.c
===================================================================
--- trunk/sys/netinet6/nd6_rtr.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/nd6_rtr.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/nd6_rtr.c 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6_rtr.c 303458 2016-07-28 20:08:01Z sbruno $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -40,6 +40,7 @@
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
@@ -219,6 +220,8 @@
struct nd_defrouter *dr;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ dr = NULL;
+
/*
* We only accept RAs only when the per-interface flag
* ND6_IFF_ACCEPT_RTADV is on the receiving interface.
@@ -271,7 +274,7 @@
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
- dr0.flags = nd_ra->nd_ra_flags_reserved;
+ dr0.raflags = nd_ra->nd_ra_flags_reserved;
/*
* Effectively-disable routes from RA messages when
* ND6_IFF_NO_RADR enabled on the receiving interface or
@@ -283,7 +286,7 @@
dr0.rtlifetime = 0;
else
dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
- dr0.expire = time_second + dr0.rtlifetime;
+ dr0.expire = time_uptime + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
if (advreachable) {
@@ -297,8 +300,16 @@
}
if (nd_ra->nd_ra_retransmit)
ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
- if (nd_ra->nd_ra_curhoplimit)
- ndi->chlim = nd_ra->nd_ra_curhoplimit;
+ if (nd_ra->nd_ra_curhoplimit) {
+ if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
+ ndi->chlim = nd_ra->nd_ra_curhoplimit;
+ else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
+ log(LOG_ERR, "RA with a lower CurHopLimit sent from "
+ "%s on %s (current = %d, received = %d). "
+ "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
+ }
+ }
dr = defrtrlist_update(&dr0);
}
@@ -360,6 +371,10 @@
(void)prelist_update(&pr, dr, m, mcast);
}
}
+ if (dr != NULL) {
+ defrouter_rele(dr);
+ dr = NULL;
+ }
/*
* MTU
@@ -437,10 +452,6 @@
m_freem(m);
}
-/*
- * default router list proccessing sub routines
- */
-
/* tell the change to user processes watching the routing socket. */
static void
nd6_rtmsg(int cmd, struct rtentry *rt)
@@ -469,12 +480,15 @@
ifa_free(ifa);
}
+/*
+ * default router list processing subroutines
+ */
+
static void
defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
- int s;
int error;
bzero(&def, sizeof(def));
@@ -486,7 +500,6 @@
def.sin6_family = gate.sin6_family = AF_INET6;
gate.sin6_addr = new->rtaddr;
- s = splnet();
error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
(struct sockaddr *)&gate, (struct sockaddr *)&mask,
RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
@@ -496,23 +509,48 @@
}
if (error == 0)
new->installed = 1;
- splx(s);
- return;
}
struct nd_defrouter *
-defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
+ ND6_LOCK_ASSERT();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
+ if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+ defrouter_ref(dr);
return (dr);
- }
+ }
+ return (NULL);
+}
- return (NULL); /* search failed */
+struct nd_defrouter *
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_RLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ ND6_RUNLOCK();
+ return (dr);
}
+void
+defrouter_ref(struct nd_defrouter *dr)
+{
+
+ refcount_acquire(&dr->refcnt);
+}
+
+void
+defrouter_rele(struct nd_defrouter *dr)
+{
+
+ if (refcount_release(&dr->refcnt))
+ free(dr, M_IP6NDP);
+}
+
/*
* Remove the default route for a given router.
* This is just a subroutine function for defrouter_select(), and should
@@ -545,16 +583,42 @@
}
/*
- * remove all default routes from default router list
+ * Remove all default routes from default router list.
*/
void
defrouter_reset(void)
{
- struct nd_defrouter *dr;
+ struct nd_defrouter *dr, **dra;
+ int count, i;
+ count = i = 0;
+
+ /*
+ * We can't delete routes with the ND lock held, so make a copy of the
+ * current default router list and use that when deleting routes.
+ */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
- defrouter_delreq(dr);
+ count++;
+ ND6_RUNLOCK();
+ dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
+
+ ND6_RLOCK();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ if (i == count)
+ break;
+ defrouter_ref(dr);
+ dra[i++] = dr;
+ }
+ ND6_RUNLOCK();
+
+ for (i = 0; i < count && dra[i] != NULL; i++) {
+ defrouter_delreq(dra[i]);
+ defrouter_rele(dra[i]);
+ }
+ free(dra, M_TEMP);
+
/*
* XXX should we also nuke any default routers in the kernel, by
* going through them by rtalloc1()?
@@ -561,12 +625,53 @@
*/
}
+/*
+ * Look up a matching default router list entry and remove it. Returns true if a
+ * matching entry was found, false otherwise.
+ */
+bool
+defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ if (dr == NULL) {
+ ND6_WUNLOCK();
+ return (false);
+ }
+
+ defrouter_unlink(dr, NULL);
+ ND6_WUNLOCK();
+ defrouter_del(dr);
+ defrouter_rele(dr);
+ return (true);
+}
+
+/*
+ * Remove a router from the global list and optionally stash it in a
+ * caller-supplied queue.
+ *
+ * The ND lock must be held.
+ */
void
-defrtrlist_del(struct nd_defrouter *dr)
+defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
{
+
+ ND6_WLOCK_ASSERT();
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ if (drq != NULL)
+ TAILQ_INSERT_TAIL(drq, dr, dr_entry);
+}
+
+void
+defrouter_del(struct nd_defrouter *dr)
+{
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
+ ND6_UNLOCK_ASSERT();
+
/*
* Flush all the routing table entries that use the router
* as a next hop.
@@ -578,7 +683,6 @@
deldr = dr;
defrouter_delreq(dr);
}
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
/*
* Also delete all the pointers to the router in each prefix lists.
@@ -598,7 +702,10 @@
if (deldr)
defrouter_select();
- free(dr, M_IP6NDP);
+ /*
+ * Release the list reference.
+ */
+ defrouter_rele(dr);
}
/*
@@ -625,16 +732,16 @@
void
defrouter_select(void)
{
- int s = splnet();
- struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
+ struct nd_defrouter *dr, *selected_dr, *installed_dr;
struct llentry *ln = NULL;
+ ND6_RLOCK();
/*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
if (TAILQ_EMPTY(&V_nd_defrouter)) {
- splx(s);
+ ND6_RUNLOCK();
return;
}
@@ -643,6 +750,7 @@
* We just pick up the first reachable one (if any), assuming that
* the ordering rule of the list described in defrtrlist_update().
*/
+ selected_dr = installed_dr = NULL;
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
IF_AFDATA_RLOCK(dr->ifp);
if (selected_dr == NULL &&
@@ -649,6 +757,7 @@
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
selected_dr = dr;
+ defrouter_ref(selected_dr);
}
IF_AFDATA_RUNLOCK(dr->ifp);
if (ln != NULL) {
@@ -656,12 +765,15 @@
ln = NULL;
}
- if (dr->installed && installed_dr == NULL)
- installed_dr = dr;
- else if (dr->installed && installed_dr) {
- /* this should not happen. warn for diagnosis. */
- log(LOG_ERR, "defrouter_select: more than one router"
- " is installed\n");
+ if (dr->installed) {
+ if (installed_dr == NULL) {
+ installed_dr = dr;
+ defrouter_ref(installed_dr);
+ } else {
+ /* this should not happen. warn for diagnosis. */
+ log(LOG_ERR,
+ "defrouter_select: more than one router is installed\n");
+ }
}
}
/*
@@ -673,15 +785,18 @@
* or when the new one has a really higher preference value.
*/
if (selected_dr == NULL) {
- if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
+ if (installed_dr == NULL ||
+ TAILQ_NEXT(installed_dr, dr_entry) == NULL)
selected_dr = TAILQ_FIRST(&V_nd_defrouter);
else
selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
- } else if (installed_dr) {
+ defrouter_ref(selected_dr);
+ } else if (installed_dr != NULL) {
IF_AFDATA_RLOCK(installed_dr->ifp);
if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln) &&
rtpref(selected_dr) <= rtpref(installed_dr)) {
+ defrouter_rele(selected_dr);
selected_dr = installed_dr;
}
IF_AFDATA_RUNLOCK(installed_dr->ifp);
@@ -688,6 +803,7 @@
if (ln != NULL)
LLE_RUNLOCK(ln);
}
+ ND6_RUNLOCK();
/*
* If the selected router is different than the installed one,
@@ -695,13 +811,13 @@
* Note that the selected router is never NULL here.
*/
if (installed_dr != selected_dr) {
- if (installed_dr)
+ if (installed_dr != NULL) {
defrouter_delreq(installed_dr);
+ defrouter_rele(installed_dr);
+ }
defrouter_addreq(selected_dr);
}
-
- splx(s);
- return;
+ defrouter_rele(selected_dr);
}
/*
@@ -711,7 +827,7 @@
static int
rtpref(struct nd_defrouter *dr)
{
- switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
+ switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
case ND_RA_FLAG_RTPREF_HIGH:
return (RTPREF_HIGH);
case ND_RA_FLAG_RTPREF_MEDIUM:
@@ -725,7 +841,7 @@
* serious bug of kernel internal. We thus always bark here.
* Or, can we even panic?
*/
- log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
+ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
return (RTPREF_INVALID);
}
/* NOTREACHED */
@@ -735,63 +851,50 @@
defrtrlist_update(struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
- int s = splnet();
+ int oldpref;
- if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
- /* entry exists */
- if (new->rtlifetime == 0) {
- defrtrlist_del(dr);
- dr = NULL;
- } else {
- int oldpref = rtpref(dr);
+ if (new->rtlifetime == 0) {
+ defrouter_remove(&new->rtaddr, new->ifp);
+ return (NULL);
+ }
- /* override */
- dr->flags = new->flags; /* xxx flag check */
- dr->rtlifetime = new->rtlifetime;
- dr->expire = new->expire;
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
+ if (dr != NULL) {
+ oldpref = rtpref(dr);
- /*
- * If the preference does not change, there's no need
- * to sort the entries. Also make sure the selected
- * router is still installed in the kernel.
- */
- if (dr->installed && rtpref(new) == oldpref) {
- splx(s);
- return (dr);
- }
+ /* override */
+ dr->raflags = new->raflags; /* XXX flag check */
+ dr->rtlifetime = new->rtlifetime;
+ dr->expire = new->expire;
- /*
- * preferred router may be changed, so relocate
- * this router.
- * XXX: calling TAILQ_REMOVE directly is a bad manner.
- * However, since defrtrlist_del() has many side
- * effects, we intentionally do so here.
- * defrouter_select() below will handle routing
- * changes later.
- */
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
- n = dr;
- goto insert;
+ /*
+ * If the preference does not change, there's no need
+ * to sort the entries. Also make sure the selected
+ * router is still installed in the kernel.
+ */
+ if (dr->installed && rtpref(new) == oldpref) {
+ ND6_WUNLOCK();
+ return (dr);
}
- splx(s);
- return (dr);
- }
- /* entry does not exist */
- if (new->rtlifetime == 0) {
- splx(s);
- return (NULL);
+ /*
+ * The preferred router may have changed, so relocate this
+ * router.
+ */
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ n = dr;
+ } else {
+ n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
+ if (n == NULL) {
+ ND6_WUNLOCK();
+ return (NULL);
+ }
+ memcpy(n, new, sizeof(*n));
+ /* Initialize with an extra reference for the caller. */
+ refcount_init(&n->refcnt, 2);
}
- n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
- if (n == NULL) {
- splx(s);
- return (NULL);
- }
- bzero(n, sizeof(*n));
- *n = *new;
-
-insert:
/*
* Insert the new router in the Default Router List;
* The Default Router List should be in the descending order
@@ -804,15 +907,14 @@
if (rtpref(n) > rtpref(dr))
break;
}
- if (dr)
+ if (dr != NULL)
TAILQ_INSERT_BEFORE(dr, n, dr_entry);
else
TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+ ND6_WUNLOCK();
defrouter_select();
- splx(s);
-
return (n);
}
@@ -834,11 +936,11 @@
{
struct nd_pfxrouter *new;
- new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
return;
- bzero(new, sizeof(*new));
new->router = dr;
+ defrouter_ref(dr);
LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
@@ -848,7 +950,9 @@
static void
pfxrtr_del(struct nd_pfxrouter *pfr)
{
+
LIST_REMOVE(pfr, pfr_entry);
+ defrouter_rele(pfr->router);
free(pfr, M_IP6NDP);
}
@@ -875,13 +979,12 @@
{
struct nd_prefix *new = NULL;
int error = 0;
- int i, s;
+ int i;
char ip6buf[INET6_ADDRSTRLEN];
- new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
- return(ENOMEM);
- bzero(new, sizeof(*new));
+ return (ENOMEM);
new->ndpr_ifp = pr->ndpr_ifp;
new->ndpr_prefix = pr->ndpr_prefix;
new->ndpr_plen = pr->ndpr_plen;
@@ -890,11 +993,9 @@
new->ndpr_flags = pr->ndpr_flags;
if ((error = in6_init_prefix_ltimes(new)) != 0) {
free(new, M_IP6NDP);
- return(error);
+ return (error);
}
- new->ndpr_lastupdate = time_second;
- if (newp != NULL)
- *newp = new;
+ new->ndpr_lastupdate = time_uptime;
/* initialization */
LIST_INIT(&new->ndpr_advrtrs);
@@ -904,10 +1005,8 @@
new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
new->ndpr_mask.s6_addr32[i];
- s = splnet();
/* link ndpr_entry to nd_prefix list */
LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
- splx(s);
/* ND_OPT_PI_FLAG_ONLINK processing */
if (new->ndpr_raf_onlink) {
@@ -922,10 +1021,11 @@
}
}
- if (dr)
+ if (dr != NULL)
pfxrtr_add(new, dr);
-
- return 0;
+ if (newp != NULL)
+ *newp = new;
+ return (0);
}
void
@@ -932,7 +1032,7 @@
prelist_remove(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfr, *next;
- int e, s;
+ int e;
char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
@@ -957,17 +1057,13 @@
if (pr->ndpr_refcnt > 0)
return; /* notice here? */
- s = splnet();
-
/* unlink ndpr_entry from nd_prefix list */
LIST_REMOVE(pr, ndpr_entry);
- /* free list of routers that adversed the prefix */
+ /* free list of routers that advertised the prefix */
LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
- free(pfr, M_IP6NDP);
+ pfxrtr_del(pfr);
}
- splx(s);
-
free(pr, M_IP6NDP);
pfxlist_onlink_check();
@@ -985,9 +1081,7 @@
struct ifaddr *ifa;
struct ifnet *ifp = new->ndpr_ifp;
struct nd_prefix *pr;
- int s = splnet();
int error = 0;
- int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
char ip6buf[INET6_ADDRSTRLEN];
@@ -1023,7 +1117,7 @@
pr->ndpr_vltime = new->ndpr_vltime;
pr->ndpr_pltime = new->ndpr_pltime;
(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
- pr->ndpr_lastupdate = time_second;
+ pr->ndpr_lastupdate = time_uptime;
}
if (new->ndpr_raf_onlink &&
@@ -1045,23 +1139,17 @@
if (dr && pfxrtr_lookup(pr, dr) == NULL)
pfxrtr_add(pr, dr);
} else {
- struct nd_prefix *newpr = NULL;
-
- newprefix = 1;
-
if (new->ndpr_vltime == 0)
goto end;
if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
goto end;
- error = nd6_prelist_add(new, dr, &newpr);
- if (error != 0 || newpr == NULL) {
+ error = nd6_prelist_add(new, dr, &pr);
+ if (error != 0) {
nd6log((LOG_NOTICE, "prelist_update: "
- "nd6_prelist_add failed for %s/%d on %s "
- "errno=%d, returnpr=%p\n",
+ "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
- new->ndpr_plen, if_name(new->ndpr_ifp),
- error, newpr));
+ new->ndpr_plen, if_name(new->ndpr_ifp), error));
goto end; /* we should just give up in this case. */
}
@@ -1072,13 +1160,11 @@
* addresses. Thus, we explicitly make sure that the prefix
* itself expires now.
*/
- if (newpr->ndpr_raf_onlink == 0) {
- newpr->ndpr_vltime = 0;
- newpr->ndpr_pltime = 0;
- in6_init_prefix_ltimes(newpr);
+ if (pr->ndpr_raf_onlink == 0) {
+ pr->ndpr_vltime = 0;
+ pr->ndpr_pltime = 0;
+ in6_init_prefix_ltimes(pr);
}
-
- pr = newpr;
}
/*
@@ -1161,7 +1247,7 @@
if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
remaininglifetime = ND6_INFINITE_LIFETIME;
- else if (time_second - ifa6->ia6_updatetime >
+ else if (time_uptime - ifa6->ia6_updatetime >
lt6_tmp.ia6t_vltime) {
/*
* The case of "invalid" address. We should usually
@@ -1170,7 +1256,7 @@
remaininglifetime = 0;
} else
remaininglifetime = lt6_tmp.ia6t_vltime -
- (time_second - ifa6->ia6_updatetime);
+ (time_uptime - ifa6->ia6_updatetime);
/* when not updating, keep the current stored lifetime. */
lt6_tmp.ia6t_vltime = remaininglifetime;
@@ -1206,18 +1292,18 @@
u_int32_t maxvltime, maxpltime;
if (V_ip6_temp_valid_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxvltime = V_ip6_temp_valid_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxvltime = 0;
if (V_ip6_temp_preferred_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxpltime = V_ip6_temp_preferred_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxpltime = 0;
@@ -1232,7 +1318,7 @@
}
}
ifa6->ia6_lifetime = lt6_tmp;
- ifa6->ia6_updatetime = time_second;
+ ifa6->ia6_updatetime = time_uptime;
}
IF_ADDR_RUNLOCK(ifp);
if (ia6_match == NULL && new->ndpr_vltime) {
@@ -1310,7 +1396,6 @@
}
end:
- splx(s);
return error;
}
@@ -1375,6 +1460,7 @@
* that does not advertise any prefixes.
*/
if (pr == NULL) {
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
struct nd_prefix *pr0;
@@ -1385,6 +1471,7 @@
if (pfxrtr != NULL)
break;
}
+ ND6_RUNLOCK();
}
if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
/*
@@ -1746,6 +1833,7 @@
}
}
error = a_failure;
+ a_failure = 1;
if (error == 0) {
pr->ndpr_stateflags &= ~NDPRF_ONLINK;
@@ -1784,7 +1872,8 @@
&opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
- }
+ } else
+ a_failure = 0;
}
}
} else {
@@ -1796,6 +1885,10 @@
if_name(ifp), error));
}
+ if (a_failure)
+ lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
+ (struct sockaddr *)&mask6, LLE_STATIC);
+
return (error);
}
@@ -2008,7 +2101,7 @@
if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_vltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (vltime0 > V_ip6_temp_valid_lifetime)
vltime0 = V_ip6_temp_valid_lifetime;
} else
@@ -2016,7 +2109,7 @@
if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_pltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
pltime0 = V_ip6_temp_preferred_lifetime -
V_ip6_desync_factor;
@@ -2074,11 +2167,11 @@
if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_preferred = 0;
else
- ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
+ ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_expire = 0;
else
- ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
+ ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
return 0;
}
@@ -2090,7 +2183,7 @@
if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_expire = 0;
else {
- lt6->ia6t_expire = time_second;
+ lt6->ia6t_expire = time_uptime;
lt6->ia6t_expire += lt6->ia6t_vltime;
}
@@ -2098,7 +2191,7 @@
if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_preferred = 0;
else {
- lt6->ia6t_preferred = time_second;
+ lt6->ia6t_preferred = time_uptime;
lt6->ia6t_preferred += lt6->ia6t_pltime;
}
}
@@ -2113,13 +2206,10 @@
{
struct radix_node_head *rnh;
u_int fibnum;
- int s = splnet();
/* We'll care only link-local addresses */
- if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
- splx(s);
+ if (!IN6_IS_ADDR_LINKLOCAL(gateway))
return;
- }
/* XXX Do we really need to walk any but the default FIB? */
for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
@@ -2131,7 +2221,6 @@
rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
RADIX_NODE_HEAD_UNLOCK(rnh);
}
- splx(s);
}
static int
Modified: trunk/sys/netinet6/pim6.h
===================================================================
--- trunk/sys/netinet6/pim6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/pim6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: pim6.h,v 1.3 2000/03/25 07:23:58 sumikawa Exp $
- * $FreeBSD: stable/9/sys/netinet6/pim6.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/pim6.h 174510 2007-12-10 16:03:40Z obrien $
*/
/*
* Protocol Independent Multicast (PIM) definitions
Modified: trunk/sys/netinet6/pim6_var.h
===================================================================
--- trunk/sys/netinet6/pim6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/pim6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/pim6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/pim6_var.h 254925 2013-08-26 18:16:05Z jhb $
*/
/*
@@ -43,13 +43,13 @@
#define _NETINET6_PIM6_VAR_H_
struct pim6stat {
- u_quad_t pim6s_rcv_total; /* total PIM messages received */
- u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */
- u_quad_t pim6s_rcv_badsum; /* received with bad checksum */
- u_quad_t pim6s_rcv_badversion; /* received bad PIM version */
- u_quad_t pim6s_rcv_registers; /* received registers */
- u_quad_t pim6s_rcv_badregisters; /* received invalid registers */
- u_quad_t pim6s_snd_registers; /* sent registers */
+ uint64_t pim6s_rcv_total; /* total PIM messages received */
+ uint64_t pim6s_rcv_tooshort; /* received with too few bytes */
+ uint64_t pim6s_rcv_badsum; /* received with bad checksum */
+ uint64_t pim6s_rcv_badversion; /* received bad PIM version */
+ uint64_t pim6s_rcv_registers; /* received registers */
+ uint64_t pim6s_rcv_badregisters; /* received invalid registers */
+ uint64_t pim6s_snd_registers; /* sent registers */
};
#if (defined(KERNEL)) || (defined(_KERNEL))
@@ -62,8 +62,4 @@
#define PIM6CTL_STATS 1 /* statistics (read-only) */
#define PIM6CTL_MAXID 2
-#define PIM6CTL_NAMES { \
- { 0, 0 }, \
- { 0, 0 }, \
-}
#endif /* _NETINET6_PIM6_VAR_H_ */
Modified: trunk/sys/netinet6/raw_ip6.c
===================================================================
--- trunk/sys/netinet6/raw_ip6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/raw_ip6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -61,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/raw_ip6.c 248549 2013-03-20 09:12:28Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/raw_ip6.c 329158 2018-02-12 13:52:58Z ae $");
#include "opt_ipsec.h"
#include "opt_inet6.h"
@@ -69,6 +69,7 @@
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/jail.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -125,8 +126,13 @@
extern u_long rip_sendspace;
extern u_long rip_recvspace;
-VNET_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_SYSINIT(rip6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(rip6stat);
+#endif /* VIMAGE */
+
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
* initialized and rely on BSS being set to 0.
@@ -161,7 +167,7 @@
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
- V_rip6stat.rip6s_ipackets++;
+ RIP6STAT_INC(rip6s_ipackets);
if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
/* XXX Send icmp6 host/port unreach? */
@@ -200,11 +206,11 @@
}
INP_RLOCK(in6p);
if (in6p->in6p_cksum != -1) {
- V_rip6stat.rip6s_isum++;
+ RIP6STAT_INC(rip6s_isum);
if (in6_cksum(m, proto, *offp,
m->m_pkthdr.len - *offp)) {
INP_RUNLOCK(in6p);
- V_rip6stat.rip6s_badsum++;
+ RIP6STAT_INC(rip6s_badsum);
continue;
}
}
@@ -264,7 +270,7 @@
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
/* Do not inject data into pcb. */
} else
#endif /* IPSEC */
@@ -280,7 +286,7 @@
m_freem(n);
if (opts)
m_freem(opts);
- V_rip6stat.rip6s_fullsock++;
+ RIP6STAT_INC(rip6s_fullsock);
} else
sorwakeup(last->inp_socket);
opts = NULL;
@@ -296,7 +302,7 @@
*/
if ((last != NULL) && ipsec6_in_reject(m, last)) {
m_freem(m);
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
IP6STAT_DEC(ip6s_delivered);
/* Do not inject data into pcb. */
INP_RUNLOCK(last);
@@ -313,22 +319,20 @@
m_freem(m);
if (opts)
m_freem(opts);
- V_rip6stat.rip6s_fullsock++;
+ RIP6STAT_INC(rip6s_fullsock);
} else
sorwakeup(last->inp_socket);
INP_RUNLOCK(last);
} else {
- V_rip6stat.rip6s_nosock++;
+ RIP6STAT_INC(rip6s_nosock);
if (m->m_flags & M_MCAST)
- V_rip6stat.rip6s_nosockmcast++;
+ RIP6STAT_INC(rip6s_nosockmcast);
if (proto == IPPROTO_NONE)
m_freem(m);
- else {
- char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */
+ else
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_NEXTHEADER,
- prvnxtp - mtod(m, char *));
- }
+ ip6_get_prevhdr(m, *offp));
IP6STAT_DEC(ip6s_delivered);
}
return (IPPROTO_DONE);
@@ -460,7 +464,7 @@
code = icmp6->icmp6_code;
}
- M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
+ M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto bad;
@@ -560,7 +564,7 @@
icmp6_ifoutstat_inc(oifp, type, code);
ICMP6STAT_INC(icp6s_outhist[type]);
} else
- V_rip6stat.rip6s_opackets++;
+ RIP6STAT_INC(rip6s_opackets);
goto freectl;
Modified: trunk/sys/netinet6/raw_ip6.h
===================================================================
--- trunk/sys/netinet6/raw_ip6.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/raw_ip6.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: raw_ip6.h,v 1.2 2001/05/27 13:28:35 itojun Exp $
- * $FreeBSD: stable/9/sys/netinet6/raw_ip6.h 195727 2009-07-16 21:13:04Z rwatson $
+ * $FreeBSD: stable/10/sys/netinet6/raw_ip6.h 253085 2013-07-09 09:54:54Z ae $
*/
#ifndef _NETINET6_RAW_IP6_H_
@@ -38,19 +38,23 @@
* ICMPv6 stat is counted separately. see netinet/icmp6.h
*/
struct rip6stat {
- u_quad_t rip6s_ipackets; /* total input packets */
- u_quad_t rip6s_isum; /* input checksum computations */
- u_quad_t rip6s_badsum; /* of above, checksum error */
- u_quad_t rip6s_nosock; /* no matching socket */
- u_quad_t rip6s_nosockmcast; /* of above, arrived as multicast */
- u_quad_t rip6s_fullsock; /* not delivered, input socket full */
+ uint64_t rip6s_ipackets; /* total input packets */
+ uint64_t rip6s_isum; /* input checksum computations */
+ uint64_t rip6s_badsum; /* of above, checksum error */
+ uint64_t rip6s_nosock; /* no matching socket */
+ uint64_t rip6s_nosockmcast; /* of above, arrived as multicast */
+ uint64_t rip6s_fullsock; /* not delivered, input socket full */
- u_quad_t rip6s_opackets; /* total output packets */
+ uint64_t rip6s_opackets; /* total output packets */
};
#ifdef _KERNEL
-VNET_DECLARE(struct rip6stat, rip6stat);
-#define V_rip6stat VNET(rip6stat)
-#endif
+#include <sys/counter.h>
+VNET_PCPUSTAT_DECLARE(struct rip6stat, rip6stat);
+#define RIP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct rip6stat, rip6stat, name, (val))
+#define RIP6STAT_INC(name) RIP6STAT_ADD(name, 1)
+#endif /* _KERNEL */
+
#endif
Modified: trunk/sys/netinet6/route6.c
===================================================================
--- trunk/sys/netinet6/route6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/route6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/route6.c 238231 2012-07-08 11:28:33Z bz $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/route6.c 249294 2013-04-09 07:11:22Z ae $");
#include "opt_inet.h"
#include "opt_inet6.h"
Modified: trunk/sys/netinet6/scope6.c
===================================================================
--- trunk/sys/netinet6/scope6.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/scope6.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -31,14 +31,16 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/scope6.c 243382 2012-11-22 00:22:54Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/scope6.c 299145 2016-05-05 23:06:39Z markj $");
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/queue.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -56,6 +58,11 @@
#else
VNET_DEFINE(int, ip6_use_defzone) = 0;
#endif
+VNET_DEFINE(int, deembed_scopeid) = 1;
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, deembed_scopeid, CTLFLAG_RW,
+ &VNET_NAME(deembed_scopeid), 0,
+ "Extract embedded zone ID and set it to sin6_scope_id in sockaddr_in6.");
/*
* The scope6_lock protects the global sid default stored in
@@ -73,6 +80,9 @@
#define SID(ifp) \
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id)
+static int scope6_get(struct ifnet *, struct scope6_id *);
+static int scope6_set(struct ifnet *, struct scope6_id *);
+
void
scope6_init(void)
{
@@ -116,6 +126,30 @@
}
int
+scope6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
+{
+ struct in6_ifreq *ifr;
+
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return (EPFNOSUPPORT);
+
+ ifr = (struct in6_ifreq *)data;
+ switch (cmd) {
+ case SIOCSSCOPE6:
+ return (scope6_set(ifp,
+ (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+ case SIOCGSCOPE6:
+ return (scope6_get(ifp,
+ (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+ case SIOCGSCOPE6DEF:
+ return (scope6_get_default(
+ (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
+ default:
+ return (EOPNOTSUPP);
+ }
+}
+
+static int
scope6_set(struct ifnet *ifp, struct scope6_id *idlist)
{
int i;
@@ -178,7 +212,7 @@
return (error);
}
-int
+static int
scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
{
struct scope6_id *sid;
@@ -197,7 +231,6 @@
return (0);
}
-
/*
* Get a scope of the address. Node-local, link-local, site-local or global.
*/
@@ -331,7 +364,6 @@
int
sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
- struct ifnet *ifp;
u_int32_t zoneid;
if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
@@ -346,15 +378,11 @@
* zone IDs assuming a one-to-one mapping between interfaces
* and links.
*/
- if (V_if_index < zoneid)
+ if (V_if_index < zoneid || ifnet_byindex(zoneid) == NULL)
return (ENXIO);
- ifp = ifnet_byindex(zoneid);
- if (ifp == NULL) /* XXX: this can happen for some OS */
- return (ENXIO);
/* XXX assignment to 16bit from 32bit variable */
sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);
-
sin6->sin6_scope_id = 0;
}
@@ -370,12 +398,6 @@
char ip6buf[INET6_ADDRSTRLEN];
u_int32_t zoneid;
- if (sin6->sin6_scope_id != 0) {
- log(LOG_NOTICE,
- "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
- ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
- /* XXX: proceed anyway... */
- }
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
/*
@@ -386,8 +408,19 @@
/* sanity check */
if (V_if_index < zoneid)
return (ENXIO);
+#if 0
+ /* XXX: Disabled due to possible deadlock. */
if (!ifnet_byindex(zoneid))
return (ENXIO);
+#endif
+ if (sin6->sin6_scope_id != 0 &&
+ zoneid != sin6->sin6_scope_id) {
+ log(LOG_NOTICE,
+ "%s: embedded scope mismatch: %s%%%d. "
+ "sin6_scope_id was overridden\n", __func__,
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ sin6->sin6_scope_id);
+ }
sin6->sin6_addr.s6_addr16[1] = 0;
sin6->sin6_scope_id = zoneid;
}
@@ -410,63 +443,35 @@
u_int32_t zoneid = 0;
struct scope6_id *sid;
- IF_AFDATA_RLOCK(ifp);
-
- sid = SID(ifp);
-
-#ifdef DIAGNOSTIC
- if (sid == NULL) { /* should not happen */
- panic("in6_setscope: scope array is NULL");
- /* NOTREACHED */
- }
-#endif
-
/*
* special case: the loopback address can only belong to a loopback
* interface.
*/
if (IN6_IS_ADDR_LOOPBACK(in6)) {
- if (!(ifp->if_flags & IFF_LOOPBACK)) {
- IF_AFDATA_RUNLOCK(ifp);
+ if (!(ifp->if_flags & IFF_LOOPBACK))
return (EINVAL);
- } else {
- if (ret_id != NULL)
- *ret_id = 0; /* there's no ambiguity */
+ } else {
+ scope = in6_addrscope(in6);
+ if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+ scope == IPV6_ADDR_SCOPE_LINKLOCAL) {
+ /*
+ * Currently we use interface indeces as the
+ * zone IDs for interface-local and link-local
+ * scopes.
+ */
+ zoneid = ifp->if_index;
+ in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
+ } else if (scope != IPV6_ADDR_SCOPE_GLOBAL) {
+ IF_AFDATA_RLOCK(ifp);
+ sid = SID(ifp);
+ zoneid = sid->s6id_list[scope];
IF_AFDATA_RUNLOCK(ifp);
- return (0);
}
}
- scope = in6_addrscope(in6);
- switch (scope) {
- case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_LINKLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_SITELOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_ORGLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
- break;
-
- default:
- zoneid = 0; /* XXX: treat as global. */
- break;
- }
- IF_AFDATA_RUNLOCK(ifp);
-
if (ret_id != NULL)
*ret_id = zoneid;
- if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
- in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
-
return (0);
}
Modified: trunk/sys/netinet6/scope6_var.h
===================================================================
--- trunk/sys/netinet6/scope6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/scope6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*
* $KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $
- * $FreeBSD: stable/9/sys/netinet6/scope6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/scope6_var.h 271185 2014-09-06 04:39:26Z markj $
*/
#ifndef _NETINET6_SCOPE6_VAR_H_
@@ -35,6 +35,8 @@
#define _NETINET6_SCOPE6_VAR_H_
#ifdef _KERNEL
+#include <net/vnet.h>
+
struct scope6_id {
/*
* 16 is correspondent to 4bit multicast scope field.
@@ -43,11 +45,13 @@
u_int32_t s6id_list[16];
};
+VNET_DECLARE(int, deembed_scopeid);
+#define V_deembed_scopeid VNET(deembed_scopeid)
+
void scope6_init(void);
struct scope6_id *scope6_ifattach(struct ifnet *);
void scope6_ifdetach(struct scope6_id *);
-int scope6_set(struct ifnet *, struct scope6_id *);
-int scope6_get(struct ifnet *, struct scope6_id *);
+int scope6_ioctl(u_long cmd, caddr_t data, struct ifnet *);
void scope6_setdefault(struct ifnet *);
int scope6_get_default(struct scope6_id *);
u_int32_t scope6_addr2default(struct in6_addr *);
Modified: trunk/sys/netinet6/sctp6_usrreq.c
===================================================================
--- trunk/sys/netinet6/sctp6_usrreq.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/sctp6_usrreq.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/sctp6_usrreq.c 238613 2012-07-19 09:32:59Z tuexen $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/sctp6_usrreq.c 296052 2016-02-25 18:46:06Z tuexen $");
#include <netinet/sctp_os.h>
#ifdef INET6
@@ -40,9 +40,7 @@
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
-#ifdef INET6
#include <netinet6/sctp6_var.h>
-#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_uio.h>
@@ -55,261 +53,142 @@
#include <netinet/sctp_output.h>
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_crc32.h>
+#include <netinet/icmp6.h>
#include <netinet/udp.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
-#ifdef INET6
#include <netipsec/ipsec6.h>
-#endif /* INET6 */
#endif /* IPSEC */
extern struct protosw inetsw[];
int
-sctp6_input(struct mbuf **i_pak, int *offp, int proto)
+sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
{
struct mbuf *m;
+ int iphlen;
+ uint32_t vrf_id;
+ uint8_t ecn_bits;
+ struct sockaddr_in6 src, dst;
struct ip6_hdr *ip6;
struct sctphdr *sh;
- struct sctp_inpcb *in6p = NULL;
- struct sctp_nets *net;
- int refcount_up = 0;
- uint32_t vrf_id = 0;
-
-#ifdef IPSEC
- struct inpcb *in6p_ip;
-
-#endif
struct sctp_chunkhdr *ch;
- int length, offset, iphlen;
- uint8_t ecn_bits;
- struct sctp_tcb *stcb = NULL;
- int pkt_len = 0;
- uint32_t mflowid;
- uint8_t use_mflowid;
+ int length, offset;
#if !defined(SCTP_WITH_NO_CSUM)
- uint32_t check, calc_check;
+ uint8_t compute_crc;
#endif
- int off = *offp;
- uint16_t port = 0;
+ uint32_t mflowid;
+ uint8_t mflowtype;
+ uint16_t fibnum;
- /* get the VRF and table id's */
+ iphlen = *offp;
if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
SCTP_RELEASE_PKT(*i_pak);
- return (-1);
+ return (IPPROTO_DONE);
}
m = SCTP_HEADER_TO_CHAIN(*i_pak);
- pkt_len = SCTP_HEADER_LEN((*i_pak));
-
-#ifdef SCTP_PACKET_LOGGING
- sctp_packet_log(m, pkt_len);
+#ifdef SCTP_MBUF_LOGGING
+ /* Log in any input mbufs */
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
+ sctp_log_mbc(m, SCTP_MBUF_INPUT);
+ }
#endif
- if (m->m_flags & M_FLOWID) {
- mflowid = m->m_pkthdr.flowid;
- use_mflowid = 1;
- } else {
- mflowid = 0;
- use_mflowid = 0;
+#ifdef SCTP_PACKET_LOGGING
+ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
+ sctp_packet_log(m);
}
+#endif
+ SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
+ "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
+ m->m_pkthdr.len,
+ if_name(m->m_pkthdr.rcvif),
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ mflowid = m->m_pkthdr.flowid;
+ mflowtype = M_HASHTYPE_GET(m);
+ fibnum = M_GETFIB(m);
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+ /* Get IP, SCTP, and first chunk header together in the first mbuf. */
+ offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
ip6 = mtod(m, struct ip6_hdr *);
- /* Ensure that (sctphdr + sctp_chunkhdr) in a row. */
- IP6_EXTHDR_GET(sh, struct sctphdr *, m, off,
- (int)(sizeof(*sh) + sizeof(*ch)));
+ IP6_EXTHDR_GET(sh, struct sctphdr *, m, iphlen,
+ (int)(sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)));
if (sh == NULL) {
SCTP_STAT_INCR(sctps_hdrops);
return (IPPROTO_DONE);
}
ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
- iphlen = off;
- offset = iphlen + sizeof(*sh) + sizeof(*ch);
- SCTPDBG(SCTP_DEBUG_INPUT1,
- "sctp6_input() length:%d iphlen:%d\n", pkt_len, iphlen);
-
-
-#if defined(NFAITH) && NFAITH > 0
-
- if (faithprefix_p != NULL && (*faithprefix_p) (&ip6->ip6_dst)) {
+ offset -= sizeof(struct sctp_chunkhdr);
+ memset(&src, 0, sizeof(struct sockaddr_in6));
+ src.sin6_family = AF_INET6;
+ src.sin6_len = sizeof(struct sockaddr_in6);
+ src.sin6_port = sh->src_port;
+ src.sin6_addr = ip6->ip6_src;
+ if (in6_setscope(&src.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
+ goto out;
+ }
+ memset(&dst, 0, sizeof(struct sockaddr_in6));
+ dst.sin6_family = AF_INET6;
+ dst.sin6_len = sizeof(struct sockaddr_in6);
+ dst.sin6_port = sh->dest_port;
+ dst.sin6_addr = ip6->ip6_dst;
+ if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
+ goto out;
+ }
+ if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) {
/* XXX send icmp6 host/port unreach? */
- goto bad;
+ goto out;
}
-#endif /* NFAITH defined and > 0 */
- SCTP_STAT_INCR(sctps_recvpackets);
- SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
- SCTPDBG(SCTP_DEBUG_INPUT1, "V6 input gets a packet iphlen:%d pktlen:%d\n",
- iphlen, pkt_len);
+ length = ntohs(ip6->ip6_plen) + iphlen;
+ /* Validate mbuf chain length with IP payload length. */
+ if (SCTP_HEADER_LEN(m) != length) {
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp6_input() length:%d reported length:%d\n", length, SCTP_HEADER_LEN(m));
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto out;
+ }
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
- /* No multi-cast support in SCTP */
- goto bad;
+ goto out;
}
- /* destination port of 0 is illegal, based on RFC2960. */
- if (sh->dest_port == 0)
- goto bad;
-
- SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
- "sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
- m->m_pkthdr.len,
- if_name(m->m_pkthdr.rcvif),
- m->m_pkthdr.csum_flags);
+ ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);
#if defined(SCTP_WITH_NO_CSUM)
SCTP_STAT_INCR(sctps_recvnocrc);
#else
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
SCTP_STAT_INCR(sctps_recvhwcrc);
- goto sctp_skip_csum;
+ compute_crc = 0;
+ } else {
+ SCTP_STAT_INCR(sctps_recvswcrc);
+ compute_crc = 1;
}
- check = sh->checksum; /* save incoming checksum */
- sh->checksum = 0; /* prepare for calc */
- calc_check = sctp_calculate_cksum(m, iphlen);
- SCTP_STAT_INCR(sctps_recvswcrc);
- if (calc_check != check) {
- SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p phlen:%d\n",
- calc_check, check, m, iphlen);
- stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
- sh, ch, &in6p, &net, vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
#endif
- }
- /* in6p's ref-count increased && stcb locked */
- if ((in6p) && (stcb)) {
- sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
- sctp_chunk_output((struct sctp_inpcb *)in6p, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
- } else if ((in6p != NULL) && (stcb == NULL)) {
- refcount_up = 1;
- }
- SCTP_STAT_INCR(sctps_badsum);
- SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
- goto bad;
- }
- sh->checksum = calc_check;
-
-sctp_skip_csum:
+ sctp_common_input_processing(&m, iphlen, offset, length,
+ (struct sockaddr *)&src,
+ (struct sockaddr *)&dst,
+ sh, ch,
+#if !defined(SCTP_WITH_NO_CSUM)
+ compute_crc,
#endif
- net = NULL;
- /*
- * Locate pcb and tcb for datagram sctp_findassociation_addr() wants
- * IP/SCTP/first chunk header...
- */
- stcb = sctp_findassociation_addr(m, offset - sizeof(*ch),
- sh, ch, &in6p, &net, vrf_id);
- if ((net) && (port)) {
- if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
- }
- net->port = port;
- }
- if ((net != NULL) && (use_mflowid != 0)) {
- net->flowid = mflowid;
-#ifdef INVARIANTS
- net->flowidset = 1;
-#endif
- }
- /* in6p's ref-count increased */
- if (in6p == NULL) {
- struct sctp_init_chunk *init_chk, chunk_buf;
-
- SCTP_STAT_INCR(sctps_noport);
- if (ch->chunk_type == SCTP_INITIATION) {
- /*
- * we do a trick here to get the INIT tag, dig in
- * and get the tag from the INIT and put it in the
- * common header.
- */
- init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
- iphlen + sizeof(*sh), sizeof(*init_chk),
- (uint8_t *) & chunk_buf);
- if (init_chk)
- sh->v_tag = init_chk->init.initiate_tag;
- else
- sh->v_tag = 0;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
- sctp_send_shutdown_complete2(m, sh,
- use_mflowid, mflowid,
- vrf_id, port);
- goto bad;
- }
- if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
- goto bad;
- }
- if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
- if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
- ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
- (ch->chunk_type != SCTP_INIT))) {
- sctp_send_abort(m, iphlen, sh, 0, NULL,
- use_mflowid, mflowid,
- vrf_id, port);
- }
- }
- goto bad;
- } else if (stcb == NULL) {
- refcount_up = 1;
- }
-#ifdef IPSEC
- /*
- * Check AH/ESP integrity.
- */
- in6p_ip = (struct inpcb *)in6p;
- if (in6p_ip && (ipsec6_in_reject(m, in6p_ip))) {
-/* XXX */
- MODULE_GLOBAL(ipsec6stat).in_polvio++;
- goto bad;
- }
-#endif /* IPSEC */
-
- /*
- * CONTROL chunk processing
- */
- offset -= sizeof(*ch);
- ecn_bits = ((ntohl(ip6->ip6_flow) >> 20) & 0x000000ff);
-
- /* Length now holds the total packet length payload + iphlen */
- length = ntohs(ip6->ip6_plen) + iphlen;
-
- /* sa_ignore NO_NULL_CHK */
- sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
- in6p, stcb, net, ecn_bits,
- use_mflowid, mflowid,
+ ecn_bits,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
- /* inp's ref-count reduced && stcb unlocked */
- /* XXX this stuff below gets moved to appropriate parts later... */
- if (m)
+out:
+ if (m) {
sctp_m_freem(m);
- if ((in6p) && refcount_up) {
- /* reduce ref-count */
- SCTP_INP_WLOCK(in6p);
- SCTP_INP_DECR_REF(in6p);
- SCTP_INP_WUNLOCK(in6p);
}
return (IPPROTO_DONE);
+}
-bad:
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
- if ((in6p) && refcount_up) {
- /* reduce ref-count */
- SCTP_INP_WLOCK(in6p);
- SCTP_INP_DECR_REF(in6p);
- SCTP_INP_WUNLOCK(in6p);
- }
- if (m)
- sctp_m_freem(m);
- return (IPPROTO_DONE);
+
+int
+sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED)
+{
+ return (sctp6_input_with_port(i_pak, offp, 0));
}
-
static void
sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6,
struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net)
@@ -336,7 +215,8 @@
*/
nxtsz = ntohl(icmp6->icmp6_mtu);
/* Stop any PMTU timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL,
+ SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
/* Adjust destination size limit */
if (net->mtu > nxtsz) {
@@ -395,7 +275,7 @@
struct sctp_tcb *stcb,
struct sctp_nets *net)
{
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -450,7 +330,7 @@
* with no TCB
*/
sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
so = SCTP_INP_SO(inp);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
@@ -458,8 +338,9 @@
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
-#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_2);
+#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
SCTP_SOCKET_UNLOCK(so, 1);
/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
#endif
@@ -503,7 +384,6 @@
* XXX: We assume that when IPV6 is non NULL, M and OFF are
* valid.
*/
- /* check if we can safely examine src and dst ports */
struct sctp_inpcb *inp = NULL;
struct sctp_tcb *stcb = NULL;
struct sctp_nets *net = NULL;
@@ -512,6 +392,10 @@
if (ip6cp->ip6c_m == NULL)
return;
+ /* Check if we can safely examine the SCTP header. */
+ if (ip6cp->ip6c_m->m_pkthdr.len < ip6cp->ip6c_off + sizeof(sh))
+ return;
+
bzero(&sh, sizeof(sh));
bzero(&final, sizeof(final));
inp = NULL;
@@ -523,8 +407,8 @@
final.sin6_family = AF_INET6;
final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
final.sin6_port = sh.dest_port;
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)ip6cp->ip6c_src,
- (struct sockaddr *)&final,
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&final,
+ (struct sockaddr *)ip6cp->ip6c_src,
&inp, &net, 1, vrf_id);
/* inp's ref-count increased && stcb locked */
if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
@@ -591,8 +475,8 @@
if (error)
return (error);
- stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[0]),
- sin6tosa(&addrs[1]),
+ stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[1]),
+ sin6tosa(&addrs[0]),
&inp, &net, 1, vrf_id);
if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
if ((inp != NULL) && (stcb == NULL)) {
@@ -897,18 +781,11 @@
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- if (!MODULE_GLOBAL(ip6_v6only)) {
- struct sockaddr_in sin;
+ struct sockaddr_in sin;
- /* convert v4-mapped into v4 addr and send */
- in6_sin6_2_sin(&sin, sin6);
- return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin,
- control, p));
- } else {
- /* mapped addresses aren't enabled */
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
- }
+ /* convert v4-mapped into v4 addr and send */
+ in6_sin6_2_sin(&sin, sin6);
+ return (sctp_sendm(so, flags, m, (struct sockaddr *)&sin, control, p));
}
#endif /* INET */
connected_type:
@@ -956,16 +833,18 @@
uint32_t vrf_id;
int error = 0;
struct sctp_inpcb *inp;
- struct in6pcb *inp6;
struct sctp_tcb *stcb;
#ifdef INET
+ struct in6pcb *inp6;
struct sockaddr_in6 *sin6;
- struct sockaddr_storage ss;
+ union sctp_sockstore store;
#endif
+#ifdef INET
inp6 = (struct in6pcb *)so->so_pcb;
+#endif
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET);
@@ -1042,17 +921,9 @@
}
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
- if (!MODULE_GLOBAL(ip6_v6only)) {
- /* convert v4-mapped into v4 addr */
- in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
- addr = (struct sockaddr *)&ss;
- } else {
- /* mapped addresses aren't enabled */
- SCTP_INP_RUNLOCK(inp);
- SCTP_ASOC_CREATE_UNLOCK(inp);
- SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
- }
+ /* convert v4-mapped into v4 addr */
+ in6_sin6_2_sin(&store.sin, sin6);
+ addr = &store.sa;
}
#endif /* INET */
/* Now do we connect? */
@@ -1083,7 +954,7 @@
return (EALREADY);
}
/* We are GOOD to go */
- stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, p);
SCTP_ASOC_CREATE_UNLOCK(inp);
if (stcb == NULL) {
/* Gak! no memory */
@@ -1142,7 +1013,10 @@
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
fnd = 0;
sin_a6 = NULL;
@@ -1159,7 +1033,10 @@
}
if ((!fnd) || (sin_a6 == NULL)) {
/* punt */
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id);
@@ -1168,7 +1045,6 @@
}
} else {
/* For the bound all case you get back 0 */
- notConn6:
memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr));
}
} else {
@@ -1180,7 +1056,7 @@
if (laddr->ifa->address.sa.sa_family == AF_INET6) {
struct sockaddr_in6 *sin_a;
- sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+ sin_a = &laddr->ifa->address.sin6;
sin6->sin6_addr = sin_a->sin6_addr;
fnd = 1;
break;
@@ -1257,8 +1133,11 @@
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
return (ENOENT);
}
- if ((error = sa6_recoverscope(sin6)) != 0)
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error);
return (error);
+ }
*addr = (struct sockaddr *)sin6;
return (0);
}
@@ -1266,10 +1145,6 @@
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1281,19 +1156,21 @@
error = sctp6_getaddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_ingetaddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
@@ -1303,10 +1180,6 @@
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1318,19 +1191,21 @@
error = sctp6_peeraddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_peeraddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
Modified: trunk/sys/netinet6/sctp6_var.h
===================================================================
--- trunk/sys/netinet6/sctp6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/sctp6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/sctp6_var.h 244524 2012-12-21 00:41:52Z delphij $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/sctp6_var.h 243186 2012-11-17 20:04:04Z tuexen $");
#ifndef _NETINET6_SCTP6_VAR_H_
#define _NETINET6_SCTP6_VAR_H_
@@ -42,21 +42,16 @@
SYSCTL_DECL(_net_inet6_sctp6);
extern struct pr_usrreqs sctp6_usrreqs;
-
int sctp6_input(struct mbuf **, int *, int);
-int sctp6_output
-(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+int sctp6_input_with_port(struct mbuf **, int *, uint16_t);
+int
+sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct proc *);
- void sctp6_ctlinput(int, struct sockaddr *, void *);
+void sctp6_ctlinput(int, struct sockaddr *, void *);
+extern void
+sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *,
+ struct sctphdr *, struct sockaddr *,
+ struct sctp_tcb *, struct sctp_nets *);
-
- extern void sctp6_notify(struct sctp_inpcb *inp,
- struct icmp6_hdr *icmph,
- struct sctphdr *sh,
- struct sockaddr *to,
- struct sctp_tcb *stcb,
- struct sctp_nets *net);
-
-
-#endif /* _KERNEL */
#endif
+#endif
Modified: trunk/sys/netinet6/send.c
===================================================================
--- trunk/sys/netinet6/send.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/send.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -26,8 +26,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/send.c 249132 2013-04-05 08:22:11Z mav $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/send.c 254889 2013-08-25 21:54:41Z markj $");
+#include "opt_kdtrace.h"
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
@@ -34,6 +36,7 @@
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockstate.h>
@@ -47,6 +50,7 @@
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -181,6 +185,10 @@
dst.sin6_len = sizeof(dst);
dst.sin6_addr = ip6->ip6_dst;
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+
+ IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
+
/*
* Output the packet as nd6.c:nd6_output_lle() would do.
* The mbuf is always consumed, so we do not have to care
Modified: trunk/sys/netinet6/send.h
===================================================================
--- trunk/sys/netinet6/send.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/send.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: stable/9/sys/netinet6/send.h 222712 2011-06-05 11:40:30Z hrs $
+ * $FreeBSD: stable/10/sys/netinet6/send.h 222712 2011-06-05 11:40:30Z hrs $
*/
#ifndef _NETINET6_SEND_H_
Modified: trunk/sys/netinet6/tcp6_var.h
===================================================================
--- trunk/sys/netinet6/tcp6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/tcp6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -57,7 +57,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: stable/9/sys/netinet6/tcp6_var.h 244524 2012-12-21 00:41:52Z delphij $
+ * $FreeBSD: stable/10/sys/netinet6/tcp6_var.h 241916 2012-10-22 21:49:56Z delphij $
*/
#ifndef _NETINET_TCP6_VAR_H_
Modified: trunk/sys/netinet6/udp6_usrreq.c
===================================================================
--- trunk/sys/netinet6/udp6_usrreq.c 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/udp6_usrreq.c 2018-05-25 13:10:11 UTC (rev 9927)
@@ -2,6 +2,7 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
@@ -68,12 +69,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: stable/9/sys/netinet6/udp6_usrreq.c 243586 2012-11-27 01:59:51Z ae $");
+__FBSDID("$FreeBSD: stable/10/sys/netinet6/udp6_usrreq.c 277789 2015-01-27 06:19:30Z bryanv $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipfw.h"
#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/jail.h>
@@ -83,6 +85,7 @@
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -96,6 +99,7 @@
#include <net/route.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -107,6 +111,7 @@
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/udplite.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
@@ -135,14 +140,24 @@
{
struct socket *so;
struct mbuf *opts;
+ struct udpcb *up;
INP_LOCK_ASSERT(inp);
+ /*
+ * Engage the tunneling protocol.
+ */
+ up = intoudpcb(inp);
+ if (up->u_tun_func != NULL) {
+ (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
+ up->u_tun_ctx);
+ return;
+ }
#ifdef IPSEC
/* Check AH/ESP integrity. */
if (ipsec6_in_reject(n, inp)) {
m_freem(n);
- V_ipsec6stat.in_polvio++;
+ IPSEC6STAT_INC(ips_in_polvio);
return;
}
#endif /* IPSEC */
@@ -179,12 +194,15 @@
struct ip6_hdr *ip6;
struct udphdr *uh;
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
int off = *offp;
+ int cscov_partial;
int plen, ulen;
struct sockaddr_in6 fromsa;
struct m_tag *fwd_tag;
uint16_t uh_sum;
+ uint8_t nxt;
ifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
@@ -216,28 +234,43 @@
plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
ulen = ntohs((u_short)uh->uh_ulen);
- if (plen != ulen) {
- UDPSTAT_INC(udps_badlen);
- goto badunlocked;
+ nxt = ip6->ip6_nxt;
+ cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
+ if (nxt == IPPROTO_UDPLITE) {
+ /* Zero means checksum over the complete packet. */
+ if (ulen == 0)
+ ulen = plen;
+ if (ulen == plen)
+ cscov_partial = 0;
+ if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ } else {
+ if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
+ UDPSTAT_INC(udps_badlen);
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ UDPSTAT_INC(udps_nosum);
+ goto badunlocked;
+ }
}
- /*
- * Checksum extended UDP header and data.
- */
- if (uh->uh_sum == 0) {
- UDPSTAT_INC(udps_nosum);
- goto badunlocked;
- }
-
- if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
+ !cscov_partial) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
- uh_sum = in6_cksum_pseudo(ip6, ulen,
- IPPROTO_UDP, m->m_pkthdr.csum_data);
+ uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
+ m->m_pkthdr.csum_data);
uh_sum ^= 0xffff;
} else
- uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen);
+ uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
if (uh_sum != 0) {
UDPSTAT_INC(udps_badsum);
@@ -250,11 +283,13 @@
init_sin6(&fromsa, m);
fromsa.sin6_port = uh->uh_sport;
+ pcbinfo = get_inpcbinfo(nxt);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct inpcb *last;
+ struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK(pcbinfo);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -270,8 +305,9 @@
* here. We need udphdr for IPsec processing so we do that
* later.
*/
+ pcblist = get_pcblist(nxt);
last = NULL;
- LIST_FOREACH(inp, &V_udb, inp_list) {
+ LIST_FOREACH(inp, pcblist, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
if (inp->inp_lport != uh->uh_dport)
@@ -334,20 +370,9 @@
if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
INP_RLOCK(last);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, n, off, &fromsa);
- } else {
- /*
- * Engage the tunneling
- * protocol we will have to
- * leave the info_lock up,
- * since we are hunting
- * through multiple UDP's.
- *
- */
- (*up->u_tun_func)(n, off, last);
- }
+ UDP_PROBE(receive, NULL, last, ip6,
+ last, uh);
+ udp6_append(last, n, off, &fromsa);
INP_RUNLOCK(last);
}
}
@@ -376,16 +401,9 @@
goto badheadlocked;
}
INP_RLOCK(last);
- INP_INFO_RUNLOCK(&V_udbinfo);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
- (*up->u_tun_func)(m, off, last);
- }
+ INP_INFO_RUNLOCK(pcbinfo);
+ UDP_PROBE(receive, NULL, last, ip6, last, uh);
+ udp6_append(last, m, off, &fromsa);
INP_RUNLOCK(last);
return (IPPROTO_DONE);
}
@@ -406,8 +424,8 @@
* Transparently forwarded. Pretend to be the destination.
* Already got one like this?
*/
- inp = in6_pcblookup_mbuf(&V_udbinfo,
- &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
+ uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
if (!inp) {
/*
@@ -415,7 +433,7 @@
* Because we've rewritten the destination address,
* any hardware-generated hash is ignored.
*/
- inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &next_hop6->sin6_addr,
next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -425,7 +443,7 @@
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP6_NEXTHOP;
} else
- inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif, m);
@@ -456,20 +474,20 @@
}
INP_RLOCK_ASSERT(inp);
up = intoudpcb(inp);
- if (up->u_tun_func == NULL) {
- udp6_append(inp, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
-
- (*up->u_tun_func)(m, off, inp);
+ if (cscov_partial) {
+ if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
}
+ UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+ udp6_append(inp, m, off, &fromsa);
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
badheadlocked:
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
badunlocked:
if (m)
m_freem(m);
@@ -476,8 +494,9 @@
return (IPPROTO_DONE);
}
-void
-udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+static void
+udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
+ struct inpcbinfo *pcbinfo)
{
struct udphdr uh;
struct ip6_hdr *ip6;
@@ -533,14 +552,28 @@
bzero(&uh, sizeof(uh));
m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
- (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
+ (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
(struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
cmdarg, notify);
} else
- (void) in6_pcbnotify(&V_udbinfo, sa, 0,
+ (void)in6_pcbnotify(pcbinfo, sa, 0,
(const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
}
+void
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo));
+}
+
+void
+udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo));
+}
+
static int
udp6_getcred(SYSCTL_HANDLER_ARGS)
{
@@ -598,9 +631,12 @@
struct in6_addr *laddr, *faddr, in6a;
struct sockaddr_in6 *sin6 = NULL;
struct ifnet *oifp = NULL;
+ int cscov_partial = 0;
int scope_ambiguous = 0;
u_short fport;
int error = 0;
+ uint8_t nxt;
+ uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
int flags;
@@ -643,8 +679,6 @@
faddr = &sin6->sin6_addr;
/*
- * IPv4 version of udp_output calls in_pcbconnect in this case,
- * which needs splnet and affects performance.
* Since we saw no essential reason for calling in_pcbconnect,
* we get rid of such kind of logic, and call in6_selectsrc
* and in6_pcbsetport in order to fill in the local address
@@ -750,7 +784,7 @@
* Calculate data length and get a mbuf
* for UDP and IP6 headers.
*/
- M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
+ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
if (m == 0) {
error = ENOBUFS;
goto release;
@@ -759,10 +793,25 @@
/*
* Stuff checksum and output datagram.
*/
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
- if (plen <= 0xffff)
+ if (nxt == IPPROTO_UDPLITE) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ cscov = up->u_txcslen;
+ if (cscov >= plen)
+ cscov = 0;
+ udp6->uh_ulen = htons(cscov);
+ /*
+ * For UDP-Lite, checksum coverage length of zero means
+ * the entire UDPLite packet is covered by the checksum.
+ */
+ cscov_partial = (cscov == 0) ? 0 : 1;
+ } else if (plen <= 0xffff)
udp6->uh_ulen = htons((u_short)plen);
else
udp6->uh_ulen = 0;
@@ -774,20 +823,25 @@
ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
-#if 0 /* ip6_plen will be filled in ip6_output. */
ip6->ip6_plen = htons((u_short)plen);
-#endif
- ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_nxt = nxt;
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
ip6->ip6_src = *laddr;
ip6->ip6_dst = *faddr;
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
flags = 0;
+ UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
UDPSTAT_INC(udps_opackets);
error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
NULL, inp);
@@ -813,7 +867,9 @@
udp6_abort(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
@@ -829,10 +885,10 @@
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -842,8 +898,10 @@
udp6_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
@@ -852,10 +910,10 @@
if (error)
return (error);
}
- INP_INFO_WLOCK(&V_udbinfo);
- error = in_pcballoc(so, &V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
+ error = in_pcballoc(so, pcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
inp = (struct inpcb *)so->so_pcb;
@@ -876,11 +934,11 @@
if (error) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (0);
}
@@ -888,13 +946,15 @@
udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -922,7 +982,7 @@
#ifdef INET
out:
#endif
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
}
@@ -931,7 +991,9 @@
udp6_close(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
@@ -946,10 +1008,10 @@
#endif
INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -959,9 +1021,11 @@
udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in6 *sin6;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
sin6 = (struct sockaddr_in6 *)nam;
KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
@@ -988,10 +1052,10 @@
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
goto out;
@@ -1006,9 +1070,9 @@
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
out:
@@ -1020,18 +1084,20 @@
udp6_detach(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
udp_discardcb(up);
}
@@ -1039,8 +1105,10 @@
udp6_disconnect(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
@@ -1061,10 +1129,10 @@
goto out;
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
@@ -1078,8 +1146,10 @@
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error = 0;
+ pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
@@ -1131,9 +1201,9 @@
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
#ifdef INET
#endif
INP_WUNLOCK(inp);
Modified: trunk/sys/netinet6/udp6_var.h
===================================================================
--- trunk/sys/netinet6/udp6_var.h 2018-05-25 13:07:22 UTC (rev 9926)
+++ trunk/sys/netinet6/udp6_var.h 2018-05-25 13:10:11 UTC (rev 9927)
@@ -58,7 +58,7 @@
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: stable/9/sys/netinet6/udp6_var.h 174510 2007-12-10 16:03:40Z obrien $
+ * $FreeBSD: stable/10/sys/netinet6/udp6_var.h 265946 2014-05-13 06:05:53Z kevlo $
*/
#ifndef _NETINET6_UDP6_VAR_H_
@@ -70,6 +70,7 @@
extern struct pr_usrreqs udp6_usrreqs;
void udp6_ctlinput(int, struct sockaddr *, void *);
+void udplite6_ctlinput(int, struct sockaddr *, void *);
int udp6_input(struct mbuf **, int *, int);
#endif
More information about the Midnightbsd-cvs mailing list